summaryrefslogtreecommitdiff
path: root/src/buildvm_x86.dasc
diff options
context:
space:
mode:
authorMike Pall <mike>2009-12-08 19:46:35 +0100
committerMike Pall <mike>2009-12-08 19:46:35 +0100
commit55b16959717084884fd4a0cbae6d19e3786c20c7 (patch)
treec8a07a43c13679751ed25a9d06796e9e7b2134a6 /src/buildvm_x86.dasc
downloadluajit-2.0.0-beta1.tar.gz
luajit-2.0.0-beta1.tar.bz2
luajit-2.0.0-beta1.zip
RELEASE LuaJIT-2.0.0-beta1v2.0.0-beta1
Diffstat (limited to 'src/buildvm_x86.dasc')
-rw-r--r--src/buildvm_x86.dasc3592
1 files changed, 3592 insertions, 0 deletions
diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc
new file mode 100644
index 00000000..add00c9d
--- /dev/null
+++ b/src/buildvm_x86.dasc
@@ -0,0 +1,3592 @@
1|// Low-level VM code for x86 CPUs.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2009 Mike Pall. See Copyright Notice in luajit.h
4|
5|.arch x86
6|.section code_op, code_sub
7|
8|.actionlist build_actionlist
9|.globals GLOB_
10|.globalnames globnames
11|.externnames extnames
12|
13|//-----------------------------------------------------------------------
14|
15|// Fixed register assignments for the interpreter.
16|// This is very fragile and has many dependencies. Caveat emptor.
17|.define BASE, edx // Not C callee-save, refetched anyway.
18|.define KBASE, edi // Must be C callee-save.
19|.define PC, esi // Must be C callee-save.
20|.define DISPATCH, ebx // Must be C callee-save.
21|
22|.define RA, ecx // Receives bits 8-15 of the instruction word in ins_NEXT.
23|.define RAL, cl // Low 8 bits of RA.
24|.define RB, ebp // Must be ebp (C callee-save).
25|.define RC, eax // Must be eax (fcomparepp and others).
26|.define RCW, ax // Low 16 bits of RC.
27|.define RCH, ah // Bits 8-15 of RC.
28|.define RCL, al // Low 8 bits of RC.
29|.define OP, RB // Alias: the opcode is decoded into the same register as RB.
30|.define RD, RC // Alias: RD is the same register as RC.
31|.define RDL, RCL // Low 8 bits of RD.
32|
33|// Type definitions. Some of these are only used for documentation. Each name can prefix a register to access a field of the C type, e.g. L:RB->maxstack.
34|.type L, lua_State
35|.type GL, global_State
36|.type TVALUE, TValue
37|.type GCOBJ, GCobj
38|.type STR, GCstr
39|.type TAB, GCtab
40|.type LFUNC, GCfuncL
41|.type CFUNC, GCfuncC
42|.type PROTO, GCproto
43|.type UPVAL, GCupval
44|.type NODE, Node
45|.type NARGS, int
46|.type TRACE, Trace
47|.type EXITINFO, ExitInfo
48|
49|// Stack layout while in interpreter. Must match with lj_frame.h.
50|.macro saveregs // Push the four registers marked C callee-save above.
51| push ebp; push edi; push esi; push ebx
52|.endmacro
53|.macro restoreregs // Pop them again, in the reverse of the push order.
54| pop ebx; pop esi; pop edi; pop ebp
55|.endmacro
56|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
57|
58|.define INARG_4, aword [esp+aword*15]
59|.define INARG_3, aword [esp+aword*14]
60|.define INARG_2, aword [esp+aword*13]
61|.define INARG_1, aword [esp+aword*12]
62|//----- 16 byte aligned, ^^^ arguments from C caller
63|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter.
64|.define SAVE_R4, aword [esp+aword*10] // ebp (pushed first by saveregs).
65|.define SAVE_R3, aword [esp+aword*9] // edi.
66|.define SAVE_R2, aword [esp+aword*8] // esi.
67|//----- 16 byte aligned
68|.define SAVE_R1, aword [esp+aword*7] // ebx (pushed last). <-- esp after register saves.
69|.define SAVE_PC, aword [esp+aword*6]
70|.define ARG6, aword [esp+aword*5]
71|.define ARG5, aword [esp+aword*4]
72|//----- 16 byte aligned
73|.define ARG4, aword [esp+aword*3]
74|.define ARG3, aword [esp+aword*2]
75|.define ARG2, aword [esp+aword*1]
76|.define ARG1, aword [esp] //<-- esp while in interpreter.
77|//----- 16 byte aligned, ^^^ arguments for C callee
78|
79|// FPARGx overlaps ARGx and ARG(x+1) on x86.
80|.define FPARG5, qword [esp+qword*2] // Same bytes as ARG5 and ARG6.
81|.define FPARG3, qword [esp+qword*1] // Same bytes as ARG3 and ARG4.
82|.define FPARG1, qword [esp] // Same bytes as ARG1 and ARG2.
83|// NRESULTS overlaps ARG6 (and FPARG5)
84|.define NRESULTS, ARG6
85|
86|// Arguments for vm_call and vm_pcall.
87|.define INARG_P_ERRF, INARG_4 // vm_pcall only.
88|.define INARG_NRES, INARG_3 // The nres1 argument of the entry points.
89|.define INARG_BASE, INARG_2 // The base argument of the entry points.
90|.define SAVE_L, INARG_1 // The lua_State *L argument; read back as L throughout.
91|
92|.define SAVE_CFRAME, INARG_BASE // Overwrites INARG_BASE!
93|
94|// Arguments for vm_cpcall.
95|.define INARG_CP_UD, INARG_4
96|.define INARG_CP_FUNC, INARG_3
97|.define INARG_CP_CALL, INARG_2
99|//-----------------------------------------------------------------------
100|
101|// Instruction headers. ins_NEXT leaves the upper 16 bits of the instruction in RC (= RD); each header splits or transforms them as its operand format needs.
102|.macro ins_A; .endmacro
103|.macro ins_AD; .endmacro
104|.macro ins_AJ; .endmacro
105|.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro
106|.macro ins_AB_; movzx RB, RCH; .endmacro
107|.macro ins_A_C; movzx RC, RCL; .endmacro
108|.macro ins_AND; not RD; .endmacro
109|
110|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
111|.macro ins_NEXT
112| mov RC, [PC] // Fetch the 32 bit instruction word.
113| movzx RA, RCH // RA = bits 8-15.
114| movzx OP, RCL // OP = bits 0-7 (the opcode).
115| add PC, 4 // PC now points after the fetched instruction.
116| shr RC, 16 // RC = bits 16-31.
117| jmp aword [DISPATCH+OP*4] // Indirect jump through the table at DISPATCH, indexed by opcode.
118|.endmacro
119|
120|// Instruction footer.
121|.if 1
122| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use.
123| .define ins_next, ins_NEXT
124| .define ins_next_, ins_NEXT
125|.else
126| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
127| // Affects only certain kinds of benchmarks (and only with -j off).
128| // Around 10%-30% slower on Core2, a lot slower on P4.
129| .macro ins_next
130| jmp ->ins_next // Jump to the single shared copy of the dispatch sequence.
131| .endmacro
132| .macro ins_next_
133| ->ins_next: // The shared copy is emitted wherever ins_next_ is expanded.
134| ins_NEXT
135| .endmacro
136|.endif
137|
138|//-----------------------------------------------------------------------
139|
140|// Macros to test operand types. checktp compares the dword at offset 4 of the 8 byte slot BASE[reg] with tp. checknum branches to target if that dword is above LJ_TISNUM (unsigned), checkstr/checktab if it differs from LJ_TSTR/LJ_TTAB.
141|.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro
142|.macro checknum, reg, target; checktp reg, LJ_TISNUM; ja target; .endmacro
143|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro
144|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro
145|
146|// These operands must be used with movzx. They address fields of the instruction just before PC, i.e. the one fetched last, since ins_NEXT has already advanced PC by 4.
147|.define PC_OP, byte [PC-4] // Byte 0 of that instruction.
148|.define PC_RA, byte [PC-3] // Byte 1.
149|.define PC_RB, byte [PC-1] // Byte 3.
150|.define PC_RC, byte [PC-2] // Byte 2.
151|.define PC_RD, word [PC-2] // Bytes 2-3 as one 16 bit value.
152|
153|.macro branchPC, reg // Advance PC by (reg - BCBIAS_J) instructions of 4 bytes each.
154| lea PC, [PC+reg*4-BCBIAS_J*4]
155|.endmacro
156|
157|// Assumes DISPATCH is relative to GL. DISPATCH_GL(field) and DISPATCH_J(field) yield the displacement of a global_State or jit_State field, for use as [DISPATCH+DISPATCH_GL(field)].
158#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
159#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
160|
161|// Decrement hashed hotcount and trigger trace recorder if zero.
162|.macro hotloop, reg // reg is a scratch register and gets overwritten.
163| mov reg, PC
164| shr reg, 1
165| and reg, HOTCOUNT_PCMASK // reg = (PC >> 1) & HOTCOUNT_PCMASK selects the counter.
166| sub word [DISPATCH+reg+GG_DISP2HOT], 1 // Decrement the 16 bit counter.
167| jz ->vm_hotloop // Counter reached zero.
168|.endmacro
169|
170|.macro hotcall, reg // Same sequence as hotloop, but branches to ->vm_hotcall.
171| mov reg, PC
172| shr reg, 1
173| and reg, HOTCOUNT_PCMASK
174| sub word [DISPATCH+reg+GG_DISP2HOT], 1
175| jz ->vm_hotcall
176|.endmacro
177|
178|// Set current VM state. Stores the bitwise complement of LJ_VMST_<st> into the vmstate field of global_State.
179|.macro set_vmstate, st
180| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st
181|.endmacro
182|
183|// Annoying x87 stuff: support for two compare variants. The variant is picked by the C variable cmov that is in scope where the macro is expanded.
184|.macro fcomparepp // Compare and pop st0 >< st1.
185||if (cmov) {
186| fucomip st1 // Compare st0 with st1, set the CPU flags directly, pop st0.
187| fpop // Pop the remaining operand.
188||} else {
189| fucompp // Compare st0 with st1 and pop both.
190| fnstsw ax // eax modified!
191| sahf // Copy the condition codes from ah into the CPU flags.
192||}
193|.endmacro
194|
195|.macro fdup; fld st0; .endmacro // Push a copy of st0.
196|.macro fpop1; fstp st1; .endmacro // Store st0 into st1 and pop: drops the old st1.
197|
198|// Move table write barrier back. Overwrites reg.
199|.macro barrierback, tab, reg
200| and byte tab->marked, cast_byte(~LJ_GC_BLACK) // black2gray(tab)
201| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] // reg = current head of gc.grayagain.
202| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab // tab becomes the new head.
203| mov tab->gclist, reg // Link the previous head behind tab.
204|.endmacro
205|
206|//-----------------------------------------------------------------------
207
208/* Generate subroutines used by opcodes and other parts of the VM. */
209/* The .code_sub section should be last to help static branch prediction. */
210static void build_subroutines(BuildCtx *ctx, int cmov)
211{
212 |.code_sub
213 |
214 |//-----------------------------------------------------------------------
215 |//-- Call and return handling -------------------------------------------
216 |//-----------------------------------------------------------------------
217 |
218 |// Reminder: A call gate may be called with func/args above L->maxstack,
219 |// i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
220 |// too. This means all call gates (L*, C and fast functions) must check
221 |// for stack overflow _before_ adding more slots!
222 |
223 |//-- Call gates ---------------------------------------------------------
224 |
225 |->gate_lf: // Call gate for fixarg Lua functions.
226 | // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = old base), PC = return
227 | // DISPATCH initialized
228 | mov BASE, RA // Switch to the new base.
229 | mov PROTO:RB, LFUNC:RB->pt // RB = func->pt.
230 | mov [BASE-4], PC // Store caller PC.
231 | movzx RA, byte PROTO:RB->framesize // RA = pt->framesize (a byte).
232 | mov PC, PROTO:RB->bc // PC = pt->bc.
233 | mov KBASE, PROTO:RB->k // KBASE = pt->k.
234 | mov L:RB, SAVE_L
235 | lea RA, [BASE+RA*8] // Top of frame.
236 | lea RC, [BASE+NARGS:RC*8-4] // Points to tag of 1st free slot.
237 | cmp RA, L:RB->maxstack
238 | ja ->gate_lf_growstack // Top of frame is above L->maxstack.
239 |9: // Entry point from vararg setup below.
240 | mov RB, LJ_TNIL
241 |1: // Clear free slots until top of frame.
242 | mov [RC], RB // Two nil tags are stored per iteration ...
243 | mov [RC+8], RB
244 | add RC, 16 // ... so RC advances by two slots.
245 | cmp RC, RA
246 | jb <1
247#if LJ_HASJIT
248 | // NYI: Disabled, until the tracer supports recursion/upcalls/leaves.
249 | // hotcall RB
250#endif
251 | ins_next // Dispatch the instruction at the PC loaded from pt->bc.
252 |
253 |->gate_lv: // Call gate for vararg Lua functions.
254 | // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = old base), PC = return
255 | // DISPATCH initialized
256 | mov [RA-4], PC // Store caller PC.
257 | lea PC, [NARGS:RC*8+FRAME_VARG]
258 | lea BASE, [RA+PC-FRAME_VARG]
259 | mov [BASE-8], LFUNC:RB // Store copy of LFUNC.
260 | mov PROTO:RB, LFUNC:RB->pt
261 | mov [BASE-4], PC // Store delta + FRAME_VARG.
262 | movzx PC, byte PROTO:RB->framesize
263 | lea KBASE, [BASE+PC*8]
264 | mov L:PC, SAVE_L
265 | lea RC, [BASE+4]
266 | cmp KBASE, L:PC->maxstack
267 | ja ->gate_lv_growstack // Need to grow stack.
268 | movzx PC, byte PROTO:RB->numparams
269 | test PC, PC
270 | jz >2
271 |1: // Copy fixarg slots up.
272 | add RA, 8
273 | cmp RA, BASE
274 | jnb >2
275 | mov KBASE, [RA-8]
276 | mov [RC-4], KBASE
277 | mov KBASE, [RA-4]
278 | mov [RC], KBASE
279 | add RC, 8
280 | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC).
281 | sub PC, 1
282 | jnz <1
283 |2:
284 | movzx RA, byte PROTO:RB->framesize
285 | mov PC, PROTO:RB->bc
286 | mov KBASE, PROTO:RB->k
287 | lea RA, [BASE+RA*8]
288 | jmp <9
289 |
290 |->gate_c: // Call gate for C functions.
291 | // RA = new base, RB = CFUNC, RC = nargs+1, (BASE = old base), PC = return
292 | mov [RA-4], PC
293 | mov KBASE, CFUNC:RB->f
294 | mov L:RB, SAVE_L
295 | lea RC, [RA+NARGS:RC*8-8]
296 | mov L:RB->base, RA
297 | lea RA, [RC+8*LUA_MINSTACK]
298 | mov ARG1, L:RB
299 | mov L:RB->top, RC
300 | cmp RA, L:RB->maxstack
301 | ja ->gate_c_growstack // Need to grow stack.
302 | set_vmstate C
303 | call KBASE // (lua_State *L)
304 | set_vmstate INTERP
305 | // nresults returned in eax (RD).
306 | mov BASE, L:RB->base
307 | lea RA, [BASE+RD*8]
308 | neg RA
309 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
310 |->vm_returnc:
311 | add RD, 1 // RD = nresults+1
312 | mov NRESULTS, RD
313 | test PC, FRAME_TYPE
314 | jz ->BC_RET_Z // Handle regular return to Lua.
315 |
316 |//-- Return handling (non-inline) ---------------------------------------
317 |
318 |->vm_return:
319 | // BASE = base, RA = resultofs, RD = nresults+1 (= NRESULTS), PC = return
320 | test PC, FRAME_C
321 | jz ->vm_returnp
322 |
323 | // Return to C.
324 | set_vmstate C
325 | and PC, -8
326 | sub PC, BASE
327 | neg PC // Previous base = BASE - delta.
328 |
329 | sub RD, 1
330 | jz >2
331 |1:
332 | mov RB, [BASE+RA] // Move results down.
333 | mov [BASE-8], RB
334 | mov RB, [BASE+RA+4]
335 | mov [BASE-4], RB
336 | add BASE, 8
337 | sub RD, 1
338 | jnz <1
339 |2:
340 | mov L:RB, SAVE_L
341 | mov L:RB->base, PC
342 |3:
343 | mov RD, NRESULTS
344 | mov RA, INARG_NRES // RA = wanted nresults+1
345 |4:
346 | cmp RA, RD
347 | jne >6 // More/less results wanted?
348 |5:
349 | sub BASE, 8
350 | mov L:RB->top, BASE
351 |
352 |->vm_leave_cp:
353 | mov RA, SAVE_CFRAME // Restore previous C frame.
354 | mov L:RB->cframe, RA
355 | xor eax, eax // Ok return status for vm_pcall.
356 |
357 |->vm_leave_unw:
358 | add esp, CFRAME_SPACE
359 | restoreregs
360 | ret
361 |
362 |6:
363 | jb >7 // Less results wanted?
364 | // More results wanted. Check stack size and fill up results with nil.
365 | cmp BASE, L:RB->maxstack
366 | ja >8
367 | mov dword [BASE-4], LJ_TNIL
368 | add BASE, 8
369 | add RD, 1
370 | jmp <4
371 |
372 |7: // Less results wanted.
373 | test RA, RA
374 | jz <5 // But check for LUA_MULTRET+1.
375 | sub RA, RD // Negative result!
376 | lea BASE, [BASE+RA*8] // Correct top.
377 | jmp <5
378 |
379 |8: // Corner case: need to grow stack for filling up results.
380 | // This can happen if:
381 | // - A C function grows the stack (a lot).
382 | // - The GC shrinks the stack in between.
383 | // - A return back from a lua_call() with (high) nresults adjustment.
384 | mov L:RB->top, BASE // Save current top held in BASE (yes).
385 | mov NRESULTS, RD // Need to fill only remainder with nil.
386 | mov ARG2, RA // Grow by wanted nresults+1.
387 | mov ARG1, L:RB
388 | call extern lj_state_growstack // (lua_State *L, int n)
389 | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
390 | jmp <3
391 |
392 |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
393 | // (void *cframe, int errcode)
394 | mov ecx, [esp+4]
395 | mov eax, [esp+8] // Error return status for vm_pcall.
396 | and ecx, CFRAME_RAWMASK
397 | mov esp, ecx
398 | mov L:RB, SAVE_L
399 | mov GL:RB, L:RB->glref
400 | mov dword GL:RB->vmstate, ~LJ_VMST_C
401 | jmp ->vm_leave_unw
402 |
403 |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
404 | mov ecx, [esp+4]
405 | and ecx, CFRAME_RAWMASK
406 | mov esp, ecx
407 | mov L:RB, SAVE_L
408 | mov RA, -8 // Results start at BASE+RA = BASE-8.
409 | mov RD, 1+1 // Really 1+2 results, incr. later.
410 | mov BASE, L:RB->base
411 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
412 | add DISPATCH, GG_G2DISP
413 | mov PC, [BASE-4] // Fetch PC of previous frame.
414 | mov dword [BASE-4], LJ_TFALSE // Prepend false to error message.
415 | set_vmstate INTERP
416 | jmp ->vm_returnc // Increments RD/NRESULTS and returns.
417 |
418 |->vm_returnp:
419 | test PC, FRAME_P
420 | jz ->cont_dispatch
421 |
422 | // Return from pcall or xpcall fast func.
423 | and PC, -8
424 | sub BASE, PC // Restore caller base.
425 | lea RA, [RA+PC-8] // Rebase RA and prepend one result.
426 | mov PC, [BASE-4] // Fetch PC of previous frame.
427 | // Prepending may overwrite the pcall frame, so do it at the end.
428 | mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results.
429 | jmp ->vm_returnc // Increments RD/NRESULTS and returns.
430 |
431 |//-- Grow stack on-demand -----------------------------------------------
432 |
433 |->gate_c_growstack: // Grow stack for C function.
434 | mov ARG2, LUA_MINSTACK
435 | jmp >1
436 |
437 |->gate_lv_growstack: // Grow stack for vararg Lua function.
438 | sub RC, 8
439 | mov BASE, RA
440 | mov RA, KBASE
441 | mov PC, PROTO:RB->bc
442 | mov L:RB, SAVE_L
443 |
444 |->gate_lf_growstack: // Grow stack for fixarg Lua function.
445 | // BASE = new base, RA = requested top, RC = top (offset +4 bytes)
446 | // RB = L, PC = first PC of called function (or anything if C function)
447 | sub RC, 4 // Adjust top.
448 | sub RA, BASE
449 | shr RA, 3 // n = pt->framesize - L->top
450 | add PC, 4 // Must point after first instruction.
451 | mov L:RB->base, BASE
452 | mov L:RB->top, RC
453 | mov SAVE_PC, PC
454 | mov ARG2, RA
455 | mov ARG1, L:RB
456 |1:
457 | // L:RB = L, L->base = new base, L->top = top
458 | // SAVE_PC = initial PC+1 (undefined for C functions)
459 | call extern lj_state_growstack // (lua_State *L, int n)
460 | mov RA, L:RB->base
461 | mov RC, L:RB->top
462 | mov LFUNC:RB, [RA-8]
463 | mov PC, [RA-4]
464 | sub RC, RA
465 | shr RC, 3
466 | add NARGS:RC, 1
467 | // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = invalid), PC restored.
468 | jmp aword LFUNC:RB->gate // Just retry call.
469 |
470 |//-----------------------------------------------------------------------
471 |//-- Entry points into the assembler VM ---------------------------------
472 |//-----------------------------------------------------------------------
473 |
474 |->vm_resume: // Setup C frame and resume thread.
475 | // (lua_State *L, StkId base, int nres1 = 0, ptrdiff_t ef = 0)
476 | saveregs
477 | mov PC, FRAME_C
478 | sub esp, CFRAME_SPACE
479 | xor RD, RD
480 | mov L:RB, SAVE_L
481 | lea KBASE, [esp+CFRAME_RESUME]
482 | mov RA, INARG_BASE
483 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
484 | add DISPATCH, GG_G2DISP
485 | mov L:RB->cframe, KBASE
486 | mov SAVE_CFRAME, RD // Caveat: overlaps INARG_BASE!
487 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
488 | cmp byte L:RB->status, RDL
489 | je >3 // Initial resume (like a call).
490 |
491 | // Resume after yield (like a return).
492 | set_vmstate INTERP
493 | mov byte L:RB->status, RDL
494 | mov BASE, L:RB->base
495 | mov RD, L:RB->top
496 | sub RD, RA
497 | shr RD, 3
498 | add RD, 1 // RD = nresults+1
499 | sub RA, BASE // RA = resultofs
500 | mov PC, [BASE-4]
501 | mov NRESULTS, RD
502 | test PC, FRAME_TYPE
503 | jz ->BC_RET_Z
504 | jmp ->vm_return
505 |
506 |->vm_pcall: // Setup protected C frame and enter VM.
507 | // (lua_State *L, StkId base, int nres1, ptrdiff_t ef)
508 | saveregs
509 | mov PC, FRAME_CP
510 | jmp >1
511 |
512 |->vm_call: // Setup C frame and enter VM.
513 | // (lua_State *L, StkId base, int nres1)
514 | saveregs
515 | mov PC, FRAME_C
516 |
517 |1: // Entry point for vm_pcall above (PC = ftype).
518 | sub esp, CFRAME_SPACE
519 | mov L:RB, SAVE_L
520 | mov RA, INARG_BASE
521 |
522 |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype).
523 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
524 | mov SAVE_CFRAME, KBASE // Caveat: overlaps INARG_BASE!
525 | mov SAVE_PC, esp // Any value outside of bytecode is ok.
526 | mov L:RB->cframe, esp
527 |
528 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
529 | add DISPATCH, GG_G2DISP
530 |
531 |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype).
532 | set_vmstate INTERP
533 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
534 | add PC, RA
535 | sub PC, BASE // PC = frame delta + frame type
536 |
537 | mov RC, L:RB->top
538 | sub RC, RA
539 | shr NARGS:RC, 3
540 | add NARGS:RC, 1 // RC = nargs+1
541 |
542 | mov LFUNC:RB, [RA-8]
543 | cmp dword [RA-4], LJ_TFUNC
544 | jne ->vmeta_call // Ensure KBASE defined and != BASE.
545 | jmp aword LFUNC:RB->gate
546 | // RA = new base, RB = LFUNC/CFUNC, RC = nargs+1.
547 |
548 |->vm_cpcall: // Setup protected C frame, call C.
549 | // (lua_State *L, lua_CPFunction cp, lua_CFunction func, void *ud)
550 | saveregs
551 | sub esp, CFRAME_SPACE
552 |
553 | mov L:RB, SAVE_L
554 | mov RC, INARG_CP_UD
555 | mov RA, INARG_CP_FUNC
556 | mov BASE, INARG_CP_CALL
557 | mov SAVE_PC, esp // Any value outside of bytecode is ok.
558 |
559 | // Caveat: INARG_P_* and INARG_CP_* overlap!
560 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
561 | sub KBASE, L:RB->top
562 | mov INARG_P_ERRF, 0 // No error function.
563 | mov INARG_NRES, KBASE // Neg. delta means cframe w/o frame.
564 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
565 |
566 | mov ARG3, RC
567 | mov ARG2, RA
568 | mov ARG1, L:RB
569 |
570 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
571 | mov SAVE_CFRAME, KBASE // Caveat: overlaps INARG_CP_CALL!
572 | mov L:RB->cframe, esp
573 |
574 | call BASE // (lua_State *L, lua_CFunction func, void *ud)
575 | // StkId (new base) or NULL returned in eax (RC).
576 | test RC, RC
577 | jz ->vm_leave_cp // No base? Just remove C frame.
578 | mov RA, RC
579 | mov PC, FRAME_CP
580 | jmp <2 // Else continue with the call.
581 |
582 |//-----------------------------------------------------------------------
583 |//-- Metamethod handling ------------------------------------------------
584 |//-----------------------------------------------------------------------
585 |
586 |//-- Continuation dispatch ----------------------------------------------
587 |
588 |->cont_dispatch:
589 | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in NRESULTS)
590 | add RA, BASE
591 | and PC, -8
592 | mov RB, BASE
593 | sub BASE, PC // Restore caller BASE.
594 | mov dword [RA+RD*8-4], LJ_TNIL // Ensure one valid arg.
595 | mov RC, RA // ... in [RC]
596 | mov PC, [RB-12] // Restore PC from [cont|PC].
597 | mov LFUNC:KBASE, [BASE-8]
598 | mov PROTO:KBASE, LFUNC:KBASE->pt
599 | mov KBASE, PROTO:KBASE->k
600 | // BASE = base, RC = result, RB = meta base
601 | jmp dword [RB-16] // Jump to continuation.
602 |
603 |->cont_cat: // BASE = base, RC = result, RB = mbase
604 | movzx RA, PC_RB
605 | sub RB, 16
606 | lea RA, [BASE+RA*8]
607 | sub RA, RB
608 | je ->cont_ra
609 | neg RA
610 | shr RA, 3
611 | mov ARG3, RA
612 | mov RA, [RC+4]
613 | mov RC, [RC]
614 | mov [RB+4], RA
615 | mov [RB], RC
616 | mov ARG2, RB
617 | jmp ->BC_CAT_Z
618 |
619 |//-- Table indexing metamethods -----------------------------------------
620 |
621 |->vmeta_tgets:
622 | mov ARG5, RC // RC = GCstr *
623 | mov ARG6, LJ_TSTR
624 | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6.
625 | cmp PC_OP, BC_GGET
626 | jne >1
627 | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
628 | mov [RA], TAB:RB // RB = GCtab *
629 | mov dword [RA+4], LJ_TTAB
630 | mov RB, RA
631 | jmp >2
632 |
633 |->vmeta_tgetb:
634 | movzx RC, PC_RC // Ugly, cannot fild from a byte.
635 | mov ARG4, RC
636 | fild ARG4
637 | fstp FPARG5
638 | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6.
639 | jmp >1
640 |
641 |->vmeta_tgetv:
642 | movzx RC, PC_RC // Reload TValue *k from RC.
643 | lea RC, [BASE+RC*8]
644 |1:
645 | movzx RB, PC_RB // Reload TValue *t from RB.
646 | lea RB, [BASE+RB*8]
647 |2:
648 | mov ARG2, RB
649 | mov L:RB, SAVE_L
650 | mov ARG3, RC
651 | mov ARG1, L:RB
652 | mov SAVE_PC, PC
653 | mov L:RB->base, BASE
654 | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
655 | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
656 | mov BASE, L:RB->base
657 | test RC, RC
658 | jz >3
659 |->cont_ra: // BASE = base, RC = result
660 | movzx RA, PC_RA
661 | mov RB, [RC+4]
662 | mov RC, [RC]
663 | mov [BASE+RA*8+4], RB
664 | mov [BASE+RA*8], RC
665 | ins_next
666 |
667 |3: // Call __index metamethod.
668 | // BASE = base, L->top = new base, stack = cont/func/t/k
669 | mov RA, L:RB->top
670 | mov [RA-12], PC // [cont|PC]
671 | lea PC, [RA+FRAME_CONT]
672 | sub PC, BASE
673 | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
674 | mov NARGS:RC, 3 // 2+1 args for func(t, k).
675 | jmp aword LFUNC:RB->gate
676 |
677 |//-----------------------------------------------------------------------
678 |
679 |->vmeta_tsets:
680 | mov ARG5, RC // RC = GCstr *
681 | mov ARG6, LJ_TSTR
682 | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6.
683 | cmp PC_OP, BC_GSET
684 | jne >1
685 | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
686 | mov [RA], TAB:RB // RB = GCtab *
687 | mov dword [RA+4], LJ_TTAB
688 | mov RB, RA
689 | jmp >2
690 |
691 |->vmeta_tsetb:
692 | movzx RC, PC_RC // Ugly, cannot fild from a byte.
693 | mov ARG4, RC
694 | fild ARG4
695 | fstp FPARG5
696 | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6.
697 | jmp >1
698 |
699 |->vmeta_tsetv:
700 | movzx RC, PC_RC // Reload TValue *k from RC.
701 | lea RC, [BASE+RC*8]
702 |1:
703 | movzx RB, PC_RB // Reload TValue *t from RB.
704 | lea RB, [BASE+RB*8]
705 |2:
706 | mov ARG2, RB
707 | mov L:RB, SAVE_L
708 | mov ARG3, RC
709 | mov ARG1, L:RB
710 | mov SAVE_PC, PC
711 | mov L:RB->base, BASE
712 | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
713 | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
714 | mov BASE, L:RB->base
715 | test RC, RC
716 | jz >3
717 | // NOBARRIER: lj_meta_tset ensures the table is not black.
718 | movzx RA, PC_RA
719 | mov RB, [BASE+RA*8+4]
720 | mov RA, [BASE+RA*8]
721 | mov [RC+4], RB
722 | mov [RC], RA
723 |->cont_nop: // BASE = base, (RC = result)
724 | ins_next
725 |
726 |3: // Call __newindex metamethod.
727 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
728 | mov RA, L:RB->top
729 | mov [RA-12], PC // [cont|PC]
730 | movzx RC, PC_RA
731 | mov RB, [BASE+RC*8+4] // Copy value to third argument.
732 | mov RC, [BASE+RC*8]
733 | mov [RA+20], RB
734 | mov [RA+16], RC
735 | lea PC, [RA+FRAME_CONT]
736 | sub PC, BASE
737 | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
738 | mov NARGS:RC, 4 // 3+1 args for func(t, k, v).
739 | jmp aword LFUNC:RB->gate
740 |
741 |//-- Comparison metamethods ---------------------------------------------
742 |
743 |->vmeta_comp:
744 | movzx RB, PC_OP
745 | lea RD, [BASE+RD*8]
746 | lea RA, [BASE+RA*8]
747 | mov ARG4, RB
748 | mov L:RB, SAVE_L
749 | mov ARG3, RD
750 | mov ARG2, RA
751 | mov ARG1, L:RB
752 | mov SAVE_PC, PC
753 | mov L:RB->base, BASE
754 | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
755 | // 0/1 or TValue * (metamethod) returned in eax (RC).
756 |3:
757 | mov BASE, L:RB->base
758 | cmp RC, 1
759 | ja ->vmeta_binop
760 |4:
761 | lea PC, [PC+4]
762 | jb >6
763 |5:
764 | movzx RD, PC_RD
765 | branchPC RD
766 |6:
767 | ins_next
768 |
769 |->cont_condt: // BASE = base, RC = result
770 | add PC, 4
771 | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is true.
772 | jb <5
773 | jmp <6
774 |
775 |->cont_condf: // BASE = base, RC = result
776 | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is false.
777 | jmp <4
778 |
779 |->vmeta_equal:
780 | mov ARG4, RB
781 | mov L:RB, SAVE_L
782 | sub PC, 4
783 | mov ARG3, RD
784 | mov ARG2, RA
785 | mov ARG1, L:RB
786 | mov SAVE_PC, PC
787 | mov L:RB->base, BASE
788 | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
789 | // 0/1 or TValue * (metamethod) returned in eax (RC).
790 | jmp <3
791 |
792 |//-- Arithmetic metamethods ---------------------------------------------
793 |
794 |->vmeta_arith_vn:
795 | lea RC, [KBASE+RC*8]
796 | jmp >1
797 |
798 |->vmeta_arith_nv:
799 | lea RC, [KBASE+RC*8]
800 | lea RB, [BASE+RB*8]
801 | xchg RB, RC
802 | jmp >2
803 |
804 |->vmeta_unm:
805 | lea RC, [BASE+RD*8]
806 | mov RB, RC
807 | jmp >2
808 |
809 |->vmeta_arith_vv:
810 | lea RC, [BASE+RC*8]
811 |1:
812 | lea RB, [BASE+RB*8]
813 |2:
814 | lea RA, [BASE+RA*8]
815 | mov ARG3, RB
816 | mov L:RB, SAVE_L
817 | mov ARG4, RC
818 | movzx RC, PC_OP
819 | mov ARG2, RA
820 | mov ARG5, RC
821 | mov ARG1, L:RB
822 | mov SAVE_PC, PC
823 | mov L:RB->base, BASE
824 | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
825 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
826 | mov BASE, L:RB->base
827 | test RC, RC
828 | jz ->cont_nop
829 |
830 | // Call metamethod for binary op.
831 |->vmeta_binop:
832 | // BASE = base, RC = new base, stack = cont/func/o1/o2
833 | mov RA, RC
834 | sub RC, BASE
835 | mov [RA-12], PC // [cont|PC]
836 | lea PC, [RC+FRAME_CONT]
837 | mov LFUNC:RB, [RA-8]
838 | mov NARGS:RC, 3 // 2+1 args for func(o1, o2).
839 | cmp dword [RA-4], LJ_TFUNC
840 | jne ->vmeta_call
841 | jmp aword LFUNC:RB->gate
842 |
843 |->vmeta_len:
844 | lea RD, [BASE+RD*8]
845 | mov L:RB, SAVE_L
846 | mov ARG2, RD
847 | mov ARG1, L:RB
848 | mov SAVE_PC, PC
849 | mov L:RB->base, BASE
850 | call extern lj_meta_len // (lua_State *L, TValue *o)
851 | // TValue * (metamethod) returned in eax (RC).
852 | mov BASE, L:RB->base
853 | jmp ->vmeta_binop // Binop call for compatibility.
854 |
855 |//-- Call metamethod ----------------------------------------------------
856 |
857 |->vmeta_call: // Resolve and call __call metamethod.
858 | // RA = new base, RC = nargs+1, BASE = old base, PC = return
859 | mov ARG4, RA // Save RA, RC for us.
860 | mov ARG5, NARGS:RC
861 | sub RA, 8
862 | lea RC, [RA+NARGS:RC*8]
863 | mov L:RB, SAVE_L
864 | mov ARG2, RA
865 | mov ARG3, RC
866 | mov ARG1, L:RB
867 | mov SAVE_PC, PC
868 | mov L:RB->base, BASE // This is the callers base!
869 | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
870 | mov BASE, L:RB->base
871 | mov RA, ARG4
872 | mov NARGS:RC, ARG5
873 | mov LFUNC:RB, [RA-8]
874 | add NARGS:RC, 1
875 | // This is fragile. L->base must not move, KBASE must always be defined.
876 | cmp KBASE, BASE // Continue with CALLT if flag set.
877 | je ->BC_CALLT_Z
878 | jmp aword LFUNC:RB->gate // Otherwise call resolved metamethod.
879 |
880 |//-- Argument coercion for 'for' statement ------------------------------
881 |
882 |->vmeta_for:
883 | mov L:RB, SAVE_L
884 | mov ARG2, RA
885 | mov ARG1, L:RB
886 | mov SAVE_PC, PC
887 | mov L:RB->base, BASE
888 | call extern lj_meta_for // (lua_State *L, StkId base)
889 | mov BASE, L:RB->base
890 | mov RC, [PC-4]
891 | movzx RA, RCH
892 | movzx OP, RCL
893 | shr RC, 16
894 | jmp aword [DISPATCH+OP*4+GG_DISP_STATIC*4] // Retry FORI or JFORI.
895 |
896 |//-----------------------------------------------------------------------
897 |//-- Fast functions -----------------------------------------------------
898 |//-----------------------------------------------------------------------
899 |
900 |.macro .ffunc, name
901 |->ff_ .. name:
902 |.endmacro
903 |
904 |.macro .ffunc_1, name
905 |->ff_ .. name:
906 | cmp NARGS:RC, 1+1; jb ->fff_fallback
907 |.endmacro
908 |
909 |.macro .ffunc_2, name
910 |->ff_ .. name:
911 | cmp NARGS:RC, 2+1; jb ->fff_fallback
912 |.endmacro
913 |
914 |.macro .ffunc_n, name
915 | .ffunc_1 name
916 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
917 | fld qword [RA]
918 |.endmacro
919 |
920 |.macro .ffunc_n, name, op
921 | .ffunc_1 name
922 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
923 | op
924 | fld qword [RA]
925 |.endmacro
926 |
927 |.macro .ffunc_nn, name
928 | .ffunc_2 name
929 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
930 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
931 | fld qword [RA]
932 | fld qword [RA+8]
933 |.endmacro
934 |
935 |.macro .ffunc_nnr, name
936 | .ffunc_2 name
937 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
938 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
939 | fld qword [RA+8]
940 | fld qword [RA]
941 |.endmacro
942 |
943 |// Inlined GC threshold check. Caveat: uses label 1.
944 |.macro ffgccheck
945 | mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
946 | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
947 | jb >1
948 | call ->fff_gcstep
949 |1:
950 |.endmacro
951 |
952 |//-- Base library: checks -----------------------------------------------
953 |
954 |.ffunc_1 assert
955 | mov RB, [RA+4]
956 | cmp RB, LJ_TISTRUECOND; jae ->fff_fallback
957 | mov NRESULTS, RD
958 | mov [RA-4], RB
959 | mov RB, [RA]
960 | mov [RA-8], RB
961 | sub RD, 2
962 | jz >2
963 | mov ARG1, RA
964 |1:
965 | add RA, 8
966 | mov RB, [RA+4]
967 | mov [RA-4], RB
968 | mov RB, [RA]
969 | mov [RA-8], RB
970 | sub RD, 1
971 | jnz <1
972 | mov RA, ARG1
973 |2:
974 | mov RD, NRESULTS
975 | jmp ->fff_res_
976 |
977 |.ffunc_1 type
978 | mov RB, [RA+4]
979 | mov RC, ~LJ_TNUMX
980 | not RB
981 | cmp RC, RB
982 ||if (cmov) {
983 | cmova RC, RB
984 ||} else {
985 | jbe >1; mov RC, RB; 1:
986 ||}
987 | mov CFUNC:RB, [RA-8]
988 | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
989 | mov dword [RA-4], LJ_TSTR
990 | mov [RA-8], STR:RC
991 | jmp ->fff_res1
992 |
993 |//-- Base library: getters and setters ---------------------------------
994 |
995 |.ffunc_1 getmetatable // Needs at least 1 argument, else fallback. Argument at [RA], result is written to [RA-8].
996 | mov RB, [RA+4] // RB = type tag of the argument.
997 | cmp RB, LJ_TTAB; jne >6 // Not a table: check the other types below.
998 |1: // Field metatable must be at same offset for GCtab and GCudata!
999 | mov TAB:RB, [RA]
1000 | mov TAB:RB, TAB:RB->metatable // RB = metatable (or 0).
1001 |2:
1002 | test TAB:RB, TAB:RB
1003 | mov dword [RA-4], LJ_TNIL // Default result is nil. The mov does not change the flags set by test.
1004 | jz ->fff_res1 // No metatable: return nil.
1005 | mov CFUNC:RC, [RA-8] // NOTE(review): RC is overwritten by the next instruction, so this load looks unused -- confirm.
1006 | mov STR:RC, [DISPATCH+DISPATCH_GL(mmname)+4*MM_metatable] // RC = mmname[MM_metatable], the key to look up.
1007 | mov dword [RA-4], LJ_TTAB // Store metatable as default result.
1008 | mov [RA-8], TAB:RB
1009 | mov ARG1, RA // Save result pointer.
1010 | mov RA, TAB:RB->hmask
1011 | and RA, STR:RC->hash
1012 | imul RA, #NODE
1013 | add NODE:RA, TAB:RB->node // RA = &mt->node[str->hash & mt->hmask].
1014 |3: // Rearranged logic, because we expect _not_ to find the key.
1015 | cmp dword NODE:RA->key.it, LJ_TSTR
1016 | jne >4
1017 | cmp dword NODE:RA->key.gcr, STR:RC
1018 | je >5
1019 |4:
1020 | mov NODE:RA, NODE:RA->next // Follow the chain until next is 0.
1021 | test NODE:RA, NODE:RA
1022 | jnz <3
1023 | jmp ->fff_res1 // Not found, keep default result. NOTE(review): RA is 0 here, not the result pointer saved in ARG1 -- confirm fff_res1 does not rely on RA.
1024 |5:
1025 | mov RB, [RA+4] // RB = tag of the value found (presumably the value is at offset 0 of Node -- confirm).
1026 | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value. NOTE(review): RA still points to the node here, not to the result -- confirm as above.
1027 | mov RC, [RA]
1028 | mov RA, ARG1 // Restore result pointer.
1029 | mov [RA-4], RB // Return value of mt.__metatable.
1030 | mov [RA-8], RC
1031 | jmp ->fff_res1
1032 |
1033 |6:
1034 | cmp RB, LJ_TUDATA; je <1 // Userdata: read the metatable field like for a table.
1035 | cmp RB, LJ_TISNUM; ja >7
1036 | mov RB, LJ_TNUMX // Tags not above LJ_TISNUM all use the LJ_TNUMX entry.
1037 |7:
1038 | not RB // Index = ~tag.
1039 | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(basemt)] // RB = basemt[~tag].
1040 | jmp <2
1041 |
1042 |.ffunc_2 setmetatable
1043 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback // 1st argument must be a table.
1044 | // Fast path: no mt for table yet and not clearing the mt.
1045 | mov TAB:RB, [RA]
1046 | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback // Existing metatable: use fallback.
1047 | cmp dword [RA+12], LJ_TTAB; jne ->fff_fallback // 2nd argument must be a table, too.
1048 | mov TAB:RC, [RA+8]
1049 | mov TAB:RB->metatable, TAB:RC // Install the new metatable.
1050 | mov dword [RA-4], LJ_TTAB // Return original table.
1051 | mov [RA-8], TAB:RB
1052 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
1053 | jz >1
1054 | // Possible write barrier. Table is black, but skip iswhite(mt) check.
1055 | barrierback TAB:RB, RC
1056 |1:
1057 | jmp ->fff_res1
1058 |
1059 |.ffunc_2 rawget // Looks up the key with lj_tab_get and returns a copy of the slot.
1060 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback // 1st argument must be a table.
1061 | mov TAB:RC, [RA]
1062 | mov L:RB, SAVE_L
1063 | mov ARG2, TAB:RC
1064 | mov ARG1, L:RB
1065 | mov RB, RA // RA is kept in RB across the call.
1066 | mov ARG4, BASE // Save BASE and RA.
1067 | add RA, 8 // Address of the 2nd argument (the key).
1068 | mov ARG3, RA
1069 | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1070 | // cTValue * returned in eax (RC).
1071 | mov RA, RB
1072 | mov BASE, ARG4
1073 | mov RB, [RC] // Copy table slot.
1074 | mov RC, [RC+4]
1075 | mov [RA-8], RB
1076 | mov [RA-4], RC
1077 | jmp ->fff_res1
1078 |
1079 |//-- Base library: conversions ------------------------------------------
1080 |
1081 |.ffunc tonumber
1082 | // Only handles the number case inline (without a base argument).
1083 | cmp NARGS:RC, 1+1; jne ->fff_fallback // Exactly one argument.
1084 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback // Not a number: use fallback.
1085 | fld qword [RA] // Return the number unchanged via the x87 stack.
1086 | jmp ->fff_resn
1087 |
1088 |.ffunc_1 tostring
1089 | // Only handles the string or number case inline.
1090 | cmp dword [RA+4], LJ_TSTR; jne >3
1091 | // A __tostring method in the string base metatable is ignored.
1092 | mov STR:RC, [RA] // A string argument is returned as-is.
1093 |2:
1094 | mov dword [RA-4], LJ_TSTR
1095 | mov [RA-8], STR:RC
1096 | jmp ->fff_res1
1097 |3: // Handle numbers inline, unless a number base metatable is present.
1098 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
1099 | cmp dword [DISPATCH+DISPATCH_GL(basemt)+4*(~LJ_TNUMX)], 0
1100 | jne ->fff_fallback
1101 | ffgccheck // Caveat: uses label 1.
1102 | mov L:RB, SAVE_L
1103 | mov ARG1, L:RB
1104 | mov ARG2, RA // Pointer to the number argument.
1105 | mov L:RB->base, RA // Add frame since C call can throw.
1106 | mov [RA-4], PC
1107 | mov SAVE_PC, PC // Redundant (but a defined value).
1108 | mov ARG3, BASE // Save BASE.
1109 | call extern lj_str_fromnum // (lua_State *L, lua_Number *np)
1110 | // GCstr returned in eax (RC).
1111 | mov RA, L:RB->base // Reload RA from L->base after the call.
1112 | mov BASE, ARG3
1113 | jmp <2 // Store the new string as the result.
1114 |
1115 |//-- Base library: iterators -------------------------------------------
1116 |
1117 |.ffunc_1 next // Calls lj_tab_next with the key slot at RA+8, then copies key/value from RA+8/RA+16 into the result slots.
1118 | je >2 // Missing 2nd arg?
1119 |1:
1120 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
1121 | mov TAB:RB, [RA]
1122 | mov ARG2, TAB:RB
1123 | mov L:RB, SAVE_L
1124 | mov ARG1, L:RB
1125 | mov L:RB->base, RA // Add frame since C call can throw.
1126 | mov [RA-4], PC
1127 | mov SAVE_PC, PC // Redundant (but a defined value).
1128 | mov ARG4, BASE // Save BASE.
1129 | add RA, 8 // Address of the key slot (2nd argument).
1130 | mov ARG3, RA
1131 | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1132 | // Flag returned in eax (RC).
1133 | mov RA, L:RB->base
1134 | mov BASE, ARG4
1135 | test RC, RC; jz >3 // End of traversal?
1136 | mov RB, [RA+8] // Copy key and value to results.
1137 | mov RC, [RA+12]
1138 | mov [RA-8], RB
1139 | mov [RA-4], RC
1140 | mov RB, [RA+16]
1141 | mov RC, [RA+20]
1142 | mov [RA], RB
1143 | mov [RA+4], RC
1144 |->fff_res2: // Shared exit: return two results.
1145 | mov RD, 1+2
1146 | jmp ->fff_res
1147 |2: // Set missing 2nd arg to nil.
1148 | mov dword [RA+12], LJ_TNIL
1149 | jmp <1
1150 |3: // End of traversal: return nil.
1151 | mov dword [RA-4], LJ_TNIL
1152 | jmp ->fff_res1
1153 |
1154 |.ffunc_1 pairs // Returns upvalue[0], the table and nil.
1155 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
1156 | mov CFUNC:RC, CFUNC:RB->upvalue[0] // 1st result: the function stored in upvalue[0].
1157 | mov dword [RA-4], LJ_TFUNC
1158 | mov [RA-8], CFUNC:RC
1159 | mov dword [RA+12], LJ_TNIL // 3rd result: nil. The table at RA becomes the 2nd result.
1160 | mov RD, 1+3
1161 | jmp ->fff_res
1162 |
1163 |.ffunc_1 ipairs_aux // Iterator step: returns key+1 and t[key+1], or nothing if that slot is nil/absent.
1164 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
1165 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
1166 | fld qword [RA+8]
1167 | fld1
1168 | faddp st1 // st0 = key + 1.
1169 | fist ARG2 // Integer copy of the new key (also the 2nd arg for lj_tab_getinth).
1170 | fstp qword [RA-8] // 1st result: the new key as a number.
1171 | mov TAB:RB, [RA]
1172 | mov RC, ARG2
1173 | cmp RC, TAB:RB->asize; jae >2 // Not in array part?
1174 | shl RC, 3 // Scale the index by the 8 byte slot size.
1175 | add RC, TAB:RB->array
1176 |1:
1177 | cmp dword [RC+4], LJ_TNIL; je ->fff_res0 // nil slot ends the iteration.
1178 | mov RB, [RC] // Copy array slot.
1179 | mov RC, [RC+4]
1180 | mov [RA], RB
1181 | mov [RA+4], RC
1182 | jmp ->fff_res2
1183 |2: // Check for empty hash part first. Otherwise call C function.
1184 | cmp dword TAB:RB->hmask, 0; je ->fff_res0
1185 | mov ARG1, TAB:RB
1186 | mov ARG3, BASE // Save BASE and RA.
1187 | mov RB, RA
1188 | call extern lj_tab_getinth // (GCtab *t, int32_t key)
1189 | // cTValue * or NULL returned in eax (RC).
1190 | mov RA, RB
1191 | mov BASE, ARG3
1192 | test RC, RC
1193 | jnz <1 // Found a slot: check and copy it like an array slot.
1194 |->fff_res0: // Shared exit: return no results.
1195 | mov RD, 1+0
1196 | jmp ->fff_res
1197 |
1198 |.ffunc_1 ipairs // Returns upvalue[0], the table and the number 0.
1199 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
1200 | mov CFUNC:RC, CFUNC:RB->upvalue[0] // 1st result: the function stored in upvalue[0].
1201 | mov dword [RA-4], LJ_TFUNC
1202 | mov [RA-8], CFUNC:RC
1203 | fldz
1204 | fstp qword [RA+8] // 3rd result: initial control value 0.
1205 | mov RD, 1+3
1206 | jmp ->fff_res
1207 |
1208 |//-- Base library: catch errors ----------------------------------------
1209 |
1210 |.ffunc_1 pcall // Sets up a FRAME_PCALL frame and calls the 1st argument with the remaining arguments.
1211 | mov [RA-4], PC // Save the caller's PC in the frame slot.
1212 | mov PC, 8+FRAME_PCALL // Frame link for the callee: delta of 8 plus frame type.
1213 | mov BASE, RA
1214 | add RA, 8 // New base is one slot up: the 1st argument becomes the function.
1215 | sub NARGS:RC, 1
1216 | mov LFUNC:RB, [RA-8]
1217 |1: // Shared entry, also reached from xpcall.
1218 | test byte [DISPATCH+DISPATCH_GL(hookmask)], HOOK_ACTIVE
1219 | jnz >3 // Hook active before pcall?
1220 |2:
1221 | cmp dword [RA-4], LJ_TFUNC
1222 | jne ->vmeta_call // Ensure KBASE defined and != BASE.
1223 | jmp aword LFUNC:RB->gate
1224 |3:
1225 | add PC, 1 // Use FRAME_PCALLH if hook was active.
1226 | jmp <2
1227 |
1228 |.ffunc_2 xpcall // Swaps the two arguments, then continues at label 1 of pcall.
1229 | cmp dword [RA+12], LJ_TFUNC; jne ->fff_fallback // 2nd argument (traceback) must be a function.
1230 | mov [RA-4], PC // Save the caller's PC; PC is used as a temporary below.
1231 | mov RB, [RA+4] // Swap function and traceback.
1232 | mov [RA+12], RB
1233 | mov dword [RA+4], LJ_TFUNC
1234 | mov LFUNC:RB, [RA]
1235 | mov PC, [RA+8]
1236 | mov [RA+8], LFUNC:RB
1237 | mov [RA], PC
1238 | mov PC, 2*8+FRAME_PCALL // Frame link: delta of two slots plus frame type.
1239 | mov BASE, RA
1240 | add RA, 2*8
1241 | sub NARGS:RC, 2
1242 | jmp <1 // Shared tail in pcall (hook check and call).
1243 |
1244 |//-- Coroutine library --------------------------------------------------
1245 |
1246 |.macro coroutine_resume_wrap, resume // resume=1: coroutine.resume, resume=0: the wrap helper.
1247 |9: // Need to restore PC for fallback handler.
1248 | mov PC, SAVE_PC
1249 | jmp ->fff_fallback
1250 |
1251 |.if resume
1252 |.ffunc_1 coroutine_resume
1253 | mov L:RB, [RA] // Coroutine = 1st argument (type checked below).
1254 |.else
1255 |.ffunc coroutine_wrap_aux
1256 | mov L:RB, CFUNC:RB->upvalue[0].gcr // Coroutine = upvalue[0] of the wrapper function.
1257 |.endif
1258 | mov [RA-4], PC
1259 | mov SAVE_PC, PC // PC is used as a temporary from here on.
1260 | mov ARG1, L:RB
1261 |.if resume
1262 | cmp dword [RA+4], LJ_TTHREAD; jne <9
1263 |.endif
1264 | cmp aword L:RB->cframe, 0; jne <9 // Coroutine has a C frame: use fallback.
1265 | cmp byte L:RB->status, LUA_YIELD; ja <9 // Status above LUA_YIELD: use fallback.
1266 | mov PC, L:RB->top
1267 | mov ARG2, PC
1268 | je >1 // Status != LUA_YIELD (i.e. 0)?
1269 | cmp PC, L:RB->base; je <9 // Check for presence of initial func.
1270 |1:
1271 |.if resume
1272 | lea PC, [PC+NARGS:RC*8-16] // Check stack space (-1-thread).
1273 |.else
1274 | lea PC, [PC+NARGS:RC*8-8] // Check stack space (-1).
1275 |.endif
1276 | cmp PC, L:RB->maxstack; ja <9
1277 | mov L:RB->top, PC // New top of the coroutine stack, after the moved arguments.
1278 |
1279 | mov L:RB, SAVE_L
1280 | mov L:RB->base, RA
1281 |.if resume
1282 | add RA, 8 // Keep resumed thread in stack for GC.
1283 |.endif
1284 | mov L:RB->top, RA
1285 | mov RB, ARG2 // RB = old top of the coroutine = destination start.
1286 |.if resume
1287 | lea RA, [RA+NARGS:RC*8-24] // RA = end of source for stack move.
1288 |.else
1289 | lea RA, [RA+NARGS:RC*8-16] // RA = end of source for stack move.
1290 |.endif
1291 | sub RA, PC // Relative to PC.
1292 |
1293 | cmp PC, RB
1294 | je >3 // Nothing to move.
1295 |2: // Move args to coroutine.
1296 | mov RC, [PC+RA+4]
1297 | mov [PC-4], RC
1298 | mov RC, [PC+RA]
1299 | mov [PC-8], RC
1300 | sub PC, 8 // Copies run from the last argument down to the first.
1301 | cmp PC, RB
1302 | jne <2
1303 |3:
1304 | xor RA, RA
1305 | mov ARG4, RA
1306 | mov ARG3, RA
1307 | call ->vm_resume // (lua_State *L, StkId base, 0, 0)
1308 | set_vmstate INTERP
1309 |
1310 | mov L:RB, SAVE_L
1311 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L.
1312 | mov BASE, L:RB->base
1313 | cmp eax, LUA_YIELD
1314 | ja >8 // Status above LUA_YIELD: error.
1315 |4:
1316 | mov RA, L:PC->base
1317 | mov KBASE, L:PC->top
1318 | mov L:PC->top, RA // Clear coroutine stack.
1319 | mov PC, KBASE
1320 | sub PC, RA // PC = size of the results in bytes.
1321 | je >6 // No results?
1322 | lea RD, [BASE+PC]
1323 | shr PC, 3 // PC = number of results.
1324 | cmp RD, L:RB->maxstack
1325 | ja >9 // Need to grow stack?
1326 |
1327 | mov RB, BASE
1328 | sub RB, RA // RB = offset from the coroutine stack to the caller's BASE.
1329 |5: // Move results from coroutine.
1330 | mov RD, [RA]
1331 | mov [RA+RB], RD
1332 | mov RD, [RA+4]
1333 | mov [RA+RB+4], RD
1334 | add RA, 8
1335 | cmp RA, KBASE
1336 | jne <5
1337 |6:
1338 |.if resume
1339 | lea RD, [PC+2] // nresults+1 = 1 + true + results.
1340 | mov dword [BASE-4], LJ_TTRUE // Prepend true to results.
1341 |.else
1342 | lea RD, [PC+1] // nresults+1 = 1 + results.
1343 |.endif
1344 |7:
1345 | mov PC, SAVE_PC
1346 | mov NRESULTS, RD
1347 |.if resume
1348 | mov RA, -8 // Results start at BASE-8 (the prepended status).
1349 |.else
1350 | xor RA, RA // Results start at BASE.
1351 |.endif
1352 | test PC, FRAME_TYPE
1353 | jz ->BC_RET_Z
1354 | jmp ->vm_return
1355 |
1356 |8: // Coroutine returned with error (at co->top-1).
1357 |.if resume
1358 | mov dword [BASE-4], LJ_TFALSE // Prepend false to results.
1359 | mov RA, L:PC->top
1360 | sub RA, 8
1361 | mov L:PC->top, RA // Clear error from coroutine stack.
1362 | mov RD, [RA] // Copy error message.
1363 | mov [BASE], RD
1364 | mov RD, [RA+4]
1365 | mov [BASE+4], RD
1366 | mov RD, 1+2 // nresults+1 = 1 + false + error.
1367 | jmp <7
1368 |.else
1369 | mov ARG2, L:PC
1370 | mov ARG1, L:RB
1371 | call extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co)
1372 | // Error function does not return.
1373 |.endif
1374 |
1375 |9: // Handle stack expansion on return from yield.
1376 | mov L:RA, ARG1 // The callee doesn't modify SAVE_L.
1377 | mov L:RA->top, KBASE // Undo coroutine stack clearing.
1378 | mov ARG2, PC // Number of result slots needed.
1379 | mov ARG1, L:RB
1380 | call extern lj_state_growstack // (lua_State *L, int n)
1381 | mov BASE, L:RB->base
1382 | jmp <4 // Retry the stack move.
1383 |.endmacro
1384 |
1385 | coroutine_resume_wrap 1 // coroutine.resume
1386 | coroutine_resume_wrap 0 // coroutine.wrap
1387 |
1388 |.ffunc coroutine_yield // Marks the current state as yielded and leaves the VM via vm_leave_unw.
1389 | mov L:RB, SAVE_L
1390 | mov [RA-4], PC
1391 | test aword L:RB->cframe, CFRAME_CANYIELD
1392 | jz ->fff_fallback // CFRAME_CANYIELD not set: use fallback.
1393 | mov L:RB->base, RA
1394 | lea RC, [RA+NARGS:RC*8-8] // RC = slot after the last argument.
1395 | mov L:RB->top, RC
1396 | xor eax, eax
1397 | mov aword L:RB->cframe, eax // Clear L->cframe.
1398 | mov al, LUA_YIELD // eax = LUA_YIELD; also stored as L->status.
1399 | mov byte L:RB->status, al
1400 | jmp ->vm_leave_unw
1401 |
1402 |//-- Math library -------------------------------------------------------
1403 |
1404 |.ffunc_n math_abs // Falls into the shared result tail used by most fast functions.
1405 | fabs
1406 | // fallthrough
1407 |->fff_resn: // Return one number: pop st0 into the first result slot.
1408 | fstp qword [RA-8]
1409 |->fff_res1: // Return one result, already stored at RA-8.
1410 | mov RD, 1+1
1411 |->fff_res: // Return RD-1 results starting at RA-8.
1412 | mov NRESULTS, RD
1413 |->fff_res_: // Same, but NRESULTS was already stored by the caller.
1414 | test PC, FRAME_TYPE
1415 | jnz >7
1416 |5:
1417 | cmp PC_RB, RDL // More results expected?
1418 | ja >6
1419 | // BASE and KBASE are assumed to be set for the calling frame.
1420 | ins_next
1421 |
1422 |6: // Fill up results with nil.
1423 | mov dword [RA+RD*8-12], LJ_TNIL // Tag word of result slot RD-1 (slots start at RA-8).
1424 | add RD, 1
1425 | jmp <5
1426 |
1427 |7: // Non-standard return case.
1428 | mov BASE, RA // Requires RA to hold the result base on entry.
1429 | mov RA, -8 // Results start at BASE+RA = BASE-8.
1430 | jmp ->vm_return
1431 |
1432 |.ffunc_n math_floor; call ->vm_floor; jmp ->fff_resn
1433 |.ffunc_n math_ceil; call ->vm_ceil; jmp ->fff_resn
1434 |
1435 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
1436 |
1437 |.ffunc_n math_log, fldln2; fyl2x; jmp ->fff_resn
1438 |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn
1439 |.ffunc_n math_exp; call ->vm_exp; jmp ->fff_resn
1440 |
1441 |.ffunc_n math_sin; fsin; jmp ->fff_resn
1442 |.ffunc_n math_cos; fcos; jmp ->fff_resn
1443 |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn
1444 |
1445 |.ffunc_n math_asin
1446 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan
1447 | jmp ->fff_resn
1448 |.ffunc_n math_acos
1449 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan
1450 | jmp ->fff_resn
1451 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
1452 |
1453 |.macro math_extern, func // Defines math_<func> as a call to the external function func.
1454 |.ffunc_n math_ .. func
1455 | mov ARG5, RA // Save RA in a stack slot across the call.
1456 | fstp FPARG1 // Pass the argument on the C stack.
1457 | mov RB, BASE // Save BASE in RB across the call.
1458 | call extern func
1459 | mov RA, ARG5
1460 | mov BASE, RB
1461 | jmp ->fff_resn // fff_resn pops the result from the x87 stack.
1462 |.endmacro
1463 |
1464 | math_extern sinh
1465 | math_extern cosh
1466 | math_extern tanh
1467 |
1468 |->ff_math_deg:
1469 |.ffunc_n math_rad; fmul qword CFUNC:RB->upvalue[0]; jmp ->fff_resn
1470 |
1471 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn
1472 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
1473 |
1474 |.ffunc_1 math_frexp // Returns mantissa (at RA-8) and exponent (at RA) using integer ops on the IEEE-754 double.
1475 | mov RB, [RA+4] // RB = high word (sign, exponent, upper mantissa bits).
1476 | cmp RB, LJ_TISNUM; ja ->fff_fallback
1477 | mov RC, [RA] // RC = low word.
1478 | mov [RA-4], RB; mov [RA-8], RC // Copy the argument to the first result slot.
1479 | shl RB, 1; cmp RB, 0xffe00000; jae >3 // Shift out the sign. All exponent bits set: Inf/NaN.
1480 | or RC, RB; jz >3 // All remaining bits zero: +-0.
1481 | mov RC, 1022 // Exponent bias for a mantissa in [0.5,1).
1482 | cmp RB, 0x00200000; jb >4 // Exponent field is zero: denormal.
1483 |1:
1484 | shr RB, 21; sub RB, RC // Extract and unbias exponent.
1485 | mov ARG1, RB; fild ARG1
1486 | mov RB, [RA-4]
1487 | and RB, 0x800fffff // Mask off exponent.
1488 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
1489 | mov [RA-4], RB
1490 |2:
1491 | fstp qword [RA] // 2nd result: the exponent as a number.
1492 | mov RD, 1+2
1493 | jmp ->fff_res
1494 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
1495 | fldz; jmp <2
1496 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
1497 | fld qword [RA]
1498 | mov ARG1, 0x5a800000; fmul ARG1 // x = x*2^54
1499 | fstp qword [RA-8]
1500 | mov RB, [RA-4]; mov RC, 1076; shl RB, 1; jmp <1 // Bias 1076 = 1022 + 54 compensates the scaling.
1501 |
1502 |.ffunc_n math_modf // Returns the integral part (at RA-8) and the fractional part (at RA).
1503 | mov RB, [RA+4]
1504 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
1505 | fdup
1506 | call ->vm_trunc // st0 = trunc(x), st1 = x.
1507 | fsub st1, st0 // st1 = x - trunc(x).
1508 |1:
1509 | fstp qword [RA-8]; fstp qword [RA]
1510 | mov RC, [RA-4]; mov RB, [RA+4]
1511 | xor RC, RB; js >3 // Need to adjust sign?
1512 |2:
1513 | mov RD, 1+2
1514 | jmp ->fff_res
1515 |3:
1516 | xor RB, 0x80000000; mov [RA+4], RB; jmp <2 // Flip sign of fraction.
1517 |4:
1518 | fldz; fxch; jmp <1 // Return +-Inf and +-0.
1519 |
1520 |.ffunc_nnr math_fmod
1521 |1: ; fprem; fnstsw ax; sahf; jp <1
1522 | fpop1
1523 | jmp ->fff_resn
1524 |
1525 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
1526 |
1527 |.macro math_minmax, name, cmovop, nocmovop // Folds all number arguments into st0 with a min or max selection.
1528 |.ffunc_n name
1529 | mov RB, 2 // RB = 1-based index of the next argument.
1530 |1:
1531 | cmp RB, RD; jae ->fff_resn // All arguments processed: return st0.
1532 | cmp dword [RA+RB*8-4], LJ_TISNUM; ja >5 // Non-number argument: bail out via label 5 below the macro uses.
1533 | fld qword [RA+RB*8-8]
1534 ||if (cmov) {
1535 | fucomi st1; cmovop st1; fpop1
1536 ||} else {
1537 | push eax
1538 | fucom st1; fnstsw ax; test ah, 1; nocmovop >2; fxch; 2: ; fpop
1539 | pop eax
1540 ||}
1541 | add RB, 1
1542 | jmp <1
1543 |.endmacro
1544 |
1545 | math_minmax math_min, fcmovnbe, jz
1546 | math_minmax math_max, fcmovbe, jnz
1547 |5: // Shared by both instantiations: drop the accumulated value and use the fallback.
1548 | fpop; jmp ->fff_fallback
1549 |
1550 |//-- String library -----------------------------------------------------
1551 |
1552 |.ffunc_1 string_len
1553 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
1554 | mov STR:RB, [RA]
1555 | fild dword STR:RB->len
1556 | jmp ->fff_resn
1557 |
1558 |.ffunc string_byte // Only handle the 1-arg case here.
1559 | cmp NARGS:RC, 1+1; jne ->fff_fallback
1560 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
1561 | mov STR:RB, [RA]
1562 | cmp dword STR:RB->len, 1
1563 | jb ->fff_res0 // Return no results for empty string.
1564 | movzx RB, byte STR:RB[1] // First byte after the string header.
1565 | mov ARG1, RB
1566 | fild ARG1 // Convert the byte value to a number on the x87 stack.
1567 | jmp ->fff_resn
1568 |
1569 |.ffunc string_char // Only handle the 1-arg case here.
1570 | ffgccheck
1571 | cmp NARGS:RC, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
1572 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback
1573 | fld qword [RA]
1574 | fistp ARG4 // Integer character code in the ARG4 slot.
1575 | cmp ARG4, 255; ja ->fff_fallback // Unsigned compare also rejects negative values.
1576 | lea RC, ARG4 // Little-endian.
1577 | mov ARG5, RA // Save RA.
1578 | mov ARG3, 1 // Length is one byte.
1579 | mov ARG2, RC // Source pointer: low byte of the ARG4 slot.
1580 |->fff_newstr: // Shared tail: expects ARG2 = char pointer, ARG3 = length, ARG5 = saved RA.
1581 | mov L:RB, SAVE_L
1582 | mov ARG1, L:RB
1583 | mov SAVE_PC, PC
1584 | mov L:RB->base, BASE
1585 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
1586 | // GCstr * returned in eax (RC).
1587 | mov RA, ARG5
1588 | mov BASE, L:RB->base
1589 | mov dword [RA-4], LJ_TSTR
1590 | mov [RA-8], STR:RC
1591 | jmp ->fff_res1
1592 |
1593 |.ffunc string_sub // Needs at least 2 arguments; the optional end argument defaults to -1.
1594 | ffgccheck
1595 | mov ARG5, RA // Save RA.
1596 | mov ARG4, -1 // Default end = -1.
1597 | cmp NARGS:RC, 1+2; jb ->fff_fallback
1598 | jna >1 // Exactly 2 arguments: keep the default end.
1599 | cmp dword [RA+20], LJ_TISNUM; ja ->fff_fallback
1600 | fld qword [RA+16]
1601 | fistp ARG4 // ARG4 = end as an integer.
1602 |1:
1603 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
1604 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
1605 | mov STR:RB, [RA]
1606 | mov ARG2, STR:RB // Keep the string pointer in ARG2.
1607 | mov RB, STR:RB->len
1608 | fld qword [RA+8]
1609 | fistp ARG3 // ARG3 = start as an integer.
1610 | mov RC, ARG4
1611 | cmp RB, RC // len < end? (unsigned compare)
1612 | jb >5
1613 |2:
1614 | mov RA, ARG3
1615 | test RA, RA // start <= 0?
1616 | jle >7
1617 |3: // Here: RA = start (>= 1), RC = end, RB = len.
1618 | mov STR:RB, ARG2
1619 | sub RC, RA // start > end?
1620 | jl ->fff_emptystr
1621 | lea RB, [STR:RB+RA+#STR-1] // RB = address of the first character of the result.
1622 | add RC, 1 // RC = end - start + 1 = result length.
1623 |4:
1624 | mov ARG2, RB
1625 | mov ARG3, RC
1626 | jmp ->fff_newstr
1627 |
1628 |5: // Negative end or overflow.
1629 | jl >6
1630 | lea RC, [RC+RB+1] // end = end+(len+1)
1631 | jmp <2
1632 |6: // Overflow.
1633 | mov RC, RB // end = len
1634 | jmp <2
1635 |
1636 |7: // Negative start or underflow.
1637 | je >8
1638 | add RA, RB // start = start+(len+1)
1639 | add RA, 1
1640 | jg <3 // start > 0?
1641 |8: // Underflow.
1642 | mov RA, 1 // start = 1
1643 | jmp <3
1644 |
1645 |->fff_emptystr: // Range underflow. Also used by other string functions; needs ARG5 = saved RA.
1646 | xor RC, RC // Zero length. Any ptr in RB is ok.
1647 | jmp <4
1648 |
1649 |.ffunc_2 string_rep // Only handle the 1-char case inline.
1650 | ffgccheck
1651 | mov ARG5, RA // Save RA.
1652 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
1653 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback
1654 | mov STR:RB, [RA]
1655 | fld qword [RA+8]
1656 | fistp ARG4
1657 | mov RC, ARG4 // RC = repeat count as an integer.
1658 | test RC, RC
1659 | jle ->fff_emptystr // Count <= 0? (or non-int)
1660 | cmp dword STR:RB->len, 1
1661 | jb ->fff_emptystr // Zero length string?
1662 | jne ->fff_fallback_2 // Fallback for > 1-char strings.
1663 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2 // Temp buffer too small: use fallback.
1664 | movzx RA, byte STR:RB[1] // RA = the single character.
1665 | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
1666 | mov ARG3, RC // Result length = count.
1667 | mov ARG2, RB // Result source = start of the temp buffer.
1668 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
1669 | mov [RB], RAL
1670 | add RB, 1
1671 | sub RC, 1
1672 | jnz <1
1673 | jmp ->fff_newstr
1674 |
1675 |.ffunc_1 string_reverse // Builds the reversed bytes in the temp buffer, then interns them via fff_newstr.
1676 | ffgccheck
1677 | mov ARG5, RA // Save RA.
1678 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
1679 | mov STR:RB, [RA]
1680 | mov RC, STR:RB->len
1681 | test RC, RC
1682 | jz ->fff_emptystr // Zero length string?
1683 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 // Temp buffer too small: use fallback.
1684 | add RB, #STR // RB = address of the first character.
1685 | mov ARG4, PC // Need another temp register.
1686 | mov ARG3, RC
1687 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
1688 | mov ARG2, PC
1689 |1: // Read forwards from the source, write backwards into the buffer.
1690 | movzx RA, byte [RB]
1691 | add RB, 1
1692 | sub RC, 1
1693 | mov [PC+RC], RAL // mov keeps the flags from sub for the jnz below.
1694 | jnz <1
1695 | mov PC, ARG4 // Restore PC.
1696 | jmp ->fff_newstr
1697 |
1698 |.macro ffstring_case, name, lo, hi // Flips bit 0x20 of every byte in the range [lo,hi].
1699 | .ffunc_1 name
1700 | ffgccheck
1701 | mov ARG5, RA // Save RA.
1702 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback
1703 | mov STR:RB, [RA]
1704 | mov RC, STR:RB->len
1705 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 // Temp buffer too small: use fallback.
1706 | add RB, #STR // RB = address of the first character.
1707 | mov ARG4, PC // Need another temp register.
1708 | mov ARG3, RC
1709 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
1710 | mov ARG2, PC
1711 | jmp >3 // Enter at the loop condition, so a zero length copies nothing.
1712 |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?).
1713 | movzx RA, byte [RB+RC]
1714 | cmp RA, lo
1715 | jb >2
1716 | cmp RA, hi
1717 | ja >2
1718 | xor RA, 0x20
1719 |2:
1720 | mov [PC+RC], RAL
1721 |3:
1722 | sub RC, 1 // Index runs from len-1 down to 0.
1723 | jns <1
1724 | mov PC, ARG4 // Restore PC.
1725 | jmp ->fff_newstr
1726 |.endmacro
1727 |
1728 |ffstring_case string_lower, 0x41, 0x5a
1729 |ffstring_case string_upper, 0x61, 0x7a
1730 |
1731 |//-- Table library ------------------------------------------------------
1732 |
1733 |.ffunc_1 table_getn // Returns lj_tab_len(t) as a number.
1734 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback
1735 | mov TAB:RB, [RA]
1736 | mov ARG1, TAB:RB
1737 | mov RB, RA // Save RA and BASE.
1738 | mov ARG2, BASE
1739 | call extern lj_tab_len // (GCtab *t)
1740 | // Length of table returned in eax (RC).
1741 | mov ARG1, RC // Spill to memory, since fild needs a memory operand.
1742 | mov RA, RB // Restore RA and BASE.
1743 | mov BASE, ARG2
1744 | fild ARG1
1745 | jmp ->fff_resn
1746 |
1747 |//-- Bit library --------------------------------------------------------
1748 |
1749 |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!).
1750 |
1751 |.ffunc_n bit_tobit // Adds the bias so the low 32 bits of the stored double hold the integer result.
1752 | mov ARG5, TOBIT_BIAS
1753 | fadd ARG5 // ARG5 is used as a 32 bit float operand.
1754 | fstp FPARG1 // 64 bit FP store.
1755 | fild ARG1 // 32 bit integer load (s2lfwd ok).
1756 | jmp ->fff_resn
1757 |
1758 |.macro .ffunc_bit, name
1759 | .ffunc_n name
1760 | mov ARG5, TOBIT_BIAS
1761 | fadd ARG5
1762 | fstp FPARG1
1763 | mov RB, ARG1
1764 |.endmacro
1765 |
1766 |.macro .ffunc_bit_op, name, ins // Folds all arguments into RB with the integer instruction ins.
1767 | .ffunc_bit name
1768 | mov NRESULTS, NARGS:RC // Save for fallback.
1769 | lea RC, [RA+NARGS:RC*8-16] // RC = address of the last argument.
1770 |1:
1771 | cmp RC, RA
1772 | jbe ->fff_resbit // Back at the 1st argument (already in RB): done.
1773 | cmp dword [RC+4], LJ_TISNUM; ja ->fff_fallback_bit_op
1774 | fld qword [RC]
1775 | fadd ARG5 // ARG5 still holds TOBIT_BIAS from .ffunc_bit.
1776 | fstp FPARG1
1777 | ins RB, ARG1
1778 | sub RC, 8 // Walk the arguments from last to first.
1779 | jmp <1
1780 |.endmacro
1781 |
1782 |.ffunc_bit_op bit_band, and
1783 |.ffunc_bit_op bit_bor, or
1784 |.ffunc_bit_op bit_bxor, xor
1785 |
1786 |.ffunc_bit bit_bswap
1787 | bswap RB
1788 | jmp ->fff_resbit
1789 |
1790 |.ffunc_bit bit_bnot
1791 | not RB
1792 |->fff_resbit:
1793 | mov ARG1, RB
1794 | fild ARG1
1795 | jmp ->fff_resn
1796 |
1797 |->fff_fallback_bit_op:
1798 | mov NARGS:RC, NRESULTS // Restore for fallback
1799 | jmp ->fff_fallback
1800 |
1801 |.macro .ffunc_bit_sh, name, ins // Shift/rotate of the 1st argument by the 2nd argument.
1802 | .ffunc_nn name
1803 | mov ARG5, TOBIT_BIAS
1804 | fadd ARG5
1805 | fstp FPARG3 // Converted top of the x87 stack goes to ARG3 (used as the count below).
1806 | fadd ARG5
1807 | fstp FPARG1 // Converted next value goes to ARG1 (the value to shift).
1808 | mov RC, RA // Assumes RA is ecx.
1809 | mov RA, ARG3 // The shift count must be in cl.
1810 | mov RB, ARG1
1811 | ins RB, cl
1812 | mov RA, RC // Restore RA.
1813 | jmp ->fff_resbit
1814 |.endmacro
1815 |
1816 |.ffunc_bit_sh bit_lshift, shl
1817 |.ffunc_bit_sh bit_rshift, shr
1818 |.ffunc_bit_sh bit_arshift, sar
1819 |.ffunc_bit_sh bit_rol, rol
1820 |.ffunc_bit_sh bit_ror, ror
1821 |
1822 |//-----------------------------------------------------------------------
1823 |
1824 |->fff_fallback_2: // Entry for callers that clobbered RC: assume 2 arguments.
1825 | mov NARGS:RC, 1+2 // Other args are ignored, anyway.
1826 | jmp ->fff_fallback
1827 |->fff_fallback_1: // Entry for callers that clobbered RC: assume 1 argument.
1828 | mov NARGS:RC, 1+1 // Other args are ignored, anyway.
1829 |->fff_fallback: // Call fast function fallback handler.
1830 | // RA = new base, RC = nargs+1
1831 | mov L:RB, SAVE_L
1832 | sub BASE, RA // Keep BASE relative to RA across the call.
1833 | mov [RA-4], PC
1834 | mov SAVE_PC, PC // Redundant (but a defined value).
1835 | mov ARG3, BASE // Save old BASE (relative).
1836 | mov L:RB->base, RA
1837 | lea RC, [RA+NARGS:RC*8-8] // RC = slot after the last argument.
1838 | mov ARG1, L:RB
1839 | lea BASE, [RC+8*LUA_MINSTACK] // Ensure enough space for handler.
1840 | mov L:RB->top, RC
1841 | mov CFUNC:RA, [RA-8]
1842 | cmp BASE, L:RB->maxstack
1843 | ja >5 // Need to grow stack.
1844 | call aword CFUNC:RA->f // (lua_State *L)
1845 | // Either throws an error or recovers and returns 0 or NRESULTS (+1).
1846 | test RC, RC; jnz >3
1847 |1: // Returned 0: retry fast path.
1848 | mov RA, L:RB->base
1849 | mov RC, L:RB->top
1850 | sub RC, RA
1851 | shr RC, 3
1852 | add NARGS:RC, 1 // Recompute nargs+1 from L->top - L->base.
1853 | mov LFUNC:RB, [RA-8]
1854 | mov BASE, ARG3 // Restore old BASE.
1855 | add BASE, RA
1856 | cmp [RA-4], PC; jne >2 // Callable modified by handler?
1857 | jmp aword LFUNC:RB->gate // Retry the call.
1858 |
1859 |2: // Run modified callable.
1860 | cmp dword [RA-4], LJ_TFUNC
1861 | jne ->vmeta_call
1862 | jmp aword LFUNC:RB->gate // Retry the call.
1863 |
1864 |3: // Returned NRESULTS (already in RC/RD).
1865 | mov RA, L:RB->base
1866 | mov BASE, ARG3 // Restore old BASE.
1867 | add BASE, RA
1868 | jmp ->fff_res
1869 |
1870 |5: // Grow stack for fallback handler.
1871 | mov ARG2, LUA_MINSTACK
1872 | call extern lj_state_growstack // (lua_State *L, int n)
1873 | jmp <1 // Dumb retry (goes through ff first).
1874 |
1875 |->fff_gcstep: // Call GC step function.
1876 | // RA = new base, RC = nargs+1
1877 | pop RB // Must keep stack at same level.
1878 | mov ARG3, RB // Save return address
1879 | mov L:RB, SAVE_L
1880 | sub BASE, RA // Keep BASE relative to RA across the call.
1881 | mov ARG2, BASE // Save old BASE (relative).
1882 | mov [RA-4], PC
1883 | mov SAVE_PC, PC // Redundant (but a defined value).
1884 | mov L:RB->base, RA
1885 | lea RC, [RA+NARGS:RC*8-8] // RC = slot after the last argument.
1886 | mov ARG1, L:RB
1887 | mov L:RB->top, RC
1888 | call extern lj_gc_step // (lua_State *L)
1889 | mov RA, L:RB->base // Reload RA from L->base after the call.
1890 | mov RC, L:RB->top
1891 | sub RC, RA
1892 | shr RC, 3
1893 | add NARGS:RC, 1 // Recompute nargs+1 from L->top - L->base.
1894 | mov PC, [RA-4]
1895 | mov BASE, ARG2 // Restore old BASE.
1896 | add BASE, RA
1897 | mov RB, ARG3
1898 | push RB // Restore return address.
1899 | mov LFUNC:RB, [RA-8] // RB = function object again, as on fast function entry.
1900 | ret
1901 |
1902 |//-----------------------------------------------------------------------
1903 |//-- Special dispatch targets -------------------------------------------
1904 |//-----------------------------------------------------------------------
1905 |
1906 |->vm_record: // Dispatch target for recording phase.
1907#if LJ_HASJIT
1908 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
1909 | test RDL, HOOK_VMEVENT // No recording while in vmevent.
1910 | jnz >5
1911 | // Decrement the hookcount for consistency, but always do the call.
1912 | test RDL, HOOK_ACTIVE
1913 | jnz >1
1914 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
1915 | jz >1
1916 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
1917 | jmp >1 // Labels 1 and 5 are defined in vm_hook below.
1918#endif
1919 |
1920 |->vm_hook: // Dispatch target with enabled hooks.
1921 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
1922 | test RDL, HOOK_ACTIVE // Hook already active?
1923 | jnz >5
1924 |
1925 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
1926 | jz >5 // Neither line nor count hook set: just re-dispatch.
1927 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
1928 | jz >1 // Hook count reached zero: do the call.
1929 | test RDL, LUA_MASKLINE
1930 | jz >5 // No line hook either: just re-dispatch.
1931 |1:
1932 | mov L:RB, SAVE_L
1933 | mov RD, NRESULTS // Dynamic top for *M instructions.
1934 | mov ARG3, RD
1935 | mov L:RB->base, BASE
1936 | mov ARG2, PC
1937 | mov ARG1, L:RB
1938 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
1939 | call extern lj_dispatch_ins // (lua_State *L, BCIns *pc, int nres)
1940 |4: // Also the re-entry point after lj_trace_hot in vm_hotloop.
1941 | mov BASE, L:RB->base
1942 | movzx RA, PC_RA // Re-decode operand A of the current instruction.
1943 |5:
1944 | movzx OP, PC_OP
1945 | movzx RD, PC_RD
1946 | jmp aword [DISPATCH+OP*4+GG_DISP_STATIC*4] // Re-dispatch to static ins.
1947 |
1948 |->vm_hotloop: // Hot loop counter underflow.
1949#if LJ_HASJIT
1950 | mov L:RB, SAVE_L
1951 | lea RA, [DISPATCH+GG_DISP2J]
1952 | mov ARG2, PC
1953 | mov ARG1, RA
1954 | mov [DISPATCH+DISPATCH_J(L)], L:RB
1955 | mov SAVE_PC, PC
1956 | mov L:RB->base, BASE
1957 | call extern lj_trace_hot // (jit_State *J, const BCIns *pc)
1958 | jmp <4
1959#endif
1960 |
1961 |->vm_hotcall: // Hot call counter underflow.
1962#if LJ_HASJIT
1963 | mov L:RB, SAVE_L
1964 | lea RA, [DISPATCH+GG_DISP2J]
1965 | mov ARG2, PC
1966 | mov ARG1, RA
1967 | mov [DISPATCH+DISPATCH_J(L)], L:RB
1968 | mov SAVE_PC, PC
1969 | mov L:RB->base, BASE
1970 | call extern lj_trace_hot // (jit_State *J, const BCIns *pc)
1971 | mov BASE, L:RB->base
1972 | // Dispatch the first instruction and optionally record it.
1973 | ins_next
1974#endif
1975 |
1976 |//-----------------------------------------------------------------------
1977 |//-- Trace exit handler -------------------------------------------------
1978 |//-----------------------------------------------------------------------
1979 |
1980 |// Called from an exit stub with the exit number on the stack.
1981 |// The 16 bit exit number is stored with two (sign-extended) push imm8.
1982 |->vm_exit_handler:
1983#if LJ_HASJIT
1984 | push ebp; lea ebp, [esp+12]; push ebp
1985 | push ebx; push edx; push ecx; push eax
1986 | movzx RC, byte [ebp-4] // Reconstruct exit number.
1987 | mov RCH, byte [ebp-8]
1988 | mov [ebp-4], edi; mov [ebp-8], esi
1989 | // Caveat: DISPATCH is ebx.
1990 | mov DISPATCH, [ebp]
1991 | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
1992 | set_vmstate EXIT
1993 | mov [DISPATCH+DISPATCH_J(exitno)], RC
1994 | mov [DISPATCH+DISPATCH_J(parent)], RA
1995 | sub esp, 8*8+16 // Room for SSE regs + args.
1996 |
1997 | // Must not access SSE regs if SSE2 is not present.
1998 | test dword [DISPATCH+DISPATCH_J(flags)], JIT_F_SSE2
1999 | jz >1
2000 | movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6
2001 | movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4
2002 | movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2
2003 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
2004 |1:
2005 | // Caveat: RB is ebp.
2006 | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)]
2007 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2008 | mov [DISPATCH+DISPATCH_J(L)], L:RB
2009 | lea RC, [esp+16]
2010 | mov L:RB->base, BASE
2011 | lea RA, [DISPATCH+GG_DISP2J]
2012 | mov ARG2, RC
2013 | mov ARG1, RA
2014 | call extern lj_trace_exit // (jit_State *J, ExitState *ex)
2015 | // Interpreter C frame returned in eax.
2016 | mov esp, eax // Reposition stack to C frame.
2017 | mov BASE, L:RB->base
2018 | mov PC, SAVE_PC
2019 | mov SAVE_L, L:RB // Needed for on-trace resume/yield.
2020#endif
2021 |->vm_exit_interp:
2022#if LJ_HASJIT
2023 | mov LFUNC:KBASE, [BASE-8]
2024 | mov PROTO:KBASE, LFUNC:KBASE->pt
2025 | mov KBASE, PROTO:KBASE->k
2026 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0
2027 | set_vmstate INTERP
2028 | ins_next
2029#endif
2030 |
2031 |//-----------------------------------------------------------------------
2032 |//-- Math helper functions ----------------------------------------------
2033 |//-----------------------------------------------------------------------
2034 |
2035 |// FP value rounding. Called by math.floor/math.ceil fast functions
2036 |// and from JIT code. Arg/ret on x87 stack. No int/xmm registers modified.
2037 |.macro vm_round, mode1, mode2 // mode1 is OR-ed into, mode2 AND-ed with the x87 control word.
2038 | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2.
2039 | mov [esp+8], eax // Save eax, it is used as scratch below.
2040 | mov ax, mode1
2041 | or ax, [esp+4]
2042 |.if mode2 ~= 0xffff
2043 | and ax, mode2
2044 |.endif
2045 | mov [esp+6], ax
2046 | fldcw word [esp+6] // Load the modified control word.
2047 | frndint
2048 | fldcw word [esp+4] // Restore the original control word.
2049 | mov eax, [esp+8] // Restore eax.
2050 | ret
2051 |.endmacro
2052 |
2053 |->vm_floor:
2054 | vm_round 0x0400, 0xf7ff // Sets control word bit 10, clears bit 11.
2055 |
2056 |->vm_ceil:
2057 | vm_round 0x0800, 0xfbff // Sets control word bit 11, clears bit 10.
2058 |
2059 |->vm_trunc:
2060 | vm_round 0x0c00, 0xffff // Sets control word bits 10 and 11.
2061 |
2062 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
2063 |// Args/ret on x87 stack (y on top). No xmm registers modified.
2064 |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
2065 |->vm_mod:
2066 | fld st1 // Duplicate x.
2067 | fdiv st1 // st0 = x/y.
2068 | fnstcw word [esp+4]
2069 | mov ax, 0x0400
2070 | or ax, [esp+4]
2071 | and ax, 0xf7ff // Same control word change as vm_floor.
2072 | mov [esp+6], ax
2073 | fldcw word [esp+6]
2074 | frndint // st0 = x/y rounded under the modified control word.
2075 | fldcw word [esp+4] // Restore the original control word.
2076 | fmulp st1 // st0 = rounded(x/y) * y.
2077 | fsubp st1 // st0 = x - rounded(x/y) * y.
2078 | ret
2079 |
2080 |// FP exponentiation e^x and 2^x. Called by math.exp fast function and
2081 |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified.
2082 |// Caveat: needs 3 slots on x87 stack!
2083 |->vm_exp:
2084 | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e))
2085 |->vm_exp2:
2086 | fst dword [esp+4] // Caveat: overwrites ARG1.
2087 | cmp dword [esp+4], 0x7f800000; je >1 // Special case: e^+Inf = +Inf
2088 | cmp dword [esp+4], 0xff800000; je >2 // Special case: e^-Inf = 0
2089 |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check.
2090 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
2091 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
2092 |1: // Also the +Inf exit: the argument is returned unchanged.
2093 | ret
2094 |2: // -Inf exit: replace the argument with 0.
2095 | fpop; fldz; ret
2096 |
2097 |// Generic power function x^y. Called by BC_POW, math.pow fast function
2098 |// and vm_arith. Args/ret on x87 stack (y on top). No int/xmm regs modified.
2099 |// Caveat: needs 3 slots on x87 stack!
2100 |->vm_pow:
2101 | fist dword [esp+4] // Store/reload int before comparison.
2102 | fild dword [esp+4] // Integral exponent used in vm_powi.
2103 ||if (cmov) {
2104 | fucomip st1
2105 ||} else {
2106 | push eax; fucomp st1; fnstsw ax; sahf; pop eax
2107 ||}
2108 | jnz >8 // Branch for FP exponents.
2109 | jp >9 // Branch for NaN exponent.
2110 | fpop // Pop y and fallthrough to vm_powi.
2111 |
2112 |// FP/int power function x^i. Called from JIT code. Arg1/ret on x87 stack.
2113 |// Arg2 (int) on C stack. No int/xmm regs modified.
2114 |// Caveat: needs 2 slots on x87 stack!
2115 |->vm_powi:
2116 | push eax
2117 | mov eax, [esp+8]
2118 | cmp eax, 1; jle >6 // i<=1?
2119 | // Now 1 < (unsigned)i <= 0x80000000.
2120 |1: // Handle leading zeros.
2121 | test eax, 1; jnz >2
2122 | fmul st0
2123 | shr eax, 1
2124 | jmp <1
2125 |2:
2126 | shr eax, 1; jz >5
2127 | fdup
2128 |3: // Handle trailing bits.
2129 | fmul st0
2130 | shr eax, 1; jz >4
2131 | jnc <3
2132 | fmul st1, st0
2133 | jmp <3
2134 |4:
2135 | fmulp st1
2136 |5:
2137 | pop eax
2138 | ret
2139 |6:
2140 | je <5 // x^1 ==> x
2141 | jb >7
2142 | fld1; fdivrp st1
2143 | neg eax
2144 | cmp eax, 1; je <5 // x^-1 ==> 1/x
2145 | jmp <1 // x^-i ==> (1/x)^i
2146 |7:
2147 | fpop; fld1 // x^0 ==> 1
2148 | pop eax
2149 | ret
2150 |
2151 |8: // FP/FP power function x^y.
2152 | push eax
2153 | fst dword [esp+8]
2154 | fxch
2155 | fst dword [esp+12]
2156 | mov eax, [esp+8]; shl eax, 1
2157 | cmp eax, 0xff000000; je >2 // x^+-Inf?
2158 | mov eax, [esp+12]; shl eax, 1; je >4 // +-0^y?
2159 | cmp eax, 0xff000000; je >4 // +-Inf^y?
2160 | pop eax
2161 | fyl2x
2162 | jmp ->vm_exp2raw
2163 |
2164 |9: // Handle x^NaN.
2165 | fld1
2166 ||if (cmov) {
2167 | fucomip st2
2168 ||} else {
2169 | push eax; fucomp st2; fnstsw ax; sahf; pop eax
2170 ||}
2171 | je >1 // 1^NaN ==> 1
2172 | fxch // x^NaN ==> NaN
2173 |1:
2174 | fpop
2175 | ret
2176 |
2177 |2: // Handle x^+-Inf.
2178 | fabs
2179 | fld1
2180 ||if (cmov) {
2181 | fucomip st1
2182 ||} else {
2183 | fucomp st1; fnstsw ax; sahf
2184 ||}
2185 | je >3 // +-1^+-Inf ==> 1
2186 | fpop; fabs; fldz; mov eax, 0; setc al
2187 | ror eax, 1; xor eax, [esp+8]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
2188 | fxch
2189 |3:
2190 | fpop1; fabs; pop eax
2191 | ret
2192 |
2193 |4: // Handle +-0^y or +-Inf^y.
2194 | cmp dword [esp+8], 0; jge <3 // y >= 0, x^y ==> |x|
2195 | fpop; fpop
2196 | test eax, eax; pop eax; jz >5 // y < 0, +-0^y ==> +Inf
2197 | fldz // y < 0, +-Inf^y ==> 0
2198 | ret
2199 |5:
2200 | mov dword [esp+8], 0x7f800000 // Return +Inf.
2201 | fld dword [esp+8]
2202 | ret
2203 |
2204 |// Callable from C: double lj_vm_foldfpm(double x, int fpm)
2205 |// Computes fpm(x) for extended math functions. ORDER FPM.
2206 |->vm_foldfpm: // Dispatches on fpm with a compare chain; the result is left in st0.
2207 | mov eax, [esp+12] // eax = fpm (stored after the 8-byte double x).
2208 | fld qword [esp+4] // st0 = x.
2209 | cmp eax, 1; jb ->vm_floor; je ->vm_ceil // fpm 0: floor, fpm 1: ceil (tail jumps).
2210 | cmp eax, 3; jb ->vm_trunc; ja >1 // fpm 2: trunc.
2211 | fsqrt; ret // fpm 3: sqrt.
2212 |1: ; cmp eax, 5; jb ->vm_exp; je ->vm_exp2 // fpm 4: e^x, fpm 5: 2^x.
2213 | cmp eax, 7; je >1; ja >2
2214 | fldln2; fxch; fyl2x; ret // fpm 6: ln(2)*log2(x) = ln(x).
2215 |1: ; fld1; fxch; fyl2x; ret // fpm 7: 1*log2(x) = log2(x).
2216 |2: ; cmp eax, 9; je >1; ja >2
2217 | fldlg2; fxch; fyl2x; ret // fpm 8: log10(2)*log2(x) = log10(x).
2218 |1: ; fsin; ret // fpm 9: sin.
2219 |2: ; cmp eax, 11; je >1; ja >9
2220 | fcos; ret // fpm 10: cos.
2221 |1: ; fptan; fpop; ret // fpm 11: tan. fptan also pushes 1.0, which is dropped.
2222 |9: ; int3 // Bad fpm.
2223 |
2224 |// Callable from C: double lj_vm_foldarith(double x, double y, int op)
2225 |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
2226 |// and basic math functions. ORDER ARITH
2227 |->vm_foldarith: // Dispatches on op with a compare chain; the result is left in st0.
2228 | mov eax, [esp+20] // eax = op (stored after the two 8-byte doubles).
2229 | fld qword [esp+4] // Push x.
2230 | fld qword [esp+12] // Push y: st0 = y, st1 = x.
2231 | cmp eax, 1; je >1; ja >2
2232 | faddp st1; ret // op 0: x + y.
2233 |1: ; fsubp st1; ret // op 1: x - y.
2234 |2: ; cmp eax, 3; je >1; ja >2
2235 | fmulp st1; ret // op 2: x * y.
2236 |1: ; fdivp st1; ret // op 3: x / y.
2237 |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow // op 4: x % y, op 5: x ^ y (tail jumps).
2238 | cmp eax, 7; je >1; ja >2
2239 | fpop; fchs; ret // op 6: drop y, return -x.
2240 |1: ; fpop; fabs; ret // op 7: drop y, return |x|.
2241 |2: ; cmp eax, 9; je >1; ja >2
2242 | fpatan; ret // op 8: arctan(st1/st0), i.e. atan2(x, y).
2243 |1: ; fxch; fscale; fpop1; ret // op 9: scale x by a power of two taken from y, then drop y.
2244 |2: ; cmp eax, 11; je >1; ja >9
2245 ||if (cmov) {
2246 | fucomi st1; fcmovnbe st1; fpop1; ret // op 10: if y > x replace st0 with x, i.e. min for ordered operands.
2247 |1: ; fucomi st1; fcmovbe st1; fpop1; ret // op 11: if y <= x (or unordered) replace st0 with x, i.e. max.
2248 ||} else {
2249 | fucom st1; fnstsw ax; test ah, 1; jz >2; fxch; 2: ; fpop; ret // op 10 (min) without cmov: ah bit 0 is C0, set if y < x or unordered.
2250 |1: ; fucom st1; fnstsw ax; test ah, 1; jnz >2; fxch; 2: ; fpop; ret // op 11 (max) without cmov: same test, inverted branch.
2251 ||}
2252 |9: ; int3 // Bad op.
2253 |
2254 |//-----------------------------------------------------------------------
2255 |//-- Miscellaneous functions --------------------------------------------
2256 |//-----------------------------------------------------------------------
2257 |
2258 |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
2259 |->vm_cpuid: // Returns 0 in eax if CPUID is unsupported, else the eax value left by cpuid.
2260 | pushfd
2261 | pop edx // edx = current EFLAGS.
2262 | mov ecx, edx // Keep the original flags for the comparison below.
2263 | xor edx, 0x00200000 // Toggle ID bit in flags.
2264 | push edx
2265 | popfd // Try to write the toggled value back.
2266 | pushfd
2267 | pop edx // Re-read EFLAGS to see whether the toggle stuck.
2268 | xor eax, eax // Zero means no features supported.
2269 | cmp ecx, edx
2270 | jz >1 // No ID toggle means no CPUID support.
2271 | mov eax, [esp+4] // Argument 1 is function number.
2272 | push edi // edi is used as the result pointer below.
2273 | push ebx // cpuid overwrites ebx.
2274 | cpuid
2275 | mov edi, [esp+16] // Argument 2 is result area.
2276 | mov [edi], eax // res[0] = eax
2277 | mov [edi+4], ebx // res[1] = ebx
2278 | mov [edi+8], ecx // res[2] = ecx
2279 | mov [edi+12], edx // res[3] = edx
2280 | pop ebx
2281 | pop edi
2282 |1:
2283 | ret
2284 |
2285 |//-----------------------------------------------------------------------
2286}
2287
2288/* Generate the code for a single instruction. */
2289static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov)
2290{
2291 int vk = 0;
2292 |// Note: aligning all instructions does not pay off.
2293 |=>defop:
2294
2295 switch (op) {
2296
2297 /* -- Comparison ops ---------------------------------------------------- */
2298
2299 /* Remember: all ops branch for a true comparison, fall through otherwise. */
2300
2301 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
2302 | // RA = src1, RD = src2, JMP with RD = target
2303 | ins_AD
2304 | checknum RA, ->vmeta_comp
2305 | checknum RD, ->vmeta_comp
2306 | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
2307 | fld qword [BASE+RD*8]
2308 | add PC, 4
2309 | fcomparepp // eax (RD) modified!
2310 | // Unordered: all of ZF CF PF set, ordered: PF clear.
2311 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
2312 switch (op) {
2313 case BC_ISLT:
2314 | jbe >2
2315 break;
2316 case BC_ISGE:
2317 | ja >2
2318 break;
2319 case BC_ISLE:
2320 | jb >2
2321 break;
2322 case BC_ISGT:
2323 | jae >2
2324 break;
2325 default: break; /* Shut up GCC. */
2326 }
2327 |1:
2328 | movzx RD, PC_RD
2329 | branchPC RD
2330 |2:
2331 | ins_next
2332 break;
2333
2334 case BC_ISEQV: case BC_ISNEV:
2335 vk = op == BC_ISEQV;
2336 | ins_AD // RA = src1, RD = src2, JMP with RD = target
2337 | mov RB, [BASE+RD*8+4]
2338 | add PC, 4
2339 | cmp RB, LJ_TISNUM; ja >5
2340 | checknum RA, >5
2341 | fld qword [BASE+RA*8]
2342 | fld qword [BASE+RD*8]
2343 | fcomparepp // eax (RD) modified!
2344 iseqne_fp:
2345 if (vk) {
2346 | jp >2 // Unordered means not equal.
2347 | jne >2
2348 } else {
2349 | jp >2 // Unordered means not equal.
2350 | je >1
2351 }
2352 iseqne_end:
2353 if (vk) {
2354 |1: // EQ: Branch to the target.
2355 | movzx RD, PC_RD
2356 | branchPC RD
2357 |2: // NE: Fallthrough to next instruction.
2358 } else {
2359 |2: // NE: Branch to the target.
2360 | movzx RD, PC_RD
2361 | branchPC RD
2362 |1: // EQ: Fallthrough to next instruction.
2363 }
2364 | ins_next
2365 |
2366 if (op == BC_ISEQV || op == BC_ISNEV) {
2367 |5: // Either or both types are not numbers.
2368 | checktp RA, RB // Compare types.
2369 | jne <2 // Not the same type?
2370 | cmp RB, LJ_TISPRI
2371 | jae <1 // Same type and primitive type?
2372 |
2373 | // Same types and not a primitive type. Compare GCobj or pvalue.
2374 | mov RA, [BASE+RA*8]
2375 | mov RD, [BASE+RD*8]
2376 | cmp RA, RD
2377 | je <1 // Same GCobjs or pvalues?
2378 | cmp RB, LJ_TISTABUD
2379 | ja <2 // Different objects and not table/ud?
2380 |
2381 | // Different tables or userdatas. Need to check __eq metamethod.
2382 | // Field metatable must be at same offset for GCtab and GCudata!
2383 | mov TAB:RB, TAB:RA->metatable
2384 | test TAB:RB, TAB:RB
2385 | jz <2 // No metatable?
2386 | test byte TAB:RB->nomm, 1<<MM_eq
2387 | jnz <2 // Or 'no __eq' flag set?
2388 if (vk) {
2389 | xor RB, RB // ne = 0
2390 } else {
2391 | mov RB, 1 // ne = 1
2392 }
2393 | jmp ->vmeta_equal // Handle __eq metamethod.
2394 }
2395 break;
2396 case BC_ISEQS: case BC_ISNES:
2397 vk = op == BC_ISEQS;
2398 | ins_AND // RA = src, RD = str const, JMP with RD = target
2399 | add PC, 4
2400 | checkstr RA, >2
2401 | mov RA, [BASE+RA*8]
2402 | cmp RA, [KBASE+RD*4]
2403 iseqne_test:
2404 if (vk) {
2405 | jne >2
2406 } else {
2407 | je >1
2408 }
2409 goto iseqne_end;
2410 case BC_ISEQN: case BC_ISNEN:
2411 vk = op == BC_ISEQN;
2412 | ins_AD // RA = src, RD = num const, JMP with RD = target
2413 | add PC, 4
2414 | checknum RA, >2
2415 | fld qword [BASE+RA*8]
2416 | fld qword [KBASE+RD*8]
2417 | fcomparepp // eax (RD) modified!
2418 goto iseqne_fp;
2419 case BC_ISEQP: case BC_ISNEP:
2420 vk = op == BC_ISEQP;
2421 | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
2422 | add PC, 4
2423 | checktp RA, RD
2424 goto iseqne_test;
2425
2426 /* -- Unary test and copy ops ------------------------------------------- */
2427
2428 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
2429 | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
2430 | mov RB, [BASE+RD*8+4]
2431 | add PC, 4
2432 | cmp RB, LJ_TISTRUECOND
2433 if (op == BC_IST || op == BC_ISTC) {
2434 | jae >1
2435 } else {
2436 | jb >1
2437 }
2438 if (op == BC_ISTC || op == BC_ISFC) {
2439 | mov [BASE+RA*8+4], RB
2440 | mov RB, [BASE+RD*8]
2441 | mov [BASE+RA*8], RB
2442 }
2443 | movzx RD, PC_RD
2444 | branchPC RD
2445 |1: // Fallthrough to the next instruction.
2446 | ins_next
2447 break;
2448
2449 /* -- Unary ops --------------------------------------------------------- */
2450
2451 case BC_MOV:
2452 | ins_AD // RA = dst, RD = src
2453 | mov RB, [BASE+RD*8+4]
2454 | mov RD, [BASE+RD*8] // Overwrites RD.
2455 | mov [BASE+RA*8+4], RB
2456 | mov [BASE+RA*8], RD
2457 | ins_next_
2458 break;
2459 case BC_NOT:
2460 | ins_AD // RA = dst, RD = src
2461 | xor RB, RB
2462 | checktp RD, LJ_TISTRUECOND
2463 | adc RB, LJ_TTRUE
2464 | mov [BASE+RA*8+4], RB
2465 | ins_next
2466 break;
2467 case BC_UNM:
2468 | ins_AD // RA = dst, RD = src
2469 | checknum RD, ->vmeta_unm
2470 | fld qword [BASE+RD*8]
2471 | fchs
2472 | fstp qword [BASE+RA*8]
2473 | ins_next
2474 break;
2475 case BC_LEN:
2476 | ins_AD // RA = dst, RD = src
2477 | checkstr RD, >2
2478 | mov STR:RD, [BASE+RD*8]
2479 | fild dword STR:RD->len
2480 |1:
2481 | fstp qword [BASE+RA*8]
2482 | ins_next
2483 |2:
2484 | checktab RD, ->vmeta_len
2485 | mov TAB:RD, [BASE+RD*8]
2486 | mov ARG1, TAB:RD
2487 | mov RB, BASE // Save BASE.
2488 | call extern lj_tab_len // (GCtab *t)
2489 | // Length of table returned in eax (RC).
2490 | mov ARG1, RC
2491 | mov BASE, RB // Restore BASE.
2492 | fild ARG1
2493 | movzx RA, PC_RA
2494 | jmp <1
2495 break;
2496
2497 /* -- Binary ops -------------------------------------------------------- */
2498
2499 |.macro ins_arithpre, ins
2500 | ins_ABC
2501 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
2502 ||switch (vk) {
2503 ||case 0:
2504 | checknum RB, ->vmeta_arith_vn
2505 | fld qword [BASE+RB*8]
2506 | ins qword [KBASE+RC*8]
2507 || break;
2508 ||case 1:
2509 | checknum RB, ->vmeta_arith_nv
2510 | fld qword [KBASE+RC*8]
2511 | ins qword [BASE+RB*8]
2512 || break;
2513 ||default:
2514 | checknum RB, ->vmeta_arith_vv
2515 | checknum RC, ->vmeta_arith_vv
2516 | fld qword [BASE+RB*8]
2517 | ins qword [BASE+RC*8]
2518 || break;
2519 ||}
2520 |.endmacro
2521 |
2522 |.macro ins_arith, ins
2523 | ins_arithpre ins
2524 | fstp qword [BASE+RA*8]
2525 | ins_next
2526 |.endmacro
2527
2528 | // RA = dst, RB = src1 or num const, RC = src2 or num const
2529 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
2530 | ins_arith fadd
2531 break;
2532 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
2533 | ins_arith fsub
2534 break;
2535 case BC_MULVN: case BC_MULNV: case BC_MULVV:
2536 | ins_arith fmul
2537 break;
2538 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
2539 | ins_arith fdiv
2540 break;
2541 case BC_MODVN:
2542 | ins_arithpre fld
2543 |->BC_MODVN_Z:
2544 | call ->vm_mod
2545 | fstp qword [BASE+RA*8]
2546 | ins_next
2547 break;
2548 case BC_MODNV: case BC_MODVV:
2549 | ins_arithpre fld
2550 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
2551 break;
2552 case BC_POW:
2553 | ins_arithpre fld
2554 | call ->vm_pow
2555 | fstp qword [BASE+RA*8]
2556 | ins_next
2557 break;
2558
2559 case BC_CAT:
2560 | ins_ABC // RA = dst, RB = src_start, RC = src_end
2561 | lea RA, [BASE+RC*8]
2562 | sub RC, RB
2563 | mov ARG2, RA
2564 | mov ARG3, RC
2565 |->BC_CAT_Z:
2566 | mov L:RB, SAVE_L
2567 | mov ARG1, L:RB
2568 | mov SAVE_PC, PC
2569 | mov L:RB->base, BASE
2570 | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
2571 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
2572 | mov BASE, L:RB->base
2573 | test RC, RC
2574 | jnz ->vmeta_binop
2575 | movzx RB, PC_RB // Copy result to Stk[RA] from Stk[RB].
2576 | movzx RA, PC_RA
2577 | mov RC, [BASE+RB*8+4]
2578 | mov RB, [BASE+RB*8]
2579 | mov [BASE+RA*8+4], RC
2580 | mov [BASE+RA*8], RB
2581 | ins_next
2582 break;
2583
2584 /* -- Constant ops ------------------------------------------------------ */
2585
2586 case BC_KSTR:
2587 | ins_AND // RA = dst, RD = str const (~)
2588 | mov RD, [KBASE+RD*4]
2589 | mov dword [BASE+RA*8+4], LJ_TSTR
2590 | mov [BASE+RA*8], RD
2591 | ins_next
2592 break;
2593 case BC_KSHORT:
2594 | ins_AD // RA = dst, RD = signed int16 literal
2595 | fild PC_RD // Refetch signed RD from instruction.
2596 | fstp qword [BASE+RA*8]
2597 | ins_next
2598 break;
2599 case BC_KNUM:
2600 | ins_AD // RA = dst, RD = num const
2601 | fld qword [KBASE+RD*8]
2602 | fstp qword [BASE+RA*8]
2603 | ins_next
2604 break;
2605 case BC_KPRI:
2606 | ins_AND // RA = dst, RD = primitive type (~)
2607 | mov [BASE+RA*8+4], RD
2608 | ins_next
2609 break;
2610 case BC_KNIL:
2611 | ins_AD // RA = dst_start, RD = dst_end
2612 | lea RA, [BASE+RA*8+12]
2613 | lea RD, [BASE+RD*8+4]
2614 | mov RB, LJ_TNIL
2615 | mov [RA-8], RB // Sets minimum 2 slots.
2616 |1:
2617 | mov [RA], RB
2618 | add RA, 8
2619 | cmp RA, RD
2620 | jbe <1
2621 | ins_next
2622 break;
2623
2624 /* -- Upvalue and function ops ------------------------------------------ */
2625
2626 case BC_UGET:
2627 | ins_AD // RA = dst, RD = upvalue #
2628 | mov LFUNC:RB, [BASE-8]
2629 | mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)]
2630 | mov RB, UPVAL:RB->v
2631 | mov RD, [RB+4]
2632 | mov RB, [RB]
2633 | mov [BASE+RA*8+4], RD
2634 | mov [BASE+RA*8], RB
2635 | ins_next
2636 break;
2637 case BC_USETV:
2638 | ins_AD // RA = upvalue #, RD = src
2639 | // Really ugly code due to the lack of a 4th free register.
2640 | mov LFUNC:RB, [BASE-8]
2641 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
2642 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
2643 | jnz >4
2644 |1:
2645 | mov RA, [BASE+RD*8]
2646 |2:
2647 | mov RB, UPVAL:RB->v
2648 | mov RD, [BASE+RD*8+4]
2649 | mov [RB], RA
2650 | mov [RB+4], RD
2651 |3:
2652 | ins_next
2653 |
2654 |4: // Upvalue is black. Check if new value is collectable and white.
2655 | mov RA, [BASE+RD*8+4]
2656 | sub RA, LJ_TISGCV
2657 | cmp RA, LJ_TISNUM - LJ_TISGCV // tvisgcv(v)
2658 | jbe <1
2659 | mov GCOBJ:RA, [BASE+RD*8]
2660 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
2661 | jz <2
2662 | // Crossed a write barrier. So move the barrier forward.
2663 | mov ARG2, UPVAL:RB
2664 | mov ARG3, GCOBJ:RA
2665 | mov RB, UPVAL:RB->v
2666 | mov RD, [BASE+RD*8+4]
2667 | mov [RB], GCOBJ:RA
2668 | mov [RB+4], RD
2669 |->BC_USETV_Z:
2670 | mov L:RB, SAVE_L
2671 | lea GL:RA, [DISPATCH+GG_DISP2G]
2672 | mov L:RB->base, BASE
2673 | mov ARG1, GL:RA
2674 | call extern lj_gc_barrieruv // (global_State *g, GCobj *o, GCobj *v)
2675 | mov BASE, L:RB->base
2676 | jmp <3
2677 break;
2678 case BC_USETS:
2679 | ins_AND // RA = upvalue #, RD = str const (~)
2680 | mov LFUNC:RB, [BASE-8]
2681 | mov GCOBJ:RD, [KBASE+RD*4]
2682 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
2683 | mov RA, UPVAL:RB->v
2684 | mov dword [RA+4], LJ_TSTR
2685 | mov [RA], GCOBJ:RD
2686 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
2687 | jnz >2
2688 |1:
2689 | ins_next
2690 |
2691 |2: // Upvalue is black. Check if string is white.
2692 | test byte GCOBJ:RD->gch.marked, LJ_GC_WHITES // iswhite(str)
2693 | jz <1
2694 | // Crossed a write barrier. So move the barrier forward.
2695 | mov ARG3, GCOBJ:RD
2696 | mov ARG2, UPVAL:RB
2697 | jmp ->BC_USETV_Z
2698 break;
2699 case BC_USETN:
2700 | ins_AD // RA = upvalue #, RD = num const
2701 | mov LFUNC:RB, [BASE-8]
2702 | fld qword [KBASE+RD*8]
2703 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
2704 | mov RA, UPVAL:RB->v
2705 | fstp qword [RA]
2706 | ins_next
2707 break;
2708 case BC_USETP:
2709 | ins_AND // RA = upvalue #, RD = primitive type (~)
2710 | mov LFUNC:RB, [BASE-8]
2711 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
2712 | mov RA, UPVAL:RB->v
2713 | mov [RA+4], RD
2714 | ins_next
2715 break;
2716 case BC_UCLO:
2717 | ins_AD // RA = level, RD = target
2718 | branchPC RD // Do this first to free RD.
2719 | mov L:RB, SAVE_L
2720 | cmp dword L:RB->openupval, 0
2721 | je >1
2722 | lea RA, [BASE+RA*8]
2723 | mov ARG2, RA
2724 | mov ARG1, L:RB
2725 | mov L:RB->base, BASE
2726 | call extern lj_func_closeuv // (lua_State *L, StkId level)
2727 | mov BASE, L:RB->base
2728 |1:
2729 | ins_next
2730 break;
2731
2732 case BC_FNEW:
2733 | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
2734 | mov LFUNC:RA, [BASE-8]
2735 | mov PROTO:RD, [KBASE+RD*4] // Fetch GCproto *.
2736 | mov L:RB, SAVE_L
2737 | mov ARG3, LFUNC:RA
2738 | mov ARG2, PROTO:RD
2739 | mov SAVE_PC, PC
2740 | mov ARG1, L:RB
2741 | mov L:RB->base, BASE
2742 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
2743 | call extern lj_func_newL_gc
2744 | // GCfuncL * returned in eax (RC).
2745 | mov BASE, L:RB->base
2746 | movzx RA, PC_RA
2747 | mov [BASE+RA*8], LFUNC:RC
2748 | mov dword [BASE+RA*8+4], LJ_TFUNC
2749 | ins_next
2750 break;
2751
2752 /* -- Table ops --------------------------------------------------------- */
2753
2754 case BC_TNEW:
2755 | ins_AD // RA = dst, RD = hbits|asize
2756 | mov RB, RD
2757 | and RD, 0x7ff
2758 | shr RB, 11
2759 | cmp RD, 0x7ff // Turn 0x7ff into 0x801.
2760 | sete RAL
2761 | mov ARG3, RB
2762 | add RD, RA
2763 | mov L:RB, SAVE_L
2764 | add RD, RA
2765 | mov ARG2, RD
2766 | mov SAVE_PC, PC
2767 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
2768 | mov ARG1, L:RB
2769 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
2770 | mov L:RB->base, BASE
2771 | jae >2
2772 |1:
2773 | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
2774 | // Table * returned in eax (RC).
2775 | mov BASE, L:RB->base
2776 | movzx RA, PC_RA
2777 | mov [BASE+RA*8], TAB:RC
2778 | mov dword [BASE+RA*8+4], LJ_TTAB
2779 | ins_next
2780 |2:
2781 | call extern lj_gc_step_fixtop // (lua_State *L)
2782 | mov ARG1, L:RB // Args owned by callee. Set it again.
2783 | jmp <1
2784 break;
2785 case BC_TDUP:
2786 | ins_AND // RA = dst, RD = table const (~) (holding template table)
2787 | mov TAB:RD, [KBASE+RD*4]
2788 | mov L:RB, SAVE_L
2789 | mov ARG2, TAB:RD
2790 | mov ARG1, L:RB
2791 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
2792 | mov SAVE_PC, PC
2793 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
2794 | mov L:RB->base, BASE
2795 | jae >3
2796 |2:
2797 | call extern lj_tab_dup // (lua_State *L, Table *kt)
2798 | // Table * returned in eax (RC).
2799 | mov BASE, L:RB->base
2800 | movzx RA, PC_RA
2801 | mov [BASE+RA*8], TAB:RC
2802 | mov dword [BASE+RA*8+4], LJ_TTAB
2803 | ins_next
2804 |3:
2805 | call extern lj_gc_step_fixtop // (lua_State *L)
2806 | mov ARG1, L:RB // Args owned by callee. Set it again.
2807 | jmp <2
2808 break;
2809
2810 case BC_GGET:
2811 | ins_AND // RA = dst, RD = str const (~)
2812 | mov LFUNC:RB, [BASE-8]
2813 | mov TAB:RB, LFUNC:RB->env
2814 | mov STR:RC, [KBASE+RD*4]
2815 | jmp ->BC_TGETS_Z
2816 break;
2817 case BC_GSET:
2818 | ins_AND // RA = src, RD = str const (~)
2819 | mov LFUNC:RB, [BASE-8]
2820 | mov TAB:RB, LFUNC:RB->env
2821 | mov STR:RC, [KBASE+RD*4]
2822 | jmp ->BC_TSETS_Z
2823 break;
2824
2825 case BC_TGETV:
2826 | ins_ABC // RA = dst, RB = table, RC = key
2827 | checktab RB, ->vmeta_tgetv
2828 | mov TAB:RB, [BASE+RB*8]
2829 |
2830 | // Integer key? Convert number to int and back and compare.
2831 | checknum RC, >5
2832 | fld qword [BASE+RC*8]
2833 | fist ARG1
2834 | fild ARG1
2835 | fcomparepp // eax (RC) modified!
2836 | mov RC, ARG1
2837 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
2838 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
2839 | jae ->vmeta_tgetv // Not in array part? Use fallback.
2840 | shl RC, 3
2841 | add RC, TAB:RB->array
2842 | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
2843 | je >2
2844 |1:
2845 | mov RB, [RC] // Get array slot.
2846 | mov RC, [RC+4]
2847 | mov [BASE+RA*8], RB
2848 | mov [BASE+RA*8+4], RC
2849 | ins_next
2850 |
2851 |2: // Check for __index if table value is nil.
2852 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
2853 | jz <1
2854 | mov TAB:RA, TAB:RB->metatable
2855 | test byte TAB:RA->nomm, 1<<MM_index
2856 | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
2857 | movzx RA, PC_RA // Restore RA.
2858 | jmp <1
2859 |
2860 |5: // String key?
2861 | checkstr RC, ->vmeta_tgetv
2862 | mov STR:RC, [BASE+RC*8]
2863 | jmp ->BC_TGETS_Z
2864 break;
2865 case BC_TGETS:
2866 | ins_ABC // RA = dst, RB = table, RC = str const (~)
2867 | not RC
2868 | mov STR:RC, [KBASE+RC*4]
2869 | checktab RB, ->vmeta_tgets
2870 | mov TAB:RB, [BASE+RB*8]
2871 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
2872 | mov RA, TAB:RB->hmask
2873 | and RA, STR:RC->hash
2874 | imul RA, #NODE
2875 | add NODE:RA, TAB:RB->node
2876 |1:
2877 | cmp dword NODE:RA->key.it, LJ_TSTR
2878 | jne >4
2879 | cmp dword NODE:RA->key.gcr, STR:RC
2880 | jne >4
2881 | // Ok, key found. Assumes: offsetof(Node, val) == 0
2882 | cmp dword [RA+4], LJ_TNIL // Avoid overwriting RB in fastpath.
2883 | je >5 // Key found, but nil value?
2884 | movzx RC, PC_RA
2885 | mov RB, [RA] // Get node value.
2886 | mov RA, [RA+4]
2887 | mov [BASE+RC*8], RB
2888 |2:
2889 | mov [BASE+RC*8+4], RA
2890 | ins_next
2891 |
2892 |3:
2893 | movzx RC, PC_RA
2894 | mov RA, LJ_TNIL
2895 | jmp <2
2896 |
2897 |4: // Follow hash chain.
2898 | mov NODE:RA, NODE:RA->next
2899 | test NODE:RA, NODE:RA
2900 | jnz <1
2901 | // End of hash chain: key not found, nil result.
2902 |
2903 |5: // Check for __index if table value is nil.
2904 | mov TAB:RA, TAB:RB->metatable
2905 | test TAB:RA, TAB:RA
2906 | jz <3 // No metatable: done.
2907 | test byte TAB:RA->nomm, 1<<MM_index
2908 | jnz <3 // 'no __index' flag set: done.
2909 | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
2910 break;
2911 case BC_TGETB:
2912 | ins_ABC // RA = dst, RB = table, RC = byte literal
2913 | checktab RB, ->vmeta_tgetb
2914 | mov TAB:RB, [BASE+RB*8]
2915 | cmp RC, TAB:RB->asize
2916 | jae ->vmeta_tgetb
2917 | shl RC, 3
2918 | add RC, TAB:RB->array
2919 | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
2920 | je >2
2921 |1:
2922 | mov RB, [RC] // Get array slot.
2923 | mov RC, [RC+4]
2924 | mov [BASE+RA*8], RB
2925 | mov [BASE+RA*8+4], RC
2926 | ins_next
2927 |
2928 |2: // Check for __index if table value is nil.
2929 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
2930 | jz <1
2931 | mov TAB:RA, TAB:RB->metatable
2932 | test byte TAB:RA->nomm, 1<<MM_index
2933 | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
2934 | movzx RA, PC_RA // Restore RA.
2935 | jmp <1
2936 break;
2937
2938 case BC_TSETV:
2939 | ins_ABC // RA = src, RB = table, RC = key
2940 | checktab RB, ->vmeta_tsetv
2941 | mov TAB:RB, [BASE+RB*8]
2942 |
2943 | // Integer key? Convert number to int and back and compare.
2944 | checknum RC, >5
2945 | fld qword [BASE+RC*8]
2946 | fist ARG1
2947 | fild ARG1
2948 | fcomparepp // eax (RC) modified!
2949 | mov RC, ARG1
2950 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
2951 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
2952 | jae ->vmeta_tsetv
2953 | shl RC, 3
2954 | add RC, TAB:RB->array
2955 | cmp dword [RC+4], LJ_TNIL
2956 | je >3 // Previous value is nil?
2957 |1:
2958 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
2959 | jnz >7
2960 |2:
2961 | mov RB, [BASE+RA*8+4] // Set array slot.
2962 | mov RA, [BASE+RA*8]
2963 | mov [RC+4], RB
2964 | mov [RC], RA
2965 | ins_next
2966 |
2967 |3: // Check for __newindex if previous value is nil.
2968 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
2969 | jz <1
2970 | mov TAB:RA, TAB:RB->metatable
2971 | test byte TAB:RA->nomm, 1<<MM_newindex
2972 | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
2973 | movzx RA, PC_RA // Restore RA.
2974 | jmp <1
2975 |
2976 |5: // String key?
2977 | checkstr RC, ->vmeta_tsetv
2978 | mov STR:RC, [BASE+RC*8]
2979 | jmp ->BC_TSETS_Z
2980 |
2981 |7: // Possible table write barrier for the value. Skip valiswhite check.
2982 | barrierback TAB:RB, RA
2983 | movzx RA, PC_RA // Restore RA.
2984 | jmp <2
2985 break;
2986 case BC_TSETS:
2987 | ins_ABC // RA = src, RB = table, RC = str const (~)
2988 | not RC
2989 | mov STR:RC, [KBASE+RC*4]
2990 | checktab RB, ->vmeta_tsets
2991 | mov TAB:RB, [BASE+RB*8]
2992 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
2993 | mov RA, TAB:RB->hmask
2994 | and RA, STR:RC->hash
2995 | imul RA, #NODE
2996 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
2997 | add NODE:RA, TAB:RB->node
2998 |1:
2999 | cmp dword NODE:RA->key.it, LJ_TSTR
3000 | jne >5
3001 | cmp dword NODE:RA->key.gcr, STR:RC
3002 | jne >5
3003 | // Ok, key found. Assumes: offsetof(Node, val) == 0
3004 | cmp dword [RA+4], LJ_TNIL
3005 | je >4 // Previous value is nil?
3006 |2:
3007 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3008 | jnz >7
3009 |3:
3010 | movzx RC, PC_RA
3011 | mov RB, [BASE+RC*8+4] // Set node value.
3012 | mov RC, [BASE+RC*8]
3013 | mov [RA+4], RB
3014 | mov [RA], RC
3015 | ins_next
3016 |
3017 |4: // Check for __newindex if previous value is nil.
3018 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
3019 | jz <2
3020 | mov ARG1, RA // Save RA.
3021 | mov TAB:RA, TAB:RB->metatable
3022 | test byte TAB:RA->nomm, 1<<MM_newindex
3023 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3024 | mov RA, ARG1 // Restore RA.
3025 | jmp <2
3026 |
3027 |5: // Follow hash chain.
3028 | mov NODE:RA, NODE:RA->next
3029 | test NODE:RA, NODE:RA
3030 | jnz <1
3031 | // End of hash chain: key not found, add a new one.
3032 |
3033 | // But check for __newindex first.
3034 | mov TAB:RA, TAB:RB->metatable
3035 | test TAB:RA, TAB:RA
3036 | jz >6 // No metatable: continue.
3037 | test byte TAB:RA->nomm, 1<<MM_newindex
3038 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
3039 |6:
3040 | mov ARG5, STR:RC
3041 | mov ARG6, LJ_TSTR
3042 | lea RC, ARG5 // Store temp. TValue in ARG5/ARG6.
3043 | mov ARG4, TAB:RB // Save TAB:RB for us.
3044 | mov ARG2, TAB:RB
3045 | mov L:RB, SAVE_L
3046 | mov ARG3, RC
3047 | mov ARG1, L:RB
3048 | mov SAVE_PC, PC
3049 | mov L:RB->base, BASE
3050 | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
3051 | // Handles write barrier for the new key. TValue * returned in eax (RC).
3052 | mov BASE, L:RB->base
3053 | mov TAB:RB, ARG4 // Need TAB:RB for barrier.
3054 | mov RA, eax
3055 | jmp <2 // Must check write barrier for value.
3056 |
3057 |7: // Possible table write barrier for the value. Skip valiswhite check.
3058 | barrierback TAB:RB, RC // Destroys STR:RC.
3059 | jmp <3
3060 break;
3061 case BC_TSETB:
3062 | ins_ABC // RA = src, RB = table, RC = byte literal
3063 | checktab RB, ->vmeta_tsetb
3064 | mov TAB:RB, [BASE+RB*8]
3065 | cmp RC, TAB:RB->asize
3066 | jae ->vmeta_tsetb
3067 | shl RC, 3
3068 | add RC, TAB:RB->array
3069 | cmp dword [RC+4], LJ_TNIL
3070 | je >3 // Previous value is nil?
3071 |1:
3072 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3073 | jnz >7
3074 |2:
3075 | mov RB, [BASE+RA*8+4] // Set array slot.
3076 | mov RA, [BASE+RA*8]
3077 | mov [RC+4], RB
3078 | mov [RC], RA
3079 | ins_next
3080 |
3081 |3: // Check for __newindex if previous value is nil.
3082 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
3083 | jz <1
3084 | mov TAB:RA, TAB:RB->metatable
3085 | test byte TAB:RA->nomm, 1<<MM_newindex
3086 | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
3087 | movzx RA, PC_RA // Restore RA.
3088 | jmp <1
3089 |
3090 |7: // Possible table write barrier for the value. Skip valiswhite check.
3091 | barrierback TAB:RB, RA
3092 | movzx RA, PC_RA // Restore RA.
3093 | jmp <2
3094 break;
3095
3096 case BC_TSETM:
3097 | ins_AD // RA = base (table at base-1), RD = num const (start index)
3098 | mov ARG5, KBASE // Need one more free register.
3099 | fld qword [KBASE+RD*8]
3100 | fistp ARG4 // Const is guaranteed to be an int.
3101 |1:
3102 | lea RA, [BASE+RA*8]
3103 | mov TAB:RB, [RA-8] // Guaranteed to be a table.
3104 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
3105 | jnz >7
3106 |2:
3107 | mov RD, NRESULTS
3108 | mov KBASE, ARG4
3109 | sub RD, 1
3110 | jz >4 // Nothing to copy?
3111 | add RD, KBASE // Compute needed size.
3112 | cmp RD, TAB:RB->asize
3113 | jae >5 // Does not fit into array part?
3114 | sub RD, KBASE
3115 | shl KBASE, 3
3116 | add KBASE, TAB:RB->array
3117 |3: // Copy result slots to table.
3118 | mov RB, [RA]
3119 | mov [KBASE], RB
3120 | mov RB, [RA+4]
3121 | add RA, 8
3122 | mov [KBASE+4], RB
3123 | add KBASE, 8
3124 | sub RD, 1
3125 | jnz <3
3126 |4:
3127 | mov KBASE, ARG5
3128 | ins_next
3129 |
3130 |5: // Need to resize array part.
3131 | mov ARG2, TAB:RB
3132 | mov L:RB, SAVE_L
3133 | mov ARG3, RD
3134 | mov ARG1, L:RB
3135 | mov SAVE_PC, PC
3136 | mov L:RB->base, BASE
3137 | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
3138 | mov BASE, L:RB->base
3139 | movzx RA, PC_RA // Restore RA.
3140 | jmp <1 // Retry.
3141 |
3142 |7: // Possible table write barrier for any value. Skip valiswhite check.
3143 | barrierback TAB:RB, RD
3144 | jmp <2
3145 break;
3146
3147 /* -- Calls and vararg handling ----------------------------------------- */
3148
3149 case BC_CALL: case BC_CALLM:
3150 | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
3151 if (op == BC_CALLM) {
3152 | add NARGS:RC, NRESULTS
3153 }
3154 | lea RA, [BASE+RA*8+8]
3155 | mov LFUNC:RB, [RA-8]
3156 | cmp dword [RA-4], LJ_TFUNC
3157 | jne ->vmeta_call
3158 | jmp aword LFUNC:RB->gate
3159 break;
3160
3161 case BC_CALLMT:
3162 | ins_AD // RA = base, RD = extra_nargs
3163 | add NARGS:RD, NRESULTS
3164 | // Fall through. Assumes BC_CALLMT follows and ins_AD is a no-op.
3165 break;
3166 case BC_CALLT:
3167 | ins_AD // RA = base, RD = nargs+1
3168 | lea RA, [BASE+RA*8+8]
3169 | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
3170 | mov LFUNC:RB, [RA-8]
3171 | cmp dword [RA-4], LJ_TFUNC
3172 | jne ->vmeta_call
3173 |->BC_CALLT_Z:
3174 | mov PC, [BASE-4]
3175 | test PC, FRAME_TYPE
3176 | jnz >7
3177 |1:
3178 | mov [BASE-8], LFUNC:RB // Copy function down, reloaded below.
3179 | mov NRESULTS, NARGS:RD
3180 | sub NARGS:RD, 1
3181 | jz >3
3182 |2:
3183 | mov RB, [RA] // Move args down.
3184 | mov [KBASE], RB
3185 | mov RB, [RA+4]
3186 | mov [KBASE+4], RB
3187 | add KBASE, 8
3188 | add RA, 8
3189 | sub NARGS:RD, 1
3190 | jnz <2
3191 |
3192 | mov LFUNC:RB, [BASE-8]
3193 |3:
3194 | mov RA, BASE // BASE is ignored, except when ...
3195 | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
3196 | ja >5
3197 |4:
3198 | mov NARGS:RD, NRESULTS
3199 | jmp aword LFUNC:RB->gate
3200 |
3201 |5: // Tailcall to a fast function.
3202 | test PC, FRAME_TYPE // Lua frame below?
3203 | jnz <4
3204 | movzx RD, PC_RA // Need to prepare BASE/KBASE.
3205 | not RD
3206 | lea BASE, [BASE+RD*8]
3207 | mov LFUNC:KBASE, [BASE-8]
3208 | mov PROTO:KBASE, LFUNC:KBASE->pt
3209 | mov KBASE, PROTO:KBASE->k
3210 | jmp <4
3211 |
3212 |7: // Tailcall from a vararg function.
3213 | jnp <1 // Vararg frame below?
3214 | and PC, -8
3215 | sub BASE, PC // Need to relocate BASE/KBASE down.
3216 | mov KBASE, BASE
3217 | mov PC, [BASE-4]
3218 | jmp <1
3219 break;
3220
3221 case BC_ITERC:
3222 | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
3223 | lea RA, [BASE+RA*8+8] // fb = base+1
3224 | mov RB, [RA-24] // Copy state. fb[0] = fb[-3].
3225 | mov RC, [RA-20]
3226 | mov [RA], RB
3227 | mov [RA+4], RC
3228 | mov RB, [RA-16] // Copy control var. fb[1] = fb[-2].
3229 | mov RC, [RA-12]
3230 | mov [RA+8], RB
3231 | mov [RA+12], RC
3232 | mov LFUNC:RB, [RA-32] // Copy callable. fb[-1] = fb[-4]
3233 | mov RC, [RA-28]
3234 | mov [RA-8], LFUNC:RB
3235 | mov [RA-4], RC
3236 | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call.
3237 | mov NARGS:RC, 3
3238 | jne ->vmeta_call
3239 | jmp aword LFUNC:RB->gate
3240 break;
3241
3242 case BC_VARG:
3243 | ins_AB_ // RA = base, RB = nresults+1, (RC = 1)
3244 | mov LFUNC:RC, [BASE-8]
3245 | lea RA, [BASE+RA*8]
3246 | mov PROTO:RC, LFUNC:RC->pt
3247 | movzx RC, byte PROTO:RC->numparams
3248 | mov ARG3, KBASE // Need one more free register.
3249 | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)]
3250 | sub KBASE, [BASE-4]
3251 | // Note: KBASE may now be even _above_ BASE if nargs was < numparams.
3252 | test RB, RB
3253 | jz >5 // Copy all varargs?
3254 | lea RB, [RA+RB*8-8]
3255 | cmp KBASE, BASE // No vararg slots?
3256 | jnb >2
3257 |1: // Copy vararg slots to destination slots.
3258 | mov RC, [KBASE-8]
3259 | mov [RA], RC
3260 | mov RC, [KBASE-4]
3261 | add KBASE, 8
3262 | mov [RA+4], RC
3263 | add RA, 8
3264 | cmp RA, RB // All destination slots filled?
3265 | jnb >3
3266 | cmp KBASE, BASE // No more vararg slots?
3267 | jb <1
3268 |2: // Fill up remainder with nil.
3269 | mov dword [RA+4], LJ_TNIL
3270 | add RA, 8
3271 | cmp RA, RB
3272 | jb <2
3273 |3:
3274 | mov KBASE, ARG3
3275 | ins_next
3276 |
3277 |5: // Copy all varargs.
3278 | mov NRESULTS, 1 // NRESULTS = 0+1
3279 | mov RC, BASE
3280 | sub RC, KBASE
3281 | jbe <3 // No vararg slots?
3282 | mov RB, RC
3283 | shr RB, 3
3284 | mov ARG2, RB // Store this for stack growth below.
3285 | add RB, 1
3286 | mov NRESULTS, RB // NRESULTS = #varargs+1
3287 | mov L:RB, SAVE_L
3288 | add RC, RA
3289 | cmp RC, L:RB->maxstack
3290 | ja >7 // Need to grow stack?
3291 |6: // Copy all vararg slots.
3292 | mov RC, [KBASE-8]
3293 | mov [RA], RC
3294 | mov RC, [KBASE-4]
3295 | add KBASE, 8
3296 | mov [RA+4], RC
3297 | add RA, 8
3298 | cmp KBASE, BASE // No more vararg slots?
3299 | jb <6
3300 | jmp <3
3301 |
3302 |7: // Grow stack for varargs.
3303 | mov L:RB->base, BASE
3304 | mov L:RB->top, RA
3305 | mov SAVE_PC, PC
3306 | sub KBASE, BASE // Need delta, because BASE may change.
3307 | mov ARG1, L:RB
3308 | call extern lj_state_growstack // (lua_State *L, int n)
3309 | mov BASE, L:RB->base
3310 | mov RA, L:RB->top
3311 | add KBASE, BASE
3312 | jmp <6
3313 break;
3314
3315 /* -- Returns ----------------------------------------------------------- */
3316
3317 case BC_RETM:
3318 | ins_AD // RA = results, RD = extra_nresults
3319 | add RD, NRESULTS // NRESULTS >=1, so RD >=1.
3320 | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
3321 break;
3322
3323 case BC_RET: case BC_RET0: case BC_RET1:
3324 | ins_AD // RA = results, RD = nresults+1
3325 if (op != BC_RET0) {
3326 | shl RA, 3
3327 }
3328 |1:
3329 | mov PC, [BASE-4]
3330 | mov NRESULTS, RD // Save nresults+1.
3331 | test PC, FRAME_TYPE // Check frame type marker.
3332 | jnz >7 // Not returning to a fixarg Lua func?
3333 switch (op) {
3334 case BC_RET:
3335 |->BC_RET_Z:
3336 | mov KBASE, BASE // Use KBASE for result move.
3337 | sub RD, 1
3338 | jz >3
3339 |2:
3340 | mov RB, [KBASE+RA] // Move results down.
3341 | mov [KBASE-8], RB
3342 | mov RB, [KBASE+RA+4]
3343 | mov [KBASE-4], RB
3344 | add KBASE, 8
3345 | sub RD, 1
3346 | jnz <2
3347 |3:
3348 | mov RD, NRESULTS // Note: NRESULTS may be >255.
3349 | movzx RB, PC_RB // So cannot compare with RDL!
3350 |5:
3351 | cmp RB, RD // More results expected?
3352 | ja >6
3353 break;
3354 case BC_RET1:
3355 | mov RB, [BASE+RA+4]
3356 | mov [BASE-4], RB
3357 | mov RB, [BASE+RA]
3358 | mov [BASE-8], RB
3359 /* fallthrough */
3360 case BC_RET0:
3361 |5:
3362 | cmp PC_RB, RDL // More results expected?
3363 | ja >6
3364 default:
3365 break;
3366 }
3367 | movzx RA, PC_RA
3368 | not RA // Note: ~RA = -(RA+1)
3369 | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
3370 | mov LFUNC:KBASE, [BASE-8]
3371 | mov PROTO:KBASE, LFUNC:KBASE->pt
3372 | mov KBASE, PROTO:KBASE->k
3373 | ins_next
3374 |
3375 |6: // Fill up results with nil.
3376 if (op == BC_RET) {
3377 | mov dword [KBASE-4], LJ_TNIL // Note: relies on shifted base.
3378 | add KBASE, 8
3379 } else {
3380 | mov dword [BASE+RD*8-12], LJ_TNIL
3381 }
3382 | add RD, 1
3383 | jmp <5
3384 |
3385 |7: // Non-standard return case.
3386 | jnp ->vm_return
3387 | // Return from vararg function: relocate BASE down and RA up.
3388 | and PC, -8
3389 | sub BASE, PC
3390 if (op != BC_RET0) {
3391 | add RA, PC
3392 }
3393 | jmp <1
3394 break;
3395
3396 /* -- Loops and branches ------------------------------------------------ */
3397
3398 |.define FOR_IDX, qword [RA]; .define FOR_TIDX, dword [RA+4]
3399 |.define FOR_STOP, qword [RA+8]; .define FOR_TSTOP, dword [RA+12]
3400 |.define FOR_STEP, qword [RA+16]; .define FOR_TSTEP, dword [RA+20]
3401 |.define FOR_EXT, qword [RA+24]; .define FOR_TEXT, dword [RA+28]
3402
3403 case BC_FORL:
3404#if LJ_HASJIT
3405 | hotloop RB
3406#endif
3407 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
3408 break;
3409
3410 case BC_JFORI:
3411 case BC_JFORL:
3412#if !LJ_HASJIT
3413 break;
3414#endif
3415 case BC_FORI:
3416 case BC_IFORL:
3417 vk = (op == BC_IFORL || op == BC_JFORL);
3418 | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
3419 | lea RA, [BASE+RA*8]
3420 if (!vk) {
3421 | cmp FOR_TIDX, LJ_TISNUM; ja ->vmeta_for // Type checks
3422 | cmp FOR_TSTOP, LJ_TISNUM; ja ->vmeta_for
3423 }
3424 | mov RB, FOR_TSTEP // Load type/hiword of for step.
3425 if (!vk) {
3426 | cmp RB, LJ_TISNUM; ja ->vmeta_for
3427 }
3428 | fld FOR_STOP
3429 | fld FOR_IDX
3430 if (vk) {
3431 | fadd FOR_STEP // nidx = idx + step
3432 | fst FOR_IDX
3433 }
3434 | fst FOR_EXT
3435 | test RB, RB // Swap lim/(n)idx if step non-negative.
3436 | js >1
3437 | fxch
3438 |1:
3439 | fcomparepp // eax (RD) modified if !cmov.
3440 if (!cmov) {
3441 | movzx RD, PC_RD // Need to reload RD.
3442 }
3443 if (op == BC_FORI) {
3444 | jnb >2
3445 | branchPC RD
3446 } else if (op == BC_JFORI) {
3447 | branchPC RD
3448 | movzx RD, PC_RD
3449 | jnb =>BC_JLOOP
3450 } else if (op == BC_IFORL) {
3451 | jb >2
3452 | branchPC RD
3453 } else {
3454 | jnb =>BC_JLOOP
3455 }
3456 |2:
3457 | ins_next
3458 break;
3459
3460 case BC_ITERL:
3461#if LJ_HASJIT
3462 | hotloop RB
3463#endif
3464 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
3465 break;
3466
3467 case BC_JITERL:
3468#if !LJ_HASJIT
3469 break;
3470#endif
3471 case BC_IITERL:
3472 | ins_AJ // RA = base, RD = target
3473 | lea RA, [BASE+RA*8]
3474 | mov RB, [RA+4]
3475 | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
3476 if (op == BC_JITERL) {
3477 | mov [RA-4], RB
3478 | mov RB, [RA]
3479 | mov [RA-8], RB
3480 | jmp =>BC_JLOOP
3481 } else {
3482 | branchPC RD // Otherwise save control var + branch.
3483 | mov RD, [RA]
3484 | mov [RA-4], RB
3485 | mov [RA-8], RD
3486 }
3487 |1:
3488 | ins_next
3489 break;
3490
3491 case BC_LOOP:
3492 | ins_A // RA = base, RD = target (loop extent)
3493 | // Note: RA/RD is only used by trace recorder to determine scope/extent
3494 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
3495#if LJ_HASJIT
3496 | hotloop RB
3497#endif
3498 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
3499 break;
3500
3501 case BC_ILOOP:
3502 | ins_A // RA = base, RD = target (loop extent)
3503 | ins_next
3504 break;
3505
3506 case BC_JLOOP:
3507#if LJ_HASJIT
3508 | ins_AD // RA = base (ignored), RD = traceno
3509 | mov RA, [DISPATCH+DISPATCH_J(trace)]
3510 | mov TRACE:RD, [RA+RD*4]
3511 | mov RD, TRACE:RD->mcode
3512 | mov L:RB, SAVE_L
3513 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
3514 | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB
3515 | jmp RD
3516#endif
3517 break;
3518
3519 case BC_JMP:
3520 | ins_AJ // RA = unused, RD = target
3521 | branchPC RD
3522 | ins_next
3523 break;
3524
3525 /* ---------------------------------------------------------------------- */
3526
3527 default:
3528 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
3529 exit(2);
3530 break;
3531 }
3532}
3533
3534static int build_backend(BuildCtx *ctx)
3535{
3536 int op;
3537 int cmov = 1;
3538#ifdef LUAJIT_CPU_NOCMOV
3539 cmov = 0;
3540#endif
3541
3542 dasm_growpc(Dst, BC__MAX);
3543
3544 build_subroutines(ctx, cmov);
3545
3546 |.code_op
3547 for (op = 0; op < BC__MAX; op++)
3548 build_ins(ctx, (BCOp)op, op, cmov);
3549
3550 return BC__MAX;
3551}
3552
3553/* Emit pseudo frame-info for all assembler functions. */
3554static void emit_asm_debug(BuildCtx *ctx)
3555{
3556 switch (ctx->mode) {
3557 case BUILD_elfasm:
3558 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
3559 fprintf(ctx->fp,
3560 ".Lframe0:\n"
3561 "\t.long .LECIE0-.LSCIE0\n"
3562 ".LSCIE0:\n"
3563 "\t.long 0xffffffff\n"
3564 "\t.byte 0x1\n"
3565 "\t.string \"\"\n"
3566 "\t.uleb128 0x1\n"
3567 "\t.sleb128 -4\n"
3568 "\t.byte 0x8\n"
3569 "\t.byte 0xc\n\t.uleb128 0x4\n\t.uleb128 0x4\n"
3570 "\t.byte 0x88\n\t.uleb128 0x1\n"
3571 "\t.align 4\n"
3572 ".LECIE0:\n\n");
3573 fprintf(ctx->fp,
3574 ".LSFDE0:\n"
3575 "\t.long .LEFDE0-.LASFDE0\n"
3576 ".LASFDE0:\n"
3577 "\t.long .Lframe0\n"
3578 "\t.long .Lbegin\n"
3579 "\t.long %d\n"
3580 "\t.byte 0xe\n\t.uleb128 0x30\n" /* def_cfa_offset */
3581 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
3582 "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
3583 "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
3584 "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
3585 "\t.align 4\n"
3586 ".LEFDE0:\n\n", (int)ctx->codesz);
3587 break;
3588 default: /* Difficult for other modes. */
3589 break;
3590 }
3591}
3592