summaryrefslogtreecommitdiff
path: root/src/buildvm_x86.dasc
diff options
context:
space:
mode:
Diffstat (limited to 'src/buildvm_x86.dasc')
-rw-r--r--src/buildvm_x86.dasc1181
1 files changed, 593 insertions, 588 deletions
diff --git a/src/buildvm_x86.dasc b/src/buildvm_x86.dasc
index fdbefb83..b970278e 100644
--- a/src/buildvm_x86.dasc
+++ b/src/buildvm_x86.dasc
@@ -40,6 +40,7 @@
40|.endif 40|.endif
41| 41|
42|.define RA, ecx 42|.define RA, ecx
43|.define RAH, ch
43|.define RAL, cl 44|.define RAL, cl
44|.define RB, ebp // Must be ebp (C callee-save). 45|.define RB, ebp // Must be ebp (C callee-save).
45|.define RC, eax // Must be eax (fcomparepp and others). 46|.define RC, eax // Must be eax (fcomparepp and others).
@@ -282,6 +283,27 @@
282| .endmacro 283| .endmacro
283|.endif 284|.endif
284| 285|
286|// Call decode and dispatch.
287|.macro ins_callt
288| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC
289| mov PC, LFUNC:RB->pc
290| mov RA, [PC]
291| movzx OP, RAL
292| movzx RA, RAH
293| add PC, 4
294|.if X64
295| jmp aword [DISPATCH+OP*8]
296|.else
297| jmp aword [DISPATCH+OP*4]
298|.endif
299|.endmacro
300|
301|.macro ins_call
302| // BASE = new base, RB = LFUNC, RD = nargs+1
303| mov [BASE-4], PC
304| ins_callt
305|.endmacro
306|
285|//----------------------------------------------------------------------- 307|//-----------------------------------------------------------------------
286| 308|
287|// Macros to test operand types. 309|// Macros to test operand types.
@@ -394,156 +416,26 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
394 |.code_sub 416 |.code_sub
395 | 417 |
396 |//----------------------------------------------------------------------- 418 |//-----------------------------------------------------------------------
397 |//-- Call and return handling ------------------------------------------- 419 |//-- Return handling ----------------------------------------------------
398 |//----------------------------------------------------------------------- 420 |//-----------------------------------------------------------------------
399 | 421 |
400 |// Reminder: A call gate may be called with func/args above L->maxstack, 422 |->vm_returnp:
401 |// i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot, 423 | test PC, FRAME_P
402 |// too. This means all call gates (L*, C and fast functions) must check 424 | jz ->cont_dispatch
403 |// for stack overflow _before_ adding more slots!
404 |
405 |//-- Call gates ---------------------------------------------------------
406 |
407 |->gate_lf: // Call gate for fixarg Lua functions.
408 | // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = old base), PC = return
409 | // DISPATCH initialized
410 | mov BASE, RA
411 | mov [BASE-4], PC // Store caller PC.
412 | mov PC, LFUNC:RB->pc
413 | movzx RA, byte [PC+PC2PROTO(framesize)]
414 | mov KBASE, [PC+PC2PROTO(k)]
415 | mov L:RB, SAVE_L
416 | lea RA, [BASE+RA*8] // Top of frame.
417 | cmp RA, L:RB->maxstack
418 | ja ->gate_lf_growstack
419 | movzx RA, byte [PC+PC2PROTO(numparams)]
420 | cmp NARGS:RC, RA // Check for missing parameters.
421 | jbe >3
422 |2:
423#if LJ_HASJIT
424 | // NYI: Disabled, until the tracer supports recursion/upcalls/leaves.
425 | // hotcall RB
426#endif
427 | ins_next
428 |
429 |3: // Clear missing parameters.
430 | mov dword [BASE+NARGS:RC*8-4], LJ_TNIL
431 | add NARGS:RC, 1
432 | cmp NARGS:RC, RA // Check for missing parameters.
433 | jbe <3
434 | jmp <2
435 |
436 |->gate_lv: // Call gate for vararg Lua functions.
437 | // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = old base), PC = return
438 | // DISPATCH initialized
439 | mov [RA-4], PC // Store caller PC.
440 | lea PC, [NARGS:RC*8+FRAME_VARG]
441 | lea BASE, [RA+PC-FRAME_VARG]
442 | mov [BASE-8], LFUNC:RB // Store copy of LFUNC.
443 | mov [BASE-4], PC // Store delta + FRAME_VARG.
444 | mov PC, LFUNC:RB->pc
445 | movzx RB, byte [PC+PC2PROTO(framesize)]
446 | lea KBASE, [BASE+RB*8]
447 | mov L:RB, SAVE_L
448 | cmp KBASE, L:RB->maxstack
449 | ja ->gate_lv_growstack // Need to grow stack.
450 | mov RC, BASE
451 | movzx RB, byte [PC+PC2PROTO(numparams)]
452 | test RB, RB
453 | jz >2
454 |1: // Copy fixarg slots up to new frame.
455 | add RA, 8
456 | cmp RA, BASE
457 | jnb >3 // Less args than parameters?
458 | mov KBASE, [RA-8]
459 | mov [RC], KBASE
460 | mov KBASE, [RA-4]
461 | mov [RC+4], KBASE
462 | add RC, 8
463 | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC).
464 | sub RB, 1
465 | jnz <1
466 |2:
467 | mov KBASE, [PC+PC2PROTO(k)]
468#if LJ_HASJIT
469 | // NYI: Disabled, until the tracer supports recursion/upcalls/leaves.
470 | // hotcall RB
471#endif
472 | ins_next
473 |
474 |3: // Clear missing parameters.
475 | mov dword [RC+4], LJ_TNIL
476 | add RC, 8
477 | sub RB, 1
478 | jnz <3
479 | jmp <2
480 | 425 |
481 |->gate_cwrap: // Call gate for wrapped C functions. 426 | // Return from pcall or xpcall fast func.
482 | // RA = new base, RB = CFUNC, RC = nargs+1, (BASE = old base), PC = return 427 | and PC, -8
483 | mov [RA-4], PC 428 | sub BASE, PC // Restore caller base.
484 | mov KBASEa, CFUNC:RB->f 429 | lea RAa, [RA+PC-8] // Rebase RA and prepend one result.
485 | mov L:RB, SAVE_L 430 | mov PC, [BASE-4] // Fetch PC of previous frame.
486 | lea RC, [RA+NARGS:RC*8-8] 431 | // Prepending may overwrite the pcall frame, so do it at the end.
487 | mov L:RB->base, RA 432 | mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results.
488 | lea RA, [RC+8*LUA_MINSTACK]
489 | mov L:RB->top, RC
490 | cmp RA, L:RB->maxstack
491 |.if X64
492 | mov CARG2, KBASEa
493 | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
494 |.else
495 | mov ARG2, KBASEa
496 | mov ARG1, L:RB
497 |.endif
498 | ja ->gate_c_growstack // Need to grow stack.
499 | set_vmstate C
500 | // (lua_State *L, lua_CFunction f)
501 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
502 | set_vmstate INTERP
503 | // nresults returned in eax (RD).
504 | mov BASE, L:RB->base
505 | lea RA, [BASE+RD*8]
506 | neg RA
507 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
508 |->vm_returnc:
509 | add RD, 1 // RD = nresults+1
510 | mov MULTRES, RD
511 | test PC, FRAME_TYPE
512 | jz ->BC_RET_Z // Handle regular return to Lua.
513 | jmp ->vm_return
514 | 433 |
515 |->gate_c: // Call gate for C functions.
516 | // RA = new base, RB = CFUNC, RC = nargs+1, (BASE = old base), PC = return
517 | mov [RA-4], PC
518 | mov KBASEa, CFUNC:RB->f
519 | mov L:RB, SAVE_L
520 | lea RC, [RA+NARGS:RC*8-8]
521 | mov L:RB->base, RA
522 | lea RA, [RC+8*LUA_MINSTACK]
523 | mov L:RB->top, RC
524 | cmp RA, L:RB->maxstack
525 |.if X64
526 | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
527 |.else
528 | mov ARG1, L:RB
529 |.endif
530 | ja ->gate_c_growstack // Need to grow stack.
531 | set_vmstate C
532 | call KBASEa // (lua_State *L)
533 | set_vmstate INTERP
534 | // nresults returned in eax (RD).
535 | mov BASE, L:RB->base
536 | lea RA, [BASE+RD*8]
537 | neg RA
538 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
539 |->vm_returnc: 434 |->vm_returnc:
540 | add RD, 1 // RD = nresults+1 435 | add RD, 1 // RD = nresults+1
541 | mov MULTRES, RD 436 | mov MULTRES, RD
542 | test PC, FRAME_TYPE 437 | test PC, FRAME_TYPE
543 | jz ->BC_RET_Z // Handle regular return to Lua. 438 | jz ->BC_RET_Z // Handle regular return to Lua.
544 | // Fallthrough.
545 |
546 |//-- Return handling (non-inline) ---------------------------------------
547 | 439 |
548 |->vm_return: 440 |->vm_return:
549 | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return 441 | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
@@ -654,51 +546,41 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
654 | set_vmstate INTERP 546 | set_vmstate INTERP
655 | jmp ->vm_returnc // Increments RD/MULTRES and returns. 547 | jmp ->vm_returnc // Increments RD/MULTRES and returns.
656 | 548 |
657 |->vm_returnp: 549 |//-----------------------------------------------------------------------
658 | test PC, FRAME_P 550 |//-- Grow stack for calls -----------------------------------------------
659 | jz ->cont_dispatch 551 |//-----------------------------------------------------------------------
660 |
661 | // Return from pcall or xpcall fast func.
662 | and PC, -8
663 | sub BASE, PC // Restore caller base.
664 | lea RAa, [RA+PC-8] // Rebase RA and prepend one result.
665 | mov PC, [BASE-4] // Fetch PC of previous frame.
666 | // Prepending may overwrite the pcall frame, so do it at the end.
667 | mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results.
668 | jmp ->vm_returnc // Increments RD/MULTRES and returns.
669 |
670 |//-- Grow stack on-demand -----------------------------------------------
671 | 552 |
672 |->gate_c_growstack: // Grow stack for C function. 553 |->vm_growstack_c: // Grow stack for C function.
673 | mov FCARG2, LUA_MINSTACK 554 | mov FCARG2, LUA_MINSTACK
674 | jmp >1 555 | jmp >2
675 | 556 |
676 |->gate_lv_growstack: // Grow stack for vararg Lua function. 557 |->vm_growstack_v: // Grow stack for vararg Lua function.
677 | mov BASE, RA // Drop vararg frame again. 558 | sub RD, 8
559 | jmp >1
678 | 560 |
679 |->gate_lf_growstack: // Grow stack for fixarg Lua function. 561 |->vm_growstack_f: // Grow stack for fixarg Lua function.
680 | // BASE = new base, RC = nargs+1, RB = L, PC = first PC 562 | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
681 | lea RC, [BASE+NARGS:RC*8-8] 563 | lea RD, [BASE+NARGS:RD*8-8]
682 | movzx RA, byte [PC+PC2PROTO(framesize)] 564 |1:
565 | movzx RA, byte [PC-4+PC2PROTO(framesize)]
683 | add PC, 4 // Must point after first instruction. 566 | add PC, 4 // Must point after first instruction.
684 | mov L:RB->base, BASE 567 | mov L:RB->base, BASE
685 | mov L:RB->top, RC 568 | mov L:RB->top, RD
686 | mov SAVE_PC, PC 569 | mov SAVE_PC, PC
687 | mov FCARG2, RA 570 | mov FCARG2, RA
688 |1: 571 |2:
572 | // RB = L, L->base = new base, L->top = top
689 | mov FCARG1, L:RB 573 | mov FCARG1, L:RB
690 | // L:RB = L, L->base = new base, L->top = top
691 | // SAVE_PC = initial PC+1 (undefined for C functions)
692 | call extern lj_state_growstack@8 // (lua_State *L, int n) 574 | call extern lj_state_growstack@8 // (lua_State *L, int n)
693 | mov RA, L:RB->base 575 | mov BASE, L:RB->base
694 | mov RC, L:RB->top 576 | mov RD, L:RB->top
695 | mov LFUNC:RB, [RA-8] 577 | mov LFUNC:RB, [BASE-8]
696 | mov PC, [RA-4] 578 | mov PC, [BASE-4]
697 | sub RC, RA 579 | sub RD, BASE
698 | shr RC, 3 580 | shr RD, 3
699 | add NARGS:RC, 1 581 | add NARGS:RD, 1
700 | // RA = new base, RB = LFUNC, RC = nargs+1, (BASE = invalid), PC restored. 582 | // BASE = new base, RB = LFUNC, RD = nargs+1, PC restored.
701 | jmp aword LFUNC:RB->gate // Just retry call. 583 | ins_callt // Just retry the call.
702 | 584 |
703 |//----------------------------------------------------------------------- 585 |//-----------------------------------------------------------------------
704 |//-- Entry points into the assembler VM --------------------------------- 586 |//-- Entry points into the assembler VM ---------------------------------
@@ -789,16 +671,20 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
789 | add PC, RA 671 | add PC, RA
790 | sub PC, BASE // PC = frame delta + frame type 672 | sub PC, BASE // PC = frame delta + frame type
791 | 673 |
792 | mov RC, L:RB->top 674 | mov RD, L:RB->top
793 | sub RC, RA 675 | sub RD, RA
794 | shr NARGS:RC, 3 676 | shr NARGS:RD, 3
795 | add NARGS:RC, 1 // RC = nargs+1 677 | add NARGS:RD, 1 // RD = nargs+1
796 | 678 |
679 |->vm_call_dispatch:
797 | mov LFUNC:RB, [RA-8] 680 | mov LFUNC:RB, [RA-8]
798 | cmp dword [RA-4], LJ_TFUNC 681 | cmp dword [RA-4], LJ_TFUNC
799 | jne ->vmeta_call // Ensure KBASE defined and != BASE. 682 | jne ->vmeta_call // Ensure KBASE defined and != BASE.
800 | jmp aword LFUNC:RB->gate 683 |
801 | // RA = new base, RB = LFUNC/CFUNC, RC = nargs+1. 684 |->vm_call_dispatch_f:
685 | mov BASE, RA
686 | ins_call
687 | // BASE = new base, RD = nargs+1
802 | 688 |
803 |->vm_cpcall: // Setup protected C frame, call C. 689 |->vm_cpcall: // Setup protected C frame, call C.
804 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) 690 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
@@ -979,8 +865,8 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
979 | lea PC, [RA+FRAME_CONT] 865 | lea PC, [RA+FRAME_CONT]
980 | sub PC, BASE 866 | sub PC, BASE
981 | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here. 867 | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
982 | mov NARGS:RC, 3 // 2+1 args for func(t, k). 868 | mov NARGS:RD, 2+1 // 2 args for func(t, k).
983 | jmp aword LFUNC:RB->gate 869 | jmp ->vm_call_dispatch_f
984 | 870 |
985 |//----------------------------------------------------------------------- 871 |//-----------------------------------------------------------------------
986 | 872 |
@@ -1058,8 +944,8 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1058 | lea PC, [RA+FRAME_CONT] 944 | lea PC, [RA+FRAME_CONT]
1059 | sub PC, BASE 945 | sub PC, BASE
1060 | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here. 946 | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
1061 | mov NARGS:RC, 4 // 3+1 args for func(t, k, v). 947 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
1062 | jmp aword LFUNC:RB->gate 948 | jmp ->vm_call_dispatch_f
1063 | 949 |
1064 |//-- Comparison metamethods --------------------------------------------- 950 |//-- Comparison metamethods ---------------------------------------------
1065 | 951 |
@@ -1206,11 +1092,8 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1206 | sub RC, BASE 1092 | sub RC, BASE
1207 | mov [RA-12], PC // [cont|PC] 1093 | mov [RA-12], PC // [cont|PC]
1208 | lea PC, [RC+FRAME_CONT] 1094 | lea PC, [RC+FRAME_CONT]
1209 | mov LFUNC:RB, [RA-8] 1095 | mov NARGS:RD, 2+1 // 2 args for func(o1, o2).
1210 | mov NARGS:RC, 3 // 2+1 args for func(o1, o2). 1096 | jmp ->vm_call_dispatch
1211 | cmp dword [RA-4], LJ_TFUNC
1212 | jne ->vmeta_call
1213 | jmp aword LFUNC:RB->gate
1214 | 1097 |
1215 |->vmeta_len: 1098 |->vmeta_len:
1216 | mov L:RB, SAVE_L 1099 | mov L:RB, SAVE_L
@@ -1225,19 +1108,21 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1225 | 1108 |
1226 |//-- Call metamethod ---------------------------------------------------- 1109 |//-- Call metamethod ----------------------------------------------------
1227 | 1110 |
1111 |->vmeta_call_ra:
1112 | lea RA, [BASE+RA*8+8]
1228 |->vmeta_call: // Resolve and call __call metamethod. 1113 |->vmeta_call: // Resolve and call __call metamethod.
1229 | // RA = new base, RC = nargs+1, BASE = old base, PC = return 1114 | // BASE = old base, RA = new base, RC = nargs+1, PC = return
1230 | mov TMP2, RA // Save RA, RC for us. 1115 | mov TMP2, RA // Save RA, RC for us.
1231 | mov TMP1, NARGS:RC 1116 | mov TMP1, NARGS:RD
1232 | sub RA, 8 1117 | sub RA, 8
1233 |.if X64 1118 |.if X64
1234 | mov L:RB, SAVE_L 1119 | mov L:RB, SAVE_L
1235 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. 1120 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
1236 | mov CARG2d, RA 1121 | mov CARG2d, RA
1237 | lea CARG3d, [RA+NARGS:RC*8] 1122 | lea CARG3d, [RA+NARGS:RD*8]
1238 | mov CARG1d, L:RB // Caveat: CARG1d may be RA. 1123 | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
1239 |.else 1124 |.else
1240 | lea RC, [RA+NARGS:RC*8] 1125 | lea RC, [RA+NARGS:RD*8]
1241 | mov L:RB, SAVE_L 1126 | mov L:RB, SAVE_L
1242 | mov ARG2, RA 1127 | mov ARG2, RA
1243 | mov ARG3, RC 1128 | mov ARG3, RC
@@ -1248,13 +1133,14 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1248 | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) 1133 | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1249 | mov BASE, L:RB->base 1134 | mov BASE, L:RB->base
1250 | mov RA, TMP2 1135 | mov RA, TMP2
1251 | mov NARGS:RC, TMP1 1136 | mov NARGS:RD, TMP1
1252 | mov LFUNC:RB, [RA-8] 1137 | mov LFUNC:RB, [RA-8]
1253 | add NARGS:RC, 1 1138 | add NARGS:RD, 1
1254 | // This is fragile. L->base must not move, KBASE must always be defined. 1139 | // This is fragile. L->base must not move, KBASE must always be defined.
1255 | cmp KBASE, BASE // Continue with CALLT if flag set. 1140 | cmp KBASE, BASE // Continue with CALLT if flag set.
1256 | je ->BC_CALLT_Z 1141 | je ->BC_CALLT_Z
1257 | jmp aword LFUNC:RB->gate // Otherwise call resolved metamethod. 1142 | mov BASE, RA
1143 | ins_call // Otherwise call resolved metamethod.
1258 | 1144 |
1259 |//-- Argument coercion for 'for' statement ------------------------------ 1145 |//-- Argument coercion for 'for' statement ------------------------------
1260 | 1146 |
@@ -1271,9 +1157,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1271 | movzx OP, RCL 1157 | movzx OP, RCL
1272 | shr RC, 16 1158 | shr RC, 16
1273 |.if X64 1159 |.if X64
1274 | jmp aword [DISPATCH+OP*8+BC__MAX*8] // Retry FORI or JFORI. 1160 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI.
1275 |.else 1161 |.else
1276 | jmp aword [DISPATCH+OP*4+BC__MAX*4] // Retry FORI or JFORI. 1162 | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Retry FORI or JFORI.
1277 |.endif 1163 |.endif
1278 | 1164 |
1279 |//----------------------------------------------------------------------- 1165 |//-----------------------------------------------------------------------
@@ -1286,31 +1172,31 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1286 | 1172 |
1287 |.macro .ffunc_1, name 1173 |.macro .ffunc_1, name
1288 |->ff_ .. name: 1174 |->ff_ .. name:
1289 | cmp NARGS:RC, 1+1; jb ->fff_fallback 1175 | cmp NARGS:RD, 1+1; jb ->fff_fallback
1290 |.endmacro 1176 |.endmacro
1291 | 1177 |
1292 |.macro .ffunc_2, name 1178 |.macro .ffunc_2, name
1293 |->ff_ .. name: 1179 |->ff_ .. name:
1294 | cmp NARGS:RC, 2+1; jb ->fff_fallback 1180 | cmp NARGS:RD, 2+1; jb ->fff_fallback
1295 |.endmacro 1181 |.endmacro
1296 | 1182 |
1297 |.macro .ffunc_n, name 1183 |.macro .ffunc_n, name
1298 | .ffunc_1 name 1184 | .ffunc_1 name
1299 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback 1185 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
1300 | fld qword [RA] 1186 | fld qword [BASE]
1301 |.endmacro 1187 |.endmacro
1302 | 1188 |
1303 |.macro .ffunc_n, name, op 1189 |.macro .ffunc_n, name, op
1304 | .ffunc_1 name 1190 | .ffunc_1 name
1305 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback 1191 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
1306 | op 1192 | op
1307 | fld qword [RA] 1193 | fld qword [BASE]
1308 |.endmacro 1194 |.endmacro
1309 | 1195 |
1310 |.macro .ffunc_nsse, name, op 1196 |.macro .ffunc_nsse, name, op
1311 | .ffunc_1 name 1197 | .ffunc_1 name
1312 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback 1198 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
1313 | op xmm0, qword [RA] 1199 | op xmm0, qword [BASE]
1314 |.endmacro 1200 |.endmacro
1315 | 1201 |
1316 |.macro .ffunc_nsse, name 1202 |.macro .ffunc_nsse, name
@@ -1319,26 +1205,26 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1319 | 1205 |
1320 |.macro .ffunc_nn, name 1206 |.macro .ffunc_nn, name
1321 | .ffunc_2 name 1207 | .ffunc_2 name
1322 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback 1208 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
1323 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback 1209 | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback
1324 | fld qword [RA] 1210 | fld qword [BASE]
1325 | fld qword [RA+8] 1211 | fld qword [BASE+8]
1326 |.endmacro 1212 |.endmacro
1327 | 1213 |
1328 |.macro .ffunc_nnsse, name 1214 |.macro .ffunc_nnsse, name
1329 | .ffunc_1 name 1215 | .ffunc_1 name
1330 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback 1216 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
1331 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback 1217 | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback
1332 | movsd xmm0, qword [RA] 1218 | movsd xmm0, qword [BASE]
1333 | movsd xmm1, qword [RA+8] 1219 | movsd xmm1, qword [BASE+8]
1334 |.endmacro 1220 |.endmacro
1335 | 1221 |
1336 |.macro .ffunc_nnr, name 1222 |.macro .ffunc_nnr, name
1337 | .ffunc_2 name 1223 | .ffunc_2 name
1338 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback 1224 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
1339 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback 1225 | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback
1340 | fld qword [RA+8] 1226 | fld qword [BASE+8]
1341 | fld qword [RA] 1227 | fld qword [BASE]
1342 |.endmacro 1228 |.endmacro
1343 | 1229 |
1344 |// Inlined GC threshold check. Caveat: uses label 1. 1230 |// Inlined GC threshold check. Caveat: uses label 1.
@@ -1353,15 +1239,16 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1353 |//-- Base library: checks ----------------------------------------------- 1239 |//-- Base library: checks -----------------------------------------------
1354 | 1240 |
1355 |.ffunc_1 assert 1241 |.ffunc_1 assert
1356 | mov RB, [RA+4] 1242 | mov RB, [BASE+4]
1357 | cmp RB, LJ_TISTRUECOND; jae ->fff_fallback 1243 | cmp RB, LJ_TISTRUECOND; jae ->fff_fallback
1244 | mov PC, [BASE-4]
1358 | mov MULTRES, RD 1245 | mov MULTRES, RD
1359 | mov [RA-4], RB 1246 | mov [BASE-4], RB
1360 | mov RB, [RA] 1247 | mov RB, [BASE]
1361 | mov [RA-8], RB 1248 | mov [BASE-8], RB
1362 | sub RD, 2 1249 | sub RD, 2
1363 | jz >2 1250 | jz >2
1364 | mov TMP1, RA 1251 | mov RA, BASE
1365 |1: 1252 |1:
1366 | add RA, 8 1253 | add RA, 8
1367 | mov RB, [RA+4] 1254 | mov RB, [RA+4]
@@ -1370,13 +1257,12 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1370 | mov [RA-8], RB 1257 | mov [RA-8], RB
1371 | sub RD, 1 1258 | sub RD, 1
1372 | jnz <1 1259 | jnz <1
1373 | mov RA, TMP1
1374 |2: 1260 |2:
1375 | mov RD, MULTRES 1261 | mov RD, MULTRES
1376 | jmp ->fff_res_ 1262 | jmp ->fff_res_
1377 | 1263 |
1378 |.ffunc_1 type 1264 |.ffunc_1 type
1379 | mov RB, [RA+4] 1265 | mov RB, [BASE+4]
1380 | mov RC, ~LJ_TNUMX 1266 | mov RC, ~LJ_TNUMX
1381 | not RB 1267 | not RB
1382 | cmp RC, RB 1268 | cmp RC, RB
@@ -1385,29 +1271,29 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1385 ||} else { 1271 ||} else {
1386 | jbe >1; mov RC, RB; 1: 1272 | jbe >1; mov RC, RB; 1:
1387 ||} 1273 ||}
1388 | mov CFUNC:RB, [RA-8] 1274 | mov CFUNC:RB, [BASE-8]
1389 | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] 1275 | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))]
1390 | mov dword [RA-4], LJ_TSTR 1276 | mov PC, [BASE-4]
1391 | mov [RA-8], STR:RC 1277 | mov dword [BASE-4], LJ_TSTR
1278 | mov [BASE-8], STR:RC
1392 | jmp ->fff_res1 1279 | jmp ->fff_res1
1393 | 1280 |
1394 |//-- Base library: getters and setters --------------------------------- 1281 |//-- Base library: getters and setters ---------------------------------
1395 | 1282 |
1396 |.ffunc_1 getmetatable 1283 |.ffunc_1 getmetatable
1397 | mov RB, [RA+4] 1284 | mov RB, [BASE+4]
1285 | mov PC, [BASE-4]
1398 | cmp RB, LJ_TTAB; jne >6 1286 | cmp RB, LJ_TTAB; jne >6
1399 |1: // Field metatable must be at same offset for GCtab and GCudata! 1287 |1: // Field metatable must be at same offset for GCtab and GCudata!
1400 | mov TAB:RB, [RA] 1288 | mov TAB:RB, [BASE]
1401 | mov TAB:RB, TAB:RB->metatable 1289 | mov TAB:RB, TAB:RB->metatable
1402 |2: 1290 |2:
1403 | test TAB:RB, TAB:RB 1291 | test TAB:RB, TAB:RB
1404 | mov dword [RA-4], LJ_TNIL 1292 | mov dword [BASE-4], LJ_TNIL
1405 | jz ->fff_res1 1293 | jz ->fff_res1
1406 | mov CFUNC:RC, [RA-8]
1407 | mov STR:RC, [DISPATCH+DISPATCH_GL(mmname)+4*MM_metatable] 1294 | mov STR:RC, [DISPATCH+DISPATCH_GL(mmname)+4*MM_metatable]
1408 | mov dword [RA-4], LJ_TTAB // Store metatable as default result. 1295 | mov dword [BASE-4], LJ_TTAB // Store metatable as default result.
1409 | mov [RA-8], TAB:RB 1296 | mov [BASE-8], TAB:RB
1410 | mov TMP1, RA // Save result pointer.
1411 | mov RA, TAB:RB->hmask 1297 | mov RA, TAB:RB->hmask
1412 | and RA, STR:RC->hash 1298 | and RA, STR:RC->hash
1413 | imul RA, #NODE 1299 | imul RA, #NODE
@@ -1424,11 +1310,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1424 | jmp ->fff_res1 // Not found, keep default result. 1310 | jmp ->fff_res1 // Not found, keep default result.
1425 |5: 1311 |5:
1426 | mov RB, [RA+4] 1312 | mov RB, [RA+4]
1427 | cmp RB, LJ_TNIL; je ->fff_res1 // Dito for nil value. 1313 | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
1428 | mov RC, [RA] 1314 | mov RC, [RA]
1429 | mov RA, TMP1 // Restore result pointer. 1315 | mov [BASE-4], RB // Return value of mt.__metatable.
1430 | mov [RA-4], RB // Return value of mt.__metatable. 1316 | mov [BASE-8], RC
1431 | mov [RA-8], RC
1432 | jmp ->fff_res1 1317 | jmp ->fff_res1
1433 | 1318 |
1434 |6: 1319 |6:
@@ -1441,15 +1326,16 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1441 | jmp <2 1326 | jmp <2
1442 | 1327 |
1443 |.ffunc_2 setmetatable 1328 |.ffunc_2 setmetatable
1444 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback 1329 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1445 | // Fast path: no mt for table yet and not clearing the mt. 1330 | // Fast path: no mt for table yet and not clearing the mt.
1446 | mov TAB:RB, [RA] 1331 | mov TAB:RB, [BASE]
1447 | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback 1332 | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
1448 | cmp dword [RA+12], LJ_TTAB; jne ->fff_fallback 1333 | cmp dword [BASE+12], LJ_TTAB; jne ->fff_fallback
1449 | mov TAB:RC, [RA+8] 1334 | mov TAB:RC, [BASE+8]
1450 | mov TAB:RB->metatable, TAB:RC 1335 | mov TAB:RB->metatable, TAB:RC
1451 | mov dword [RA-4], LJ_TTAB // Return original table. 1336 | mov PC, [BASE-4]
1452 | mov [RA-8], TAB:RB 1337 | mov dword [BASE-4], LJ_TTAB // Return original table.
1338 | mov [BASE-8], TAB:RB
1453 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) 1339 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
1454 | jz >1 1340 | jz >1
1455 | // Possible write barrier. Table is black, but skip iswhite(mt) check. 1341 | // Possible write barrier. Table is black, but skip iswhite(mt) check.
@@ -1458,70 +1344,73 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1458 | jmp ->fff_res1 1344 | jmp ->fff_res1
1459 | 1345 |
1460 |.ffunc_2 rawget 1346 |.ffunc_2 rawget
1461 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback 1347 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1462 |.if X64 1348 |.if X64WIN
1463 | mov TMP1, BASE // Save BASE and RA. 1349 | mov RB, BASE // Save BASE.
1464 | mov RB, RA 1350 | lea CARG3d, [BASE+8]
1465 | mov CARG2d, [RA] 1351 | mov CARG2d, [BASE] // Caveat: CARG2d == BASE.
1466 | lea CARG3d, [RA+8] 1352 | mov CARG1d, SAVE_L
1467 | mov CARG1d, SAVE_L // Caveat: CARG1d may be RA. 1353 |.elif X64
1354 | mov RB, BASE // Save BASE.
1355 | mov CARG2d, [BASE]
1356 | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE.
1357 | mov CARG1d, SAVE_L
1468 |.else 1358 |.else
1469 | mov TAB:RC, [RA] 1359 | mov TAB:RD, [BASE]
1470 | mov L:RB, SAVE_L 1360 | mov L:RB, SAVE_L
1471 | mov ARG2, TAB:RC 1361 | mov ARG2, TAB:RD
1472 | mov ARG1, L:RB 1362 | mov ARG1, L:RB
1473 | mov RB, RA 1363 | mov RB, BASE // Save BASE.
1474 | mov TMP1, BASE // Save BASE and RA. 1364 | add BASE, 8
1475 | add RA, 8 1365 | mov ARG3, BASE
1476 | mov ARG3, RA
1477 |.endif 1366 |.endif
1478 | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) 1367 | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1479 | // cTValue * returned in eax (RC). 1368 | // cTValue * returned in eax (RD).
1480 | mov RA, RB 1369 | mov BASE, RB // Restore BASE.
1481 | mov BASE, TMP1 1370 | mov RB, [RD] // Copy table slot.
1482 | mov RB, [RC] // Copy table slot. 1371 | mov RD, [RD+4]
1483 | mov RC, [RC+4] 1372 | mov PC, [BASE-4]
1484 | mov [RA-8], RB 1373 | mov [BASE-8], RB
1485 | mov [RA-4], RC 1374 | mov [BASE-4], RD
1486 | jmp ->fff_res1 1375 | jmp ->fff_res1
1487 | 1376 |
1488 |//-- Base library: conversions ------------------------------------------ 1377 |//-- Base library: conversions ------------------------------------------
1489 | 1378 |
1490 |.ffunc tonumber 1379 |.ffunc tonumber
1491 | // Only handles the number case inline (without a base argument). 1380 | // Only handles the number case inline (without a base argument).
1492 | cmp NARGS:RC, 1+1; jne ->fff_fallback // Exactly one argument. 1381 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
1493 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback 1382 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
1494 if (sse) { 1383 if (sse) {
1495 | movsd xmm0, qword [RA]; jmp ->fff_resxmm0 1384 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
1496 } else { 1385 } else {
1497 | fld qword [RA]; jmp ->fff_resn 1386 | fld qword [BASE]; jmp ->fff_resn
1498 } 1387 }
1499 | 1388 |
1500 |.ffunc_1 tostring 1389 |.ffunc_1 tostring
1501 | // Only handles the string or number case inline. 1390 | // Only handles the string or number case inline.
1502 | cmp dword [RA+4], LJ_TSTR; jne >3 1391 | mov PC, [BASE-4]
1392 | cmp dword [BASE+4], LJ_TSTR; jne >3
1503 | // A __tostring method in the string base metatable is ignored. 1393 | // A __tostring method in the string base metatable is ignored.
1504 | mov STR:RC, [RA] 1394 | mov STR:RD, [BASE]
1505 |2: 1395 |2:
1506 | mov dword [RA-4], LJ_TSTR 1396 | mov dword [BASE-4], LJ_TSTR
1507 | mov [RA-8], STR:RC 1397 | mov [BASE-8], STR:RD
1508 | jmp ->fff_res1 1398 | jmp ->fff_res1
1509 |3: // Handle numbers inline, unless a number base metatable is present. 1399 |3: // Handle numbers inline, unless a number base metatable is present.
1510 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback 1400 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
1511 | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0 1401 | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
1512 | jne ->fff_fallback 1402 | jne ->fff_fallback
1513 | ffgccheck // Caveat: uses label 1. 1403 | ffgccheck // Caveat: uses label 1.
1514 | mov L:RB, SAVE_L 1404 | mov L:RB, SAVE_L
1515 | mov L:RB->base, RA // Add frame since C call can throw. 1405 | mov L:RB->base, BASE // Add frame since C call can throw.
1516 | mov [RA-4], PC
1517 | mov SAVE_PC, PC // Redundant (but a defined value). 1406 | mov SAVE_PC, PC // Redundant (but a defined value).
1518 | mov TMP1, BASE // Save BASE. 1407 |.if X64 and not X64WIN
1519 | mov FCARG2, RA // Caveat: FCARG2 == BASE 1408 | mov FCARG2, BASE // Otherwise: FCARG2 == BASE
1520 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA 1409 |.endif
1410 | mov L:FCARG1, L:RB
1521 | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np) 1411 | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np)
1522 | // GCstr returned in eax (RC). 1412 | // GCstr returned in eax (RD).
1523 | mov RA, L:RB->base 1413 | mov BASE, L:RB->base
1524 | mov BASE, TMP1
1525 | jmp <2 1414 | jmp <2
1526 | 1415 |
1527 |//-- Base library: iterators ------------------------------------------- 1416 |//-- Base library: iterators -------------------------------------------
@@ -1529,120 +1418,117 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1529 |.ffunc_1 next 1418 |.ffunc_1 next
1530 | je >2 // Missing 2nd arg? 1419 | je >2 // Missing 2nd arg?
1531 |1: 1420 |1:
1532 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback 1421 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1533 |.if X64
1534 | mov TMP1, BASE // Save BASE.
1535 | mov CARG2d, [RA]
1536 | mov L:RB, SAVE_L 1422 | mov L:RB, SAVE_L
1537 | mov L:RB->base, RA // Add frame since C call can throw. 1423 | mov L:RB->base, BASE // Add frame since C call can throw.
1538 | mov [RA-4], PC 1424 | mov PC, [BASE-4]
1539 | lea CARG3d, [RA+8] 1425 |.if X64WIN
1540 | mov CARG1d, L:RB // Caveat: CARG1d may be RA. 1426 | lea CARG3d, [BASE+8]
1427 | mov CARG2d, [BASE] // Caveat: CARG2d == BASE.
1428 | mov CARG1d, L:RB
1429 |.elif X64
1430 | mov CARG2d, [BASE]
1431 | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE.
1432 | mov CARG1d, L:RB
1541 |.else 1433 |.else
1542 | mov TAB:RB, [RA] 1434 | mov TAB:RD, [BASE]
1543 | mov ARG2, TAB:RB 1435 | mov ARG2, TAB:RD
1544 | mov L:RB, SAVE_L
1545 | mov ARG1, L:RB 1436 | mov ARG1, L:RB
1546 | mov L:RB->base, RA // Add frame since C call can throw. 1437 | add BASE, 8
1547 | mov [RA-4], PC 1438 | mov ARG3, BASE
1548 | mov TMP1, BASE // Save BASE.
1549 | add RA, 8
1550 | mov ARG3, RA
1551 |.endif 1439 |.endif
1552 | mov SAVE_PC, PC // Redundant (but a defined value). 1440 | mov SAVE_PC, PC // Redundant (but a defined value).
1553 | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key) 1441 | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1554 | // Flag returned in eax (RC). 1442 | // Flag returned in eax (RD).
1555 | mov RA, L:RB->base 1443 | mov BASE, L:RB->base
1556 | mov BASE, TMP1 1444 | test RD, RD; jz >3 // End of traversal?
1557 | test RC, RC; jz >3 // End of traversal? 1445 | mov RB, [BASE+8] // Copy key and value to results.
1558 | mov RB, [RA+8] // Copy key and value to results. 1446 | mov RD, [BASE+12]
1559 | mov RC, [RA+12] 1447 | mov [BASE-8], RB
1560 | mov [RA-8], RB 1448 | mov [BASE-4], RD
1561 | mov [RA-4], RC 1449 | mov RB, [BASE+16]
1562 | mov RB, [RA+16] 1450 | mov RD, [BASE+20]
1563 | mov RC, [RA+20] 1451 | mov [BASE], RB
1564 | mov [RA], RB 1452 | mov [BASE+4], RD
1565 | mov [RA+4], RC
1566 |->fff_res2: 1453 |->fff_res2:
1567 | mov RD, 1+2 1454 | mov RD, 1+2
1568 | jmp ->fff_res 1455 | jmp ->fff_res
1569 |2: // Set missing 2nd arg to nil. 1456 |2: // Set missing 2nd arg to nil.
1570 | mov dword [RA+12], LJ_TNIL 1457 | mov dword [BASE+12], LJ_TNIL
1571 | jmp <1 1458 | jmp <1
1572 |3: // End of traversal: return nil. 1459 |3: // End of traversal: return nil.
1573 | mov dword [RA-4], LJ_TNIL 1460 | mov dword [BASE-4], LJ_TNIL
1574 | jmp ->fff_res1 1461 | jmp ->fff_res1
1575 | 1462 |
1576 |.ffunc_1 pairs 1463 |.ffunc_1 pairs
1577 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback 1464 | mov CFUNC:RB, [BASE-8]
1578 | mov CFUNC:RC, CFUNC:RB->upvalue[0] 1465 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1579 | mov dword [RA-4], LJ_TFUNC 1466 | mov CFUNC:RD, CFUNC:RB->upvalue[0]
1580 | mov [RA-8], CFUNC:RC 1467 | mov PC, [BASE-4]
1581 | mov dword [RA+12], LJ_TNIL 1468 | mov dword [BASE-4], LJ_TFUNC
1469 | mov [BASE-8], CFUNC:RD
1470 | mov dword [BASE+12], LJ_TNIL
1582 | mov RD, 1+3 1471 | mov RD, 1+3
1583 | jmp ->fff_res 1472 | jmp ->fff_res
1584 | 1473 |
1585 |.ffunc_1 ipairs_aux 1474 |.ffunc_1 ipairs_aux
1586 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback 1475 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1587 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback 1476 | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback
1477 | mov PC, [BASE-4]
1588 if (sse) { 1478 if (sse) {
1589 | movsd xmm0, qword [RA+8] 1479 | movsd xmm0, qword [BASE+8]
1590 | sseconst_1 xmm1, RBa 1480 | sseconst_1 xmm1, RBa
1591 | addsd xmm0, xmm1 1481 | addsd xmm0, xmm1
1592 | cvtsd2si RC, xmm0 1482 | cvtsd2si RD, xmm0
1593 | movsd qword [RA-8], xmm0 1483 | movsd qword [BASE-8], xmm0
1594 } else { 1484 } else {
1595 |.if not X64 1485 |.if not X64
1596 | fld qword [RA+8] 1486 | fld qword [BASE+8]
1597 | fld1 1487 | fld1
1598 | faddp st1 1488 | faddp st1
1599 | fist ARG1 1489 | fist ARG1
1600 | fstp qword [RA-8] 1490 | fstp qword [BASE-8]
1601 | mov RC, ARG1 1491 | mov RD, ARG1
1602 |.endif 1492 |.endif
1603 } 1493 }
1604 | mov TAB:RB, [RA] 1494 | mov TAB:RB, [BASE]
1605 | cmp RC, TAB:RB->asize; jae >2 // Not in array part? 1495 | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
1606 | shl RC, 3 1496 | shl RD, 3
1607 | add RC, TAB:RB->array 1497 | add RD, TAB:RB->array
1608 |1: 1498 |1:
1609 | cmp dword [RC+4], LJ_TNIL; je ->fff_res0 1499 | cmp dword [RD+4], LJ_TNIL; je ->fff_res0
1610 | mov RB, [RC] // Copy array slot. 1500 | mov RB, [RD] // Copy array slot.
1611 | mov RC, [RC+4] 1501 | mov RD, [RD+4]
1612 | mov [RA], RB 1502 | mov [BASE], RB
1613 | mov [RA+4], RC 1503 | mov [BASE+4], RD
1614 | jmp ->fff_res2 1504 | jmp ->fff_res2
1615 |2: // Check for empty hash part first. Otherwise call C function. 1505 |2: // Check for empty hash part first. Otherwise call C function.
1616 | cmp dword TAB:RB->hmask, 0; je ->fff_res0 1506 | cmp dword TAB:RB->hmask, 0; je ->fff_res0
1617 | mov TMP1, BASE // Save BASE and RA.
1618 |.if X64 and not X64WIN
1619 | mov FCARG1, TAB:RB 1507 | mov FCARG1, TAB:RB
1620 | mov RB, RA 1508 | mov RB, BASE // Save BASE.
1621 |.else 1509 | mov FCARG2, RD // Caveat: FCARG2 == BASE
1622 | xchg FCARG1, TAB:RB // Caveat: FCARG1 == RA
1623 |.endif
1624 | mov FCARG2, RC
1625 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key) 1510 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
1626 | // cTValue * or NULL returned in eax (RC). 1511 | // cTValue * or NULL returned in eax (RD).
1627 | mov RA, RB 1512 | mov BASE, RB
1628 | mov BASE, TMP1 1513 | test RD, RD
1629 | test RC, RC
1630 | jnz <1 1514 | jnz <1
1631 |->fff_res0: 1515 |->fff_res0:
1632 | mov RD, 1+0 1516 | mov RD, 1+0
1633 | jmp ->fff_res 1517 | jmp ->fff_res
1634 | 1518 |
1635 |.ffunc_1 ipairs 1519 |.ffunc_1 ipairs
1636 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback 1520 | mov CFUNC:RB, [BASE-8]
1637 | mov CFUNC:RC, CFUNC:RB->upvalue[0] 1521 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1638 | mov dword [RA-4], LJ_TFUNC 1522 | mov CFUNC:RD, CFUNC:RB->upvalue[0]
1639 | mov [RA-8], CFUNC:RC 1523 | mov PC, [BASE-4]
1524 | mov dword [BASE-4], LJ_TFUNC
1525 | mov [BASE-8], CFUNC:RD
1640 if (sse) { 1526 if (sse) {
1641 | xorps xmm0, xmm0 1527 | xorps xmm0, xmm0
1642 | movsd qword [RA+8], xmm0 1528 | movsd qword [BASE+8], xmm0
1643 } else { 1529 } else {
1644 | fldz 1530 | fldz
1645 | fstp qword [RA+8] 1531 | fstp qword [BASE+8]
1646 } 1532 }
1647 | mov RD, 1+3 1533 | mov RD, 1+3
1648 | jmp ->fff_res 1534 | jmp ->fff_res
@@ -1650,54 +1536,42 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1650 |//-- Base library: catch errors ---------------------------------------- 1536 |//-- Base library: catch errors ----------------------------------------
1651 | 1537 |
1652 |.ffunc_1 pcall 1538 |.ffunc_1 pcall
1653 | mov [RA-4], PC 1539 | lea RA, [BASE+8]
1540 | sub NARGS:RD, 1
1654 | mov PC, 8+FRAME_PCALL 1541 | mov PC, 8+FRAME_PCALL
1655 | mov BASE, RA
1656 | add RA, 8
1657 | sub NARGS:RC, 1
1658 | mov LFUNC:RB, [RA-8]
1659 |1: 1542 |1:
1660 | test byte [DISPATCH+DISPATCH_GL(hookmask)], HOOK_ACTIVE 1543 | movzx RB, byte [DISPATCH+DISPATCH_GL(hookmask)]
1661 | jnz >3 // Hook active before pcall? 1544 | shr RB, HOOK_ACTIVE_SHIFT
1662 |2: 1545 | and RB, 1
1663 | cmp dword [RA-4], LJ_TFUNC 1546 | add PC, RB // Remember active hook before pcall.
1664 | jne ->vmeta_call // Ensure KBASE defined and != BASE. 1547 | jmp ->vm_call_dispatch
1665 | jmp aword LFUNC:RB->gate
1666 |3:
1667 | add PC, 1 // Use FRAME_PCALLH if hook was active.
1668 | jmp <2
1669 | 1548 |
1670 |.ffunc_2 xpcall 1549 |.ffunc_2 xpcall
1671 | cmp dword [RA+12], LJ_TFUNC; jne ->fff_fallback 1550 | cmp dword [BASE+12], LJ_TFUNC; jne ->fff_fallback
1672 | mov [RA-4], PC 1551 | mov RB, [BASE+4] // Swap function and traceback.
1673 | mov RB, [RA+4] // Swap function and traceback. 1552 | mov [BASE+12], RB
1674 | mov [RA+12], RB 1553 | mov dword [BASE+4], LJ_TFUNC
1675 | mov dword [RA+4], LJ_TFUNC 1554 | mov LFUNC:RB, [BASE]
1676 | mov LFUNC:RB, [RA] 1555 | mov PC, [BASE+8]
1677 | mov PC, [RA+8] 1556 | mov [BASE+8], LFUNC:RB
1678 | mov [RA+8], LFUNC:RB 1557 | mov [BASE], PC
1679 | mov [RA], PC 1558 | lea RA, [BASE+16]
1680 | mov PC, 2*8+FRAME_PCALL 1559 | sub NARGS:RD, 2
1681 | mov BASE, RA 1560 | mov PC, 16+FRAME_PCALL
1682 | add RA, 2*8
1683 | sub NARGS:RC, 2
1684 | jmp <1 1561 | jmp <1
1685 | 1562 |
1686 |//-- Coroutine library -------------------------------------------------- 1563 |//-- Coroutine library --------------------------------------------------
1687 | 1564 |
1688 |.macro coroutine_resume_wrap, resume 1565 |.macro coroutine_resume_wrap, resume
1689 |9: // Need to restore PC for fallback handler.
1690 | mov PC, SAVE_PC
1691 | jmp ->fff_fallback
1692 |
1693 |.if resume 1566 |.if resume
1694 |.ffunc_1 coroutine_resume 1567 |.ffunc_1 coroutine_resume
1695 | mov L:RB, [RA] 1568 | mov L:RB, [BASE]
1696 |.else 1569 |.else
1697 |.ffunc coroutine_wrap_aux 1570 |.ffunc coroutine_wrap_aux
1571 | mov CFUNC:RB, [BASE-8]
1698 | mov L:RB, CFUNC:RB->upvalue[0].gcr 1572 | mov L:RB, CFUNC:RB->upvalue[0].gcr
1699 |.endif 1573 |.endif
1700 | mov [RA-4], PC 1574 | mov PC, [BASE-4]
1701 | mov SAVE_PC, PC 1575 | mov SAVE_PC, PC
1702 |.if X64 1576 |.if X64
1703 | mov TMP1, L:RB 1577 | mov TMP1, L:RB
@@ -1705,60 +1579,52 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1705 | mov ARG1, L:RB 1579 | mov ARG1, L:RB
1706 |.endif 1580 |.endif
1707 |.if resume 1581 |.if resume
1708 | cmp dword [RA+4], LJ_TTHREAD; jne <9 1582 | cmp dword [BASE+4], LJ_TTHREAD; jne ->fff_fallback
1709 |.endif
1710 | cmp aword L:RB->cframe, 0; jne <9
1711 | cmp byte L:RB->status, LUA_YIELD; ja <9
1712 | mov PC, L:RB->top
1713 |.if X64
1714 | mov TMP2, PC
1715 |.else
1716 | mov ARG2, PC
1717 |.endif 1583 |.endif
1584 | cmp aword L:RB->cframe, 0; jne ->fff_fallback
1585 | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback
1586 | mov RA, L:RB->top
1718 | je >1 // Status != LUA_YIELD (i.e. 0)? 1587 | je >1 // Status != LUA_YIELD (i.e. 0)?
1719 | cmp PC, L:RB->base; je <9 // Check for presence of initial func. 1588 | cmp RA, L:RB->base // Check for presence of initial func.
1589 | je ->fff_fallback
1720 |1: 1590 |1:
1721 |.if resume 1591 |.if resume
1722 | lea PC, [PC+NARGS:RC*8-16] // Check stack space (-1-thread). 1592 | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread).
1723 |.else 1593 |.else
1724 | lea PC, [PC+NARGS:RC*8-8] // Check stack space (-1). 1594 | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1).
1725 |.endif 1595 |.endif
1726 | cmp PC, L:RB->maxstack; ja <9 1596 | cmp PC, L:RB->maxstack; ja ->fff_fallback
1727 | mov L:RB->top, PC 1597 | mov L:RB->top, PC
1728 | 1598 |
1729 | mov L:RB, SAVE_L 1599 | mov L:RB, SAVE_L
1730 | mov L:RB->base, RA 1600 | mov L:RB->base, BASE
1731 |.if resume 1601 |.if resume
1732 | add RA, 8 // Keep resumed thread in stack for GC. 1602 | add BASE, 8 // Keep resumed thread in stack for GC.
1733 |.endif
1734 | mov L:RB->top, RA
1735 |.if X64
1736 | mov RB, TMP2
1737 |.else
1738 | mov RB, ARG2
1739 |.endif 1603 |.endif
1604 | mov L:RB->top, BASE
1740 |.if resume 1605 |.if resume
1741 | lea RA, [RA+NARGS:RC*8-24] // RA = end of source for stack move. 1606 | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move.
1742 |.else 1607 |.else
1743 | lea RA, [RA+NARGS:RC*8-16] // RA = end of source for stack move. 1608 | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move.
1744 |.endif 1609 |.endif
1745 | sub RAa, PCa // Relative to PC. 1610 | sub RBa, PCa // Relative to PC.
1746 | 1611 |
1747 | cmp PC, RB 1612 | cmp PC, RA
1748 | je >3 1613 | je >3
1749 |2: // Move args to coroutine. 1614 |2: // Move args to coroutine.
1750 | mov RC, [PC+RA+4] 1615 | mov RC, [PC+RB+4]
1751 | mov [PC-4], RC 1616 | mov [PC-4], RC
1752 | mov RC, [PC+RA] 1617 | mov RC, [PC+RB]
1753 | mov [PC-8], RC 1618 | mov [PC-8], RC
1754 | sub PC, 8 1619 | sub PC, 8
1755 | cmp PC, RB 1620 | cmp PC, RA
1756 | jne <2 1621 | jne <2
1757 |3: 1622 |3:
1758 |.if X64 1623 |.if X64
1624 | mov CARG2d, RA
1759 | mov CARG1d, TMP1 1625 | mov CARG1d, TMP1
1760 | mov CARG2d, TMP2
1761 |.else 1626 |.else
1627 | mov ARG2, RA
1762 | xor RA, RA 1628 | xor RA, RA
1763 | mov ARG4, RA 1629 | mov ARG4, RA
1764 | mov ARG3, RA 1630 | mov ARG3, RA
@@ -1854,12 +1720,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1854 | 1720 |
1855 |.ffunc coroutine_yield 1721 |.ffunc coroutine_yield
1856 | mov L:RB, SAVE_L 1722 | mov L:RB, SAVE_L
1857 | mov [RA-4], PC
1858 | test aword L:RB->cframe, CFRAME_RESUME 1723 | test aword L:RB->cframe, CFRAME_RESUME
1859 | jz ->fff_fallback 1724 | jz ->fff_fallback
1860 | mov L:RB->base, RA 1725 | mov L:RB->base, BASE
1861 | lea RC, [RA+NARGS:RC*8-8] 1726 | lea RD, [BASE+NARGS:RD*8-8]
1862 | mov L:RB->top, RC 1727 | mov L:RB->top, RD
1863 | xor RD, RD 1728 | xor RD, RD
1864 | mov aword L:RB->cframe, RDa 1729 | mov aword L:RB->cframe, RDa
1865 | mov al, LUA_YIELD 1730 | mov al, LUA_YIELD
@@ -1870,14 +1735,16 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1870 | 1735 |
1871 if (sse) { 1736 if (sse) {
1872 |->fff_resn: 1737 |->fff_resn:
1873 | fstp qword [RA-8] 1738 | mov PC, [BASE-4]
1739 | fstp qword [BASE-8]
1874 | jmp ->fff_res1 1740 | jmp ->fff_res1
1875 | 1741 |
1876 |.ffunc_nsse math_abs 1742 |.ffunc_nsse math_abs
1877 | sseconst_abs xmm1, RDa 1743 | sseconst_abs xmm1, RDa
1878 | andps xmm0, xmm1 1744 | andps xmm0, xmm1
1879 |->fff_resxmm0: 1745 |->fff_resxmm0:
1880 | movsd qword [RA-8], xmm0 1746 | mov PC, [BASE-4]
1747 | movsd qword [BASE-8], xmm0
1881 | // fallthrough 1748 | // fallthrough
1882 } else { 1749 } else {
1883 |.ffunc_n math_abs 1750 |.ffunc_n math_abs
@@ -1885,7 +1752,8 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1885 | // fallthrough 1752 | // fallthrough
1886 |->fff_resxmm0: // Dummy. 1753 |->fff_resxmm0: // Dummy.
1887 |->fff_resn: 1754 |->fff_resn:
1888 | fstp qword [RA-8] 1755 | mov PC, [BASE-4]
1756 | fstp qword [BASE-8]
1889 } 1757 }
1890 |->fff_res1: 1758 |->fff_res1:
1891 | mov RD, 1+1 1759 | mov RD, 1+1
@@ -1897,16 +1765,18 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1897 |5: 1765 |5:
1898 | cmp PC_RB, RDL // More results expected? 1766 | cmp PC_RB, RDL // More results expected?
1899 | ja >6 1767 | ja >6
1900 | // BASE and KBASE are assumed to be set for the calling frame. 1768 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1769 | movzx RA, PC_RA
1770 | not RAa // Note: ~RA = -(RA+1)
1771 | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
1901 | ins_next 1772 | ins_next
1902 | 1773 |
1903 |6: // Fill up results with nil. 1774 |6: // Fill up results with nil.
1904 | mov dword [RA+RD*8-12], LJ_TNIL 1775 | mov dword [BASE+RD*8-12], LJ_TNIL
1905 | add RD, 1 1776 | add RD, 1
1906 | jmp <5 1777 | jmp <5
1907 | 1778 |
1908 |7: // Non-standard return case. 1779 |7: // Non-standard return case.
1909 | mov BASE, RA
1910 | mov RAa, -8 // Results start at BASE+RA = BASE-8. 1780 | mov RAa, -8 // Results start at BASE+RA = BASE-8.
1911 | jmp ->vm_return 1781 | jmp ->vm_return
1912 | 1782 |
@@ -1948,10 +1818,8 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1948 | fstp FPARG1 1818 | fstp FPARG1
1949 | .endif 1819 | .endif
1950 ||} 1820 ||}
1951 | mov TMP1, RA
1952 | mov RB, BASE 1821 | mov RB, BASE
1953 | call extern lj_wrapper_ .. func 1822 | call extern lj_wrapper_ .. func
1954 | mov RA, TMP1
1955 | mov BASE, RB 1823 | mov BASE, RB
1956 | .if X64 1824 | .if X64
1957 | jmp ->fff_resxmm0 1825 | jmp ->fff_resxmm0
@@ -1967,10 +1835,12 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1967 |->ff_math_deg: 1835 |->ff_math_deg:
1968 if (sse) { 1836 if (sse) {
1969 |.ffunc_nsse math_rad 1837 |.ffunc_nsse math_rad
1838 | mov CFUNC:RB, [BASE-8]
1970 | mulsd xmm0, qword CFUNC:RB->upvalue[0] 1839 | mulsd xmm0, qword CFUNC:RB->upvalue[0]
1971 | jmp ->fff_resxmm0 1840 | jmp ->fff_resxmm0
1972 } else { 1841 } else {
1973 |.ffunc_n math_rad 1842 |.ffunc_n math_rad
1843 | mov CFUNC:RB, [BASE-8]
1974 | fmul qword CFUNC:RB->upvalue[0] 1844 | fmul qword CFUNC:RB->upvalue[0]
1975 | jmp ->fff_resn 1845 | jmp ->fff_resn
1976 } 1846 }
@@ -1979,10 +1849,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1979 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn 1849 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn
1980 | 1850 |
1981 |.ffunc_1 math_frexp 1851 |.ffunc_1 math_frexp
1982 | mov RB, [RA+4] 1852 | mov RB, [BASE+4]
1983 | cmp RB, LJ_TISNUM; ja ->fff_fallback 1853 | cmp RB, LJ_TISNUM; ja ->fff_fallback
1984 | mov RC, [RA] 1854 | mov PC, [BASE-4]
1985 | mov [RA-4], RB; mov [RA-8], RC 1855 | mov RC, [BASE]
1856 | mov [BASE-4], RB; mov [BASE-8], RC
1986 | shl RB, 1; cmp RB, 0xffe00000; jae >3 1857 | shl RB, 1; cmp RB, 0xffe00000; jae >3
1987 | or RC, RB; jz >3 1858 | or RC, RB; jz >3
1988 | mov RC, 1022 1859 | mov RC, 1022
@@ -1994,15 +1865,15 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
1994 } else { 1865 } else {
1995 | mov TMP1, RB; fild TMP1 1866 | mov TMP1, RB; fild TMP1
1996 } 1867 }
1997 | mov RB, [RA-4] 1868 | mov RB, [BASE-4]
1998 | and RB, 0x800fffff // Mask off exponent. 1869 | and RB, 0x800fffff // Mask off exponent.
1999 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. 1870 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
2000 | mov [RA-4], RB 1871 | mov [BASE-4], RB
2001 |2: 1872 |2:
2002 if (sse) { 1873 if (sse) {
2003 | movsd qword [RA], xmm0 1874 | movsd qword [BASE], xmm0
2004 } else { 1875 } else {
2005 | fstp qword [RA] 1876 | fstp qword [BASE]
2006 } 1877 }
2007 | mov RD, 1+2 1878 | mov RD, 1+2
2008 | jmp ->fff_res 1879 | jmp ->fff_res
@@ -2014,46 +1885,48 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2014 } 1885 }
2015 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. 1886 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
2016 if (sse) { 1887 if (sse) {
2017 | movsd xmm0, qword [RA] 1888 | movsd xmm0, qword [BASE]
2018 | sseconst_hi xmm1, RBa, 43500000 // 2^54. 1889 | sseconst_hi xmm1, RBa, 43500000 // 2^54.
2019 | mulsd xmm0, xmm1 1890 | mulsd xmm0, xmm1
2020 | movsd qword [RA-8], xmm0 1891 | movsd qword [BASE-8], xmm0
2021 } else { 1892 } else {
2022 | fld qword [RA] 1893 | fld qword [BASE]
2023 | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54 1894 | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54
2024 | fstp qword [RA-8] 1895 | fstp qword [BASE-8]
2025 } 1896 }
2026 | mov RB, [RA-4]; mov RC, 1076; shl RB, 1; jmp <1 1897 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1
2027 | 1898 |
2028 if (sse) { 1899 if (sse) {
2029 |.ffunc_nsse math_modf 1900 |.ffunc_nsse math_modf
2030 } else { 1901 } else {
2031 |.ffunc_n math_modf 1902 |.ffunc_n math_modf
2032 } 1903 }
2033 | mov RB, [RA+4] 1904 | mov RB, [BASE+4]
1905 | mov PC, [BASE-4]
2034 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? 1906 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
2035 if (sse) { 1907 if (sse) {
2036 | movaps xmm4, xmm0 1908 | movaps xmm4, xmm0
2037 | call ->vm_trunc 1909 | call ->vm_trunc
2038 | subsd xmm4, xmm0 1910 | subsd xmm4, xmm0
2039 |1: 1911 |1:
2040 | movsd qword [RA-8], xmm0 1912 | movsd qword [BASE-8], xmm0
2041 | movsd qword [RA], xmm4 1913 | movsd qword [BASE], xmm4
2042 } else { 1914 } else {
2043 | fdup 1915 | fdup
2044 | call ->vm_trunc 1916 | call ->vm_trunc
2045 | fsub st1, st0 1917 | fsub st1, st0
2046 |1: 1918 |1:
2047 | fstp qword [RA-8] 1919 | fstp qword [BASE-8]
2048 | fstp qword [RA] 1920 | fstp qword [BASE]
2049 } 1921 }
2050 | mov RC, [RA-4]; mov RB, [RA+4] 1922 | mov RC, [BASE-4]; mov RB, [BASE+4]
2051 | xor RC, RB; js >3 // Need to adjust sign? 1923 | xor RC, RB; js >3 // Need to adjust sign?
2052 |2: 1924 |2:
2053 | mov RD, 1+2 1925 | mov RD, 1+2
2054 | jmp ->fff_res 1926 | jmp ->fff_res
2055 |3: 1927 |3:
2056 | xor RB, 0x80000000; mov [RA+4], RB; jmp <2 // Flip sign of fraction. 1928 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
1929 | jmp <2
2057 |4: 1930 |4:
2058 if (sse) { 1931 if (sse) {
2059 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. 1932 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
@@ -2079,8 +1952,8 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2079 |1: 1952 |1:
2080 | cmp RB, RD 1953 | cmp RB, RD
2081 | jae ->fff_resxmm0 1954 | jae ->fff_resxmm0
2082 | cmp dword [RA+RB*8-4], LJ_TISNUM; ja ->fff_fallback 1955 | cmp dword [BASE+RB*8-4], LJ_TISNUM; ja ->fff_fallback
2083 | movsd xmm1, qword [RA+RB*8-8] 1956 | movsd xmm1, qword [BASE+RB*8-8]
2084 | sseop xmm0, xmm1 1957 | sseop xmm0, xmm1
2085 | add RB, 1 1958 | add RB, 1
2086 | jmp <1 1959 | jmp <1
@@ -2091,8 +1964,8 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2091 |1: 1964 |1:
2092 | cmp RB, RD 1965 | cmp RB, RD
2093 | jae ->fff_resn 1966 | jae ->fff_resn
2094 | cmp dword [RA+RB*8-4], LJ_TISNUM; ja >5 1967 | cmp dword [BASE+RB*8-4], LJ_TISNUM; ja >5
2095 | fld qword [RA+RB*8-8] 1968 | fld qword [BASE+RB*8-8]
2096 ||if (cmov) { 1969 ||if (cmov) {
2097 | fucomi st1; cmovop st1; fpop1 1970 | fucomi st1; cmovop st1; fpop1
2098 ||} else { 1971 ||} else {
@@ -2116,8 +1989,8 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2116 |//-- String library ----------------------------------------------------- 1989 |//-- String library -----------------------------------------------------
2117 | 1990 |
2118 |.ffunc_1 string_len 1991 |.ffunc_1 string_len
2119 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback 1992 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2120 | mov STR:RB, [RA] 1993 | mov STR:RB, [BASE]
2121 if (sse) { 1994 if (sse) {
2122 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0 1995 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0
2123 } else { 1996 } else {
@@ -2125,9 +1998,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2125 } 1998 }
2126 | 1999 |
2127 |.ffunc string_byte // Only handle the 1-arg case here. 2000 |.ffunc string_byte // Only handle the 1-arg case here.
2128 | cmp NARGS:RC, 1+1; jne ->fff_fallback 2001 | cmp NARGS:RD, 1+1; jne ->fff_fallback
2129 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback 2002 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2130 | mov STR:RB, [RA] 2003 | mov STR:RB, [BASE]
2004 | mov PC, [BASE-4]
2131 | cmp dword STR:RB->len, 1 2005 | cmp dword STR:RB->len, 1
2132 | jb ->fff_res0 // Return no results for empty string. 2006 | jb ->fff_res0 // Return no results for empty string.
2133 | movzx RB, byte STR:RB[1] 2007 | movzx RB, byte STR:RB[1]
@@ -2139,14 +2013,14 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2139 | 2013 |
2140 |.ffunc string_char // Only handle the 1-arg case here. 2014 |.ffunc string_char // Only handle the 1-arg case here.
2141 | ffgccheck 2015 | ffgccheck
2142 | cmp NARGS:RC, 1+1; jne ->fff_fallback // *Exactly* 1 arg. 2016 | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
2143 | cmp dword [RA+4], LJ_TISNUM; ja ->fff_fallback 2017 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
2144 if (sse) { 2018 if (sse) {
2145 | cvtsd2si RC, qword [RA] 2019 | cvtsd2si RC, qword [BASE]
2146 | cmp RC, 255; ja ->fff_fallback 2020 | cmp RC, 255; ja ->fff_fallback
2147 | mov TMP2, RC 2021 | mov TMP2, RC
2148 } else { 2022 } else {
2149 | fld qword [RA] 2023 | fld qword [BASE]
2150 | fistp TMP2 2024 | fistp TMP2
2151 | cmp TMP2, 255; ja ->fff_fallback 2025 | cmp TMP2, 255; ja ->fff_fallback
2152 } 2026 }
@@ -2156,7 +2030,6 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2156 | mov ARG3, 1 2030 | mov ARG3, 1
2157 |.endif 2031 |.endif
2158 | lea RDa, TMP2 // Points to stack. Little-endian. 2032 | lea RDa, TMP2 // Points to stack. Little-endian.
2159 | mov TMP1, RA // Save RA.
2160 |->fff_newstr: 2033 |->fff_newstr:
2161 | mov L:RB, SAVE_L 2034 | mov L:RB, SAVE_L
2162 | mov L:RB->base, BASE 2035 | mov L:RB->base, BASE
@@ -2170,38 +2043,37 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2170 |.endif 2043 |.endif
2171 | mov SAVE_PC, PC 2044 | mov SAVE_PC, PC
2172 | call extern lj_str_new // (lua_State *L, char *str, size_t l) 2045 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
2173 | // GCstr * returned in eax (RC). 2046 | // GCstr * returned in eax (RD).
2174 | mov RA, TMP1
2175 | mov BASE, L:RB->base 2047 | mov BASE, L:RB->base
2176 | mov dword [RA-4], LJ_TSTR 2048 | mov PC, [BASE-4]
2177 | mov [RA-8], STR:RC 2049 | mov dword [BASE-4], LJ_TSTR
2050 | mov [BASE-8], STR:RD
2178 | jmp ->fff_res1 2051 | jmp ->fff_res1
2179 | 2052 |
2180 |.ffunc string_sub 2053 |.ffunc string_sub
2181 | ffgccheck 2054 | ffgccheck
2182 | mov TMP1, RA // Save RA.
2183 | mov TMP2, -1 2055 | mov TMP2, -1
2184 | cmp NARGS:RC, 1+2; jb ->fff_fallback 2056 | cmp NARGS:RD, 1+2; jb ->fff_fallback
2185 | jna >1 2057 | jna >1
2186 | cmp dword [RA+20], LJ_TISNUM; ja ->fff_fallback 2058 | cmp dword [BASE+20], LJ_TISNUM; ja ->fff_fallback
2187 if (sse) { 2059 if (sse) {
2188 | cvtsd2si RB, qword [RA+16] 2060 | cvtsd2si RB, qword [BASE+16]
2189 | mov TMP2, RB 2061 | mov TMP2, RB
2190 } else { 2062 } else {
2191 | fld qword [RA+16] 2063 | fld qword [BASE+16]
2192 | fistp TMP2 2064 | fistp TMP2
2193 } 2065 }
2194 |1: 2066 |1:
2195 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback 2067 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2196 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback 2068 | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback
2197 | mov STR:RB, [RA] 2069 | mov STR:RB, [BASE]
2198 | mov TMP3, STR:RB 2070 | mov TMP3, STR:RB
2199 | mov RB, STR:RB->len 2071 | mov RB, STR:RB->len
2200 if (sse) { 2072 if (sse) {
2201 | cvtsd2si RA, qword [RA+8] 2073 | cvtsd2si RA, qword [BASE+8]
2202 } else { 2074 } else {
2203 |.if not X64 2075 |.if not X64
2204 | fld qword [RA+8] 2076 | fld qword [BASE+8]
2205 | fistp ARG3 2077 | fistp ARG3
2206 | mov RA, ARG3 2078 | mov RA, ARG3
2207 |.endif 2079 |.endif
@@ -2250,14 +2122,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2250 | 2122 |
2251 |.ffunc_2 string_rep // Only handle the 1-char case inline. 2123 |.ffunc_2 string_rep // Only handle the 1-char case inline.
2252 | ffgccheck 2124 | ffgccheck
2253 | mov TMP1, RA // Save RA. 2125 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2254 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback 2126 | cmp dword [BASE+12], LJ_TISNUM; ja ->fff_fallback
2255 | cmp dword [RA+12], LJ_TISNUM; ja ->fff_fallback 2127 | mov STR:RB, [BASE]
2256 | mov STR:RB, [RA]
2257 if (sse) { 2128 if (sse) {
2258 | cvtsd2si RC, qword [RA+8] 2129 | cvtsd2si RC, qword [BASE+8]
2259 } else { 2130 } else {
2260 | fld qword [RA+8] 2131 | fld qword [BASE+8]
2261 | fistp TMP2 2132 | fistp TMP2
2262 | mov RC, TMP2 2133 | mov RC, TMP2
2263 } 2134 }
@@ -2284,9 +2155,8 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2284 | 2155 |
2285 |.ffunc_1 string_reverse 2156 |.ffunc_1 string_reverse
2286 | ffgccheck 2157 | ffgccheck
2287 | mov TMP1, RA // Save RA. 2158 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2288 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback 2159 | mov STR:RB, [BASE]
2289 | mov STR:RB, [RA]
2290 | mov RC, STR:RB->len 2160 | mov RC, STR:RB->len
2291 | test RC, RC 2161 | test RC, RC
2292 | jz ->fff_emptystr // Zero length string? 2162 | jz ->fff_emptystr // Zero length string?
@@ -2312,9 +2182,8 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2312 |.macro ffstring_case, name, lo, hi 2182 |.macro ffstring_case, name, lo, hi
2313 | .ffunc_1 name 2183 | .ffunc_1 name
2314 | ffgccheck 2184 | ffgccheck
2315 | mov TMP1, RA // Save RA. 2185 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2316 | cmp dword [RA+4], LJ_TSTR; jne ->fff_fallback 2186 | mov STR:RB, [BASE]
2317 | mov STR:RB, [RA]
2318 | mov RC, STR:RB->len 2187 | mov RC, STR:RB->len
2319 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 2188 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1
2320 | add RB, #STR 2189 | add RB, #STR
@@ -2349,19 +2218,17 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2349 |//-- Table library ------------------------------------------------------ 2218 |//-- Table library ------------------------------------------------------
2350 | 2219 |
2351 |.ffunc_1 table_getn 2220 |.ffunc_1 table_getn
2352 | cmp dword [RA+4], LJ_TTAB; jne ->fff_fallback 2221 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
2353 | mov TMP1, BASE // Save RA and BASE. 2222 | mov RB, BASE // Save BASE.
2354 | mov RB, RA 2223 | mov TAB:FCARG1, [BASE]
2355 | mov TAB:FCARG1, [RA] // Caveat: FCARG1 == RA
2356 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t) 2224 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
2357 | // Length of table returned in eax (RC). 2225 | // Length of table returned in eax (RD).
2358 | mov RA, RB // Restore RA and BASE. 2226 | mov BASE, RB // Restore BASE.
2359 | mov BASE, TMP1
2360 if (sse) { 2227 if (sse) {
2361 | cvtsi2sd xmm0, RC; jmp ->fff_resxmm0 2228 | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0
2362 } else { 2229 } else {
2363 |.if not X64 2230 |.if not X64
2364 | mov ARG1, RC; fild ARG1; jmp ->fff_resn 2231 | mov ARG1, RD; fild ARG1; jmp ->fff_resn
2365 |.endif 2232 |.endif
2366 } 2233 }
2367 | 2234 |
@@ -2406,29 +2273,26 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2406 | 2273 |
2407 |.macro .ffunc_bit_op, name, ins 2274 |.macro .ffunc_bit_op, name, ins
2408 | .ffunc_bit name 2275 | .ffunc_bit name
2409 | mov TMP2, NARGS:RC // Save for fallback. 2276 | mov TMP2, NARGS:RD // Save for fallback.
2410 | lea RC, [RA+NARGS:RC*8-16] 2277 | lea RD, [BASE+NARGS:RD*8-16]
2411 ||if (sse) {
2412 | mov TMP1, BASE // Need BASE as a scratch register.
2413 ||}
2414 |1: 2278 |1:
2415 | cmp RC, RA 2279 | cmp RD, BASE
2416 | jbe ->fff_resbit_op 2280 | jbe ->fff_resbit
2417 | cmp dword [RC+4], LJ_TISNUM; ja ->fff_fallback_bit_op 2281 | cmp dword [RD+4], LJ_TISNUM; ja ->fff_fallback_bit_op
2418 ||if (sse) { 2282 ||if (sse) {
2419 | movsd xmm0, qword [RC] 2283 | movsd xmm0, qword [RD]
2420 | addsd xmm0, xmm1 2284 | addsd xmm0, xmm1
2421 | movd BASE, xmm0 2285 | movd RA, xmm0
2422 | ins RB, BASE 2286 | ins RB, RA
2423 ||} else { 2287 ||} else {
2424 |.if not X64 2288 |.if not X64
2425 | fld qword [RC] 2289 | fld qword [RD]
2426 | fadd TMP1 2290 | fadd TMP1
2427 | fstp FPARG1 2291 | fstp FPARG1
2428 | ins RB, ARG1 2292 | ins RB, ARG1
2429 |.endif 2293 |.endif
2430 ||} 2294 ||}
2431 | sub RC, 8 2295 | sub RD, 8
2432 | jmp <1 2296 | jmp <1
2433 |.endmacro 2297 |.endmacro
2434 | 2298 |
@@ -2446,14 +2310,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2446 |->fff_resbit: 2310 |->fff_resbit:
2447 | cvtsi2sd xmm0, RB 2311 | cvtsi2sd xmm0, RB
2448 | jmp ->fff_resxmm0 2312 | jmp ->fff_resxmm0
2449 |->fff_resbit_op:
2450 | cvtsi2sd xmm0, RB
2451 | mov BASE, TMP1
2452 | jmp ->fff_resxmm0
2453 } else { 2313 } else {
2454 |.if not X64 2314 |.if not X64
2455 |->fff_resbit: 2315 |->fff_resbit:
2456 |->fff_resbit_op:
2457 | mov ARG1, RB 2316 | mov ARG1, RB
2458 | fild ARG1 2317 | fild ARG1
2459 | jmp ->fff_resn 2318 | jmp ->fff_resn
@@ -2461,10 +2320,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2461 } 2320 }
2462 | 2321 |
2463 |->fff_fallback_bit_op: 2322 |->fff_fallback_bit_op:
2464 if (sse) { 2323 | mov NARGS:RD, TMP2 // Restore for fallback
2465 | mov BASE, TMP1
2466 }
2467 | mov NARGS:RC, TMP2 // Restore for fallback
2468 | jmp ->fff_fallback 2324 | jmp ->fff_fallback
2469 | 2325 |
2470 |.macro .ffunc_bit_sh, name, ins 2326 |.macro .ffunc_bit_sh, name, ins
@@ -2503,86 +2359,80 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2503 |//----------------------------------------------------------------------- 2359 |//-----------------------------------------------------------------------
2504 | 2360 |
2505 |->fff_fallback_2: 2361 |->fff_fallback_2: // Entry point that forces nargs+1 = 1+2 before jumping to fff_fallback.
2506 | mov NARGS:RC, 1+2 // Other args are ignored, anyway. 2362 | mov NARGS:RD, 1+2 // Other args are ignored, anyway.
2507 | jmp ->fff_fallback 2363 | jmp ->fff_fallback
2508 |->fff_fallback_1: 2364 |->fff_fallback_1: // Entry point that forces nargs+1 = 1+1; falls through into fff_fallback.
2509 | mov NARGS:RC, 1+1 // Other args are ignored, anyway. 2365 | mov NARGS:RD, 1+1 // Other args are ignored, anyway.
2510 |->fff_fallback: // Call fast function fallback handler. 2366 |->fff_fallback: // Call fast function fallback handler.
2511 | // RA = new base, RC = nargs+1 2367 | // BASE = new base, RD = nargs+1
2512 | mov L:RB, SAVE_L 2368 | mov L:RB, SAVE_L
2513 | sub BASE, RA 2369 | mov PC, [BASE-4] // Fallback may overwrite PC.
2514 | mov [RA-4], PC
2515 | mov SAVE_PC, PC // Redundant (but a defined value). 2370 | mov SAVE_PC, PC // Redundant (but a defined value).
2516 | mov TMP1, BASE // Save old BASE (relative). 2371 | mov L:RB->base, BASE
2517 | mov L:RB->base, RA 2372 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8; stored as L->top below.
2518 | lea RC, [RA+NARGS:RC*8-8] 2373 | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler.
2519 | lea BASE, [RC+8*LUA_MINSTACK] // Ensure enough space for handler. 2374 | mov L:RB->top, RD
2520 | mov L:RB->top, RC 2375 | mov CFUNC:RD, [BASE-8] // RD = function at [BASE-8]; its ->f is called below.
2521 | mov CFUNC:RC, [RA-8] 2376 | cmp RA, L:RB->maxstack // Would top + LUA_MINSTACK slots exceed L->maxstack?
2522 | cmp BASE, L:RB->maxstack
2523 | ja >5 // Need to grow stack. 2377 | ja >5 // Need to grow stack.
2524 |.if X64 2378 |.if X64
2525 | mov CARG1d, L:RB 2379 | mov CARG1d, L:RB
2526 |.else 2380 |.else
2527 | mov ARG1, L:RB 2381 | mov ARG1, L:RB
2528 |.endif 2382 |.endif
2529 | call aword CFUNC:RC->f // (lua_State *L) 2383 | call aword CFUNC:RD->f // (lua_State *L)
2384 | mov BASE, L:RB->base // Reload BASE from L->base after the handler call.
2530 | // Either throws an error or recovers and returns 0 or MULTRES (+1). 2385 | // Either throws an error or recovers and returns 0 or MULTRES (+1).
2531 | test RC, RC; jnz >3 2386 | test RD, RD; jnz ->fff_res // Returned MULTRES (already in RD).
2532 |1: // Returned 0: retry fast path. 2387 |1: // Returned 0: retry fast path.
2533 | mov RA, L:RB->base 2388 | mov RD, L:RB->top // Next four instructions: nargs+1 = (L->top - BASE)/8 + 1.
2534 | mov RC, L:RB->top 2389 | sub RD, BASE
2535 | sub RC, RA 2390 | shr RD, 3
2536 | shr RC, 3 2391 | add NARGS:RD, 1
2537 | add NARGS:RC, 1 2392 | mov LFUNC:RB, [BASE-8] // RB = function to retry (ins_callt expects RB = LFUNC).
2538 | mov LFUNC:RB, [RA-8] 2393 | cmp dword [BASE-4], PC // Does [BASE-4] still hold the PC loaded on entry?
2539 | mov BASE, TMP1 // Restore old BASE. 2394 | jne >2 // Tailcalled?
2540 | add BASE, RA 2395 | ins_callt // Retry the call.
2541 | cmp [RA-4], PC; jne >2 // Callable modified by handler? 2396 |
2542 | jmp aword LFUNC:RB->gate // Retry the call. 2397 |2: // Reconstruct previous base for vmeta_call.
2543 | 2398 | mov RA, BASE // RA = current base; BASE itself is rewound below.
2544 |2: // Run modified callable. 2399 | test PC, FRAME_TYPE
2545 | cmp dword [RA-4], LJ_TFUNC 2400 | jnz >3 // FRAME_TYPE bits set in PC: take the delta path at 3.
2546 | jne ->vmeta_call 2401 | movzx RB, PC_RA // FRAME_TYPE bits clear: fetch PC_RA into RB.
2547 | jmp aword LFUNC:RB->gate // Retry the call. 2402 | not RBa // Note: ~RB = -(RB+1)
2548 | 2403 | lea BASE, [BASE+RB*8] // base = base - (RB+1)*8
2549 |3: // Returned MULTRES (already in RC/RD). 2404 | jmp ->vm_call_dispatch // Resolve again.
2550 | mov RA, L:RB->base 2405 |3: // PC has FRAME_TYPE bits set.
2551 | mov BASE, TMP1 // Restore old BASE. 2406 | mov RB, PC
2552 | add BASE, RA 2407 | and RB, -8 // Clear the low 3 bits of PC.
2553 | jmp ->fff_res 2408 | sub BASE, RB // BASE = BASE - (PC with low 3 bits cleared).
2409 | jmp ->vm_call_dispatch // Resolve again.
2554 | 2410 |
2555 |5: // Grow stack for fallback handler. 2411 |5: // Grow stack for fallback handler.
2556 | mov FCARG2, LUA_MINSTACK 2412 | mov FCARG2, LUA_MINSTACK
2557 | mov FCARG1, L:RB 2413 | mov FCARG1, L:RB
2558 | call extern lj_state_growstack@8 // (lua_State *L, int n) 2414 | call extern lj_state_growstack@8 // (lua_State *L, int n)
2415 | mov BASE, L:RB->base // Reload BASE from L->base after lj_state_growstack.
2559 | jmp <1 // Dumb retry (goes through ff first). 2416 | jmp <1 // Dumb retry (goes through ff first).
2560 | 2417 |
2561 |->fff_gcstep: // Call GC step function. 2418 |->fff_gcstep: // Call GC step function.
2562 | // RA = new base, RC = nargs+1 2419 | // BASE = new base, RD = nargs+1
2563 | pop RBa // Must keep stack at same level. 2420 | pop RBa // Must keep stack at same level.
2564 | mov TMPa, RBa // Save return address 2421 | mov TMPa, RBa // Save return address
2565 | mov L:RB, SAVE_L 2422 | mov L:RB, SAVE_L
2566 | sub BASE, RA
2567 | mov TMP2, BASE // Save old BASE (relative).
2568 | mov [RA-4], PC
2569 | mov SAVE_PC, PC // Redundant (but a defined value). 2423 | mov SAVE_PC, PC // Redundant (but a defined value).
2570 | mov L:RB->base, RA 2424 | mov L:RB->base, BASE
2571 | lea RC, [RA+NARGS:RC*8-8] 2425 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8; stored as L->top below.
2572 | mov FCARG1, L:RB 2426 | mov FCARG1, L:RB
2573 | mov L:RB->top, RC 2427 | mov L:RB->top, RD
2574 | call extern lj_gc_step@4 // (lua_State *L) 2428 | call extern lj_gc_step@4 // (lua_State *L)
2575 | mov RA, L:RB->base 2429 | mov BASE, L:RB->base // Reload BASE from L->base after the GC step.
2576 | mov RC, L:RB->top 2430 | mov RD, L:RB->top // Next four instructions: nargs+1 = (L->top - BASE)/8 + 1.
2577 | sub RC, RA 2431 | sub RD, BASE
2578 | shr RC, 3 2432 | shr RD, 3
2579 | add NARGS:RC, 1 2433 | add NARGS:RD, 1
2580 | mov PC, [RA-4]
2581 | mov BASE, TMP2 // Restore old BASE.
2582 | add BASE, RA
2583 | mov RBa, TMPa 2434 | mov RBa, TMPa
2584 | push RBa // Restore return address. 2435 | push RBa // Restore return address.
2585 | mov LFUNC:RB, [RA-8]
2586 | ret 2436 | ret // Returns via the address pushed back just above.
2587 | 2437 |
2588 |//----------------------------------------------------------------------- 2438 |//-----------------------------------------------------------------------
@@ -2629,9 +2479,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2629 | movzx OP, PC_OP 2479 | movzx OP, PC_OP
2630 | movzx RD, PC_RD 2480 | movzx RD, PC_RD
2631 |.if X64 2481 |.if X64
2632 | jmp aword [DISPATCH+OP*8+BC__MAX*8] // Re-dispatch to static ins. 2482 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
2633 |.else 2483 |.else
2634 | jmp aword [DISPATCH+OP*4+BC__MAX*4] // Re-dispatch to static ins. 2484 | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Re-dispatch to static ins.
2635 |.endif 2485 |.endif
2636 | 2486 |
2637 |->cont_hook: // Continue from hook yield. 2487 |->cont_hook: // Continue from hook yield.
@@ -2645,8 +2495,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2645 |.if X64 2495 |.if X64
2646 | int3 // NYI 2496 | int3 // NYI
2647 |.else 2497 |.else
2498 | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L).
2499 | mov RB, LFUNC:RB->pc
2500 | movzx RD, byte [RB+PC2PROTO(framesize)]
2501 | lea RD, [BASE+RD*8]
2648 | mov L:RB, SAVE_L 2502 | mov L:RB, SAVE_L
2649 | mov L:RB->base, BASE 2503 | mov L:RB->base, BASE
2504 | mov L:RB->top, RD
2650 | mov FCARG2, PC 2505 | mov FCARG2, PC
2651 | lea FCARG1, [DISPATCH+GG_DISP2J] 2506 | lea FCARG1, [DISPATCH+GG_DISP2J]
2652 | mov [DISPATCH+DISPATCH_J(L)], L:RB 2507 | mov [DISPATCH+DISPATCH_J(L)], L:RB
@@ -2661,16 +2516,22 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
2661 |.if X64 2516 |.if X64
2662 | int3 // NYI 2517 | int3 // NYI
2663 |.else 2518 |.else
2519 | lea RD, [BASE+NARGS:RD*8-8]
2664 | mov L:RB, SAVE_L 2520 | mov L:RB, SAVE_L
2665 | mov L:RB->base, BASE 2521 | mov L:RB->base, BASE
2522 | mov L:RB->top, RD
2666 | mov FCARG2, PC 2523 | mov FCARG2, PC
2667 | lea FCARG1, [DISPATCH+GG_DISP2J] 2524 | lea FCARG1, [DISPATCH+GG_DISP2J]
2668 | mov [DISPATCH+DISPATCH_J(L)], L:RB 2525 | mov [DISPATCH+DISPATCH_J(L)], L:RB
2669 | mov SAVE_PC, PC 2526 | mov SAVE_PC, PC
2670 | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc) 2527 | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc)
2671 | mov BASE, L:RB->base 2528 | mov BASE, L:RB->base
2672 | // Dispatch the first instruction and optionally record it. 2529 | mov RD, L:RB->top
2673 | ins_next 2530 | sub RD, BASE
2531 | shr RD, 3
2532 | add NARGS:RD, 1
2533 | mov LFUNC:RB, [BASE-8]
2534 | ins_callt
2674 |.endif 2535 |.endif
2675#endif 2536#endif
2676 | 2537 |
@@ -4403,13 +4264,13 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4403 case BC_CALL: case BC_CALLM: 4264 case BC_CALL: case BC_CALLM: /* Call the value in slot RA; CALLM first adds MULTRES to nargs. */
4404 | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs 4265 | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
4405 if (op == BC_CALLM) { 4266 if (op == BC_CALLM) {
4406 | add NARGS:RC, MULTRES 4267 | add NARGS:RD, MULTRES // CALLM only: add MULTRES to the argument count.
4407 } 4268 }
4408 | lea RA, [BASE+RA*8+8] 4269 | cmp dword [BASE+RA*8+4], LJ_TFUNC // Does the upper dword of slot RA hold LJ_TFUNC?
4409 | mov LFUNC:RB, [BASE+RA*8] 4270 | mov LFUNC:RB, [BASE+RA*8] // RB = lower dword of slot RA (mov leaves the cmp flags intact).
4410 | cmp dword [RA-4], LJ_TFUNC 4271 | jne ->vmeta_call_ra // Not LJ_TFUNC: hand over to vmeta_call_ra.
4411 | jne ->vmeta_call 4272 | lea BASE, [BASE+RA*8+8] // BASE = new base (the slot after slot RA).
4412 | jmp aword LFUNC:RB->gate 4273 | ins_call // Stores PC at [BASE-4], then dispatches the first instruction at LFUNC:RB->pc.
4413 break; 4274 break;
4414 4275
4415 case BC_CALLMT: 4276 case BC_CALLMT:
@@ -4445,20 +4306,19 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4445 | 4306 |
4446 | mov LFUNC:RB, [BASE-8] 4307 | mov LFUNC:RB, [BASE-8]
4447 |3: 4308 |3:
4448 | mov RA, BASE // BASE is ignored, except when ... 4309 | mov NARGS:RD, MULTRES
4449 | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function? 4310 | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
4450 | ja >5 4311 | ja >5
4451 |4: 4312 |4:
4452 | mov NARGS:RD, MULTRES 4313 | ins_callt
4453 | jmp aword LFUNC:RB->gate
4454 | 4314 |
4455 |5: // Tailcall to a fast function. 4315 |5: // Tailcall to a fast function.
4456 | test PC, FRAME_TYPE // Lua frame below? 4316 | test PC, FRAME_TYPE // Lua frame below?
4457 | jnz <4 4317 | jnz <4
4458 | movzx RD, PC_RA // Need to prepare BASE/KBASE. 4318 | movzx RA, PC_RA
4459 | not RDa 4319 | not RAa
4460 | lea BASE, [BASE+RD*8] 4320 | lea RA, [BASE+RA*8]
4461 | mov LFUNC:KBASE, [BASE-8] 4321 | mov LFUNC:KBASE, [RA-8] // Need to prepare KBASE.
4462 | mov KBASE, LFUNC:KBASE->pc 4322 | mov KBASE, LFUNC:KBASE->pc
4463 | mov KBASE, [KBASE+PC2PROTO(k)] 4323 | mov KBASE, [KBASE+PC2PROTO(k)]
4464 | jmp <4 4324 | jmp <4
@@ -4488,9 +4348,10 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4488 | mov [RA-8], LFUNC:RB 4348 | mov [RA-8], LFUNC:RB
4489 | mov [RA-4], RC 4349 | mov [RA-4], RC
4490 | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call. 4350 | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call.
4491 | mov NARGS:RC, 3 4351 | mov NARGS:RD, 2+1
4492 | jne ->vmeta_call 4352 | jne ->vmeta_call
4493 | jmp aword LFUNC:RB->gate 4353 | mov BASE, RA
4354 | ins_call
4494 break; 4355 break;
4495 4356
4496 case BC_VARG: 4357 case BC_VARG:
@@ -4799,6 +4660,150 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
4799 | ins_next 4660 | ins_next
4800 break; 4661 break;
4801 4662
4663 /* -- Function headers -------------------------------------------------- */
4664
4665 /*
4666 ** Reminder: A function may be called with func/args above L->maxstack,
4667 ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
4668 ** too. This means all FUNC* ops (including fast functions) must check
4669 ** for stack overflow _before_ adding more slots!
4670 */
4671
4672 case BC_FUNCF: /* Emits no instructions of its own (only comments below). */
4673#if LJ_HASJIT
4674 | // NYI: Disabled, until the tracer supports recursion/upcalls/leaves.
4675 | // hotcall RB
4676#endif
4677 case BC_FUNCV: /* NYI: compiled vararg functions. */
4678 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
4679 break;
4680
4681 case BC_JFUNCF: /* Shares the BC_IFUNCF body when LJ_HASJIT is set; emits nothing otherwise. */
4682#if !LJ_HASJIT
4683 break;
4684#endif
4685 case BC_IFUNCF: /* Fixed-arg function header: stack check, then nil-fill missing parameters. */
4686 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4687 | mov KBASE, [PC-4+PC2PROTO(k)] // KBASE = the k field addressed via PC2PROTO.
4688 | mov L:RB, SAVE_L
4689 | lea RA, [BASE+RA*8] // Top of frame.
4690 | cmp RA, L:RB->maxstack
4691 | ja ->vm_growstack_f // Frame top above L->maxstack: go to vm_growstack_f.
4692 | movzx RA, byte [PC-4+PC2PROTO(numparams)] // RA = numparams byte addressed via PC2PROTO.
4693 | cmp NARGS:RD, RA // Check for missing parameters.
4694 | jbe >3 // nargs+1 <= numparams: at least one parameter is missing.
4695 |2:
4696 if (op == BC_JFUNCF) {
4697 | movzx RD, PC_RD
4698 | jmp =>BC_JLOOP // JFUNCF: continue at BC_JLOOP with RD = PC_RD.
4699 } else {
4700 | ins_next
4701 }
4702 |
4703 |3: // Clear missing parameters.
4704 | mov dword [BASE+NARGS:RD*8-4], LJ_TNIL // Write LJ_TNIL to the upper dword of slot index nargs.
4705 | add NARGS:RD, 1
4706 | cmp NARGS:RD, RA
4707 | jbe <3 // Repeat while nargs+1 <= numparams.
4708 | jmp <2
4709 break;
4710
4711 case BC_JFUNCV: /* Emits nothing without LJ_HASJIT; otherwise only an int3. */
4712#if !LJ_HASJIT
4713 break;
4714#endif
4715 | int3 // NYI: compiled vararg functions
4716 break; /* NYI: compiled vararg functions. */
4717
4718 case BC_IFUNCV: /* Vararg function header: builds a new frame above the passed args and moves the fixed args into it. */
4719 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
4720 | lea RB, [NARGS:RD*8+FRAME_VARG] // RB = (nargs+1)*8 + FRAME_VARG.
4721 | lea RD, [BASE+NARGS:RD*8] // RD = BASE + (nargs+1)*8; becomes the new BASE below.
4722 | mov LFUNC:KBASE, [BASE-8] // KBASE serves as a temporary here; it is reloaded before ins_next.
4723 | mov [RD-4], RB // Store delta + FRAME_VARG.
4724 | mov [RD-8], LFUNC:KBASE // Store copy of LFUNC.
4725 | mov L:RB, SAVE_L
4726 | lea RA, [RD+RA*8] // RA = RD + framesize*8.
4727 | cmp RA, L:RB->maxstack
4728 | ja ->vm_growstack_v // Need to grow stack.
4729 | mov RA, BASE // RA = old base, the source of the fixed args.
4730 | mov BASE, RD // BASE = new base; RD also serves as the copy destination.
4731 | movzx RB, byte [PC-4+PC2PROTO(numparams)] // RB = numparams byte addressed via PC2PROTO.
4732 | test RB, RB
4733 | jz >2 // numparams == 0: nothing to copy.
4734 |1: // Copy fixarg slots up to new frame.
4735 | add RA, 8
4736 | cmp RA, BASE
4737 | jnb >3 // Less args than parameters?
4738 | mov KBASE, [RA-8] // Copy the 8-byte slot as two dwords via KBASE.
4739 | mov [RD], KBASE
4740 | mov KBASE, [RA-4]
4741 | mov [RD+4], KBASE
4742 | add RD, 8
4743 | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC).
4744 | sub RB, 1
4745 | jnz <1
4746 |2:
4747 if (op == BC_JFUNCV) { /* NOTE(review): case BC_JFUNCV above always breaks before this case, so this branch looks unreachable -- confirm. */
4748 | movzx RD, PC_RD
4749 | jmp =>BC_JLOOP
4750 } else {
4751 | mov KBASE, [PC-4+PC2PROTO(k)] // Reload KBASE after its use as a temporary above.
4752 | ins_next
4753 }
4754 |
4755 |3: // Clear missing parameters.
4756 | mov dword [RD+4], LJ_TNIL
4757 | add RD, 8
4758 | sub RB, 1
4759 | jnz <3 // One LJ_TNIL store per remaining parameter.
4760 | jmp <2
4761 break;
4762
4763 case BC_FUNCC: /* Calls the C function pointer CFUNC->f directly. */
4764 case BC_FUNCCW: /* Same setup, but calls DISPATCH_GL(wrapf) with (L, f). */
4765 | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
4766 | mov CFUNC:RB, [BASE-8]
4767 | mov KBASEa, CFUNC:RB->f // KBASEa = C function pointer.
4768 | mov L:RB, SAVE_L
4769 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8; stored as L->top below.
4770 | mov L:RB->base, BASE
4771 | lea RA, [RD+8*LUA_MINSTACK] // RA = RD + LUA_MINSTACK slots.
4772 | cmp RA, L:RB->maxstack // Flags are consumed by the ja below; only movs sit in between.
4773 | mov L:RB->top, RD
4774 if (op == BC_FUNCC) {
4775 |.if X64
4776 | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
4777 |.else
4778 | mov ARG1, L:RB
4779 |.endif
4780 } else {
4781 |.if X64
4782 | mov CARG2, KBASEa
4783 | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
4784 |.else
4785 | mov ARG2, KBASEa
4786 | mov ARG1, L:RB
4787 |.endif
4788 }
4789 | ja ->vm_growstack_c // Need to grow stack.
4790 | set_vmstate C
4791 if (op == BC_FUNCC) {
4792 | call KBASEa // (lua_State *L)
4793 } else {
4794 | // (lua_State *L, lua_CFunction f)
4795 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
4796 }
4797 | set_vmstate INTERP
4798 | // nresults returned in eax (RD).
4799 | mov BASE, L:RB->base // Reload BASE from L->base after the call.
4800 | lea RA, [BASE+RD*8]
4801 | neg RA
4802 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
4803 | mov PC, [BASE-4] // Fetch PC of caller.
4804 | jmp ->vm_returnc
4805 break;
4806
4802 /* ---------------------------------------------------------------------- */ 4807 /* ---------------------------------------------------------------------- */
4803 4808
4804 default: 4809 default: