diff options
Diffstat (limited to 'src/vm_arm64.dasc')
-rw-r--r-- | src/vm_arm64.dasc | 227 |
1 files changed, 214 insertions, 13 deletions
diff --git a/src/vm_arm64.dasc b/src/vm_arm64.dasc index 7a881bdd..a6227bf7 100644 --- a/src/vm_arm64.dasc +++ b/src/vm_arm64.dasc | |||
@@ -236,12 +236,17 @@ | |||
236 | |.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro | 236 | |.macro mov_false, reg; movn reg, #0x8000, lsl #32; .endmacro | // reg = ~(0x8000 << 32) = 0xffff7fffffffffff.
237 | |.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro | 237 | |.macro mov_true, reg; movn reg, #0x0001, lsl #48; .endmacro | // reg = ~(0x0001 << 48) = 0xfffeffffffffffff.
238 | | | 238 | | |
239 | #define GL_J(field) (GG_OFS(J) + (int)offsetof(jit_State, field)) | 239 | #define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field)) | // Offset of a jit_State field, used as [GL, #GL_J(field)]; the new form is based on GG_G2J.
240 | | | 240 | | |
241 | #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) | 241 | #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) | // Offset of a GCproto field counted back from the end of the struct; applied to a pointer loaded from LFUNC->pc (presumably the bytecode directly follows the GCproto -- confirm).
242 | | | 242 | | |
243 | |.macro hotcheck, delta | 243 | |.macro hotcheck, delta | // Subtract delta from the 16-bit counter slot selected by PC; the flags of that subtraction are left set.
244 | | NYI | 244 | | lsr CARG1, PC, #1 | // CARG1 = PC >> 1.
245 | | and CARG1, CARG1, #126 | | // Keep bits 1..6: an even byte offset in 0..126, i.e. one of 64 halfword slots.
246 | | add CARG1, CARG1, #GG_G2DISP+GG_DISP2HOT | | // Turn the slot offset into an offset relative to GL.
247 | | ldrh CARG2w, [GL, CARG1] | | // Load the current 16-bit counter value (zero-extended).
248 | | subs CARG2, CARG2, #delta | | // Decrement and set the condition flags.
249 | | strh CARG2w, [GL, CARG1] | | // Write the low 16 bits back to the same slot.
245 | |.endmacro | 250 | |.endmacro |
246 | | | 251 | | |
247 | |.macro hotloop | 252 | |.macro hotloop |
@@ -869,7 +874,7 @@ static void build_subroutines(BuildCtx *ctx) | |||
869 | | bl extern lj_meta_for // (lua_State *L, TValue *base) | 874 | | bl extern lj_meta_for // (lua_State *L, TValue *base) |
870 | | ldr INSw, [PC, #-4] | 875 | | ldr INSw, [PC, #-4] |
871 | |.if JIT | 876 | |.if JIT |
872 | | uxtb TMP0, INS | 877 | | uxtb TMP0w, INSw |
873 | |.endif | 878 | |.endif |
874 | | decode_RA RA, INS | 879 | | decode_RA RA, INS |
875 | | decode_RD RC, INS | 880 | | decode_RD RC, INS |
@@ -1732,7 +1737,20 @@ static void build_subroutines(BuildCtx *ctx) | |||
1732 | |//----------------------------------------------------------------------- | 1737 | |//----------------------------------------------------------------------- |
1733 | | | 1738 | | |
1734 | |->vm_record: // Dispatch target for recording phase. | 1739 | |->vm_record: // Dispatch target for recording phase: updates GL->hookcount, then branches forward to label 1 or 5 (both defined past this excerpt). |
1735 | | NYI | 1740 | |.if JIT |
1741 | | ldrb CARG1w, GL->hookmask | | // CARG1 = hook mask byte.
1742 | | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent. | |
1743 | | bne >5 | |
1744 | | // Decrement the hookcount for consistency, but always do the call. | |
1745 | | ldr CARG2w, GL->hookcount | |
1746 | | tst CARG1, #HOOK_ACTIVE | |
1747 | | bne >1 | | // HOOK_ACTIVE set: skip the decrement.
1748 | | sub CARG2w, CARG2w, #1 | |
1749 | | tst CARG1, #LUA_MASKLINE|LUA_MASKCOUNT | |
1750 | | beq >1 | | // Neither mask bit set: hookcount is left unwritten.
1751 | | str CARG2w, GL->hookcount | | // Store the decremented count.
1752 | | b >1 | |
1753 | |.endif | |
1736 | | | 1754 | | |
1737 | |->vm_rethook: // Dispatch target for return hooks. | 1755 | |->vm_rethook: // Dispatch target for return hooks. |
1738 | | ldrb TMP2w, GL->hookmask | 1756 | | ldrb TMP2w, GL->hookmask |
@@ -1774,7 +1792,21 @@ static void build_subroutines(BuildCtx *ctx) | |||
1774 | | b <4 | 1792 | | b <4 |
1775 | | | 1793 | | |
1776 | |->vm_hotloop: // Hot loop counter underflow. | 1794 | |->vm_hotloop: // Hot loop counter underflow: syncs SAVE_PC, J->L, L->base and L->top, then calls lj_trace_hot(J, pc). |
1777 | | NYI | 1795 | |.if JIT |
1796 | | ldr LFUNC:CARG3, [BASE, FRAME_FUNC] // Same as curr_topL(L). | |
1797 | | add CARG1, GL, #GG_G2DISP+GG_DISP2J | | // CARG1 = J, the first argument of the call below.
1798 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | | // Apply LJ_GCVMASK to get the LFUNC pointer.
1799 | | str PC, SAVE_PC | |
1800 | | ldr CARG3, LFUNC:CARG3->pc | | // CARG3 = the function's pc field.
1801 | | mov CARG2, PC | | // CARG2 = pc, the second argument.
1802 | | str L, [GL, #GL_J(L)] | | // J->L = L.
1803 | | ldrb CARG3w, [CARG3, #PC2PROTO(framesize)] | | // Read the framesize byte of the GCproto through the PC2PROTO offset.
1804 | | str BASE, L->base | |
1805 | | add CARG3, BASE, CARG3, lsl #3 | | // CARG3 = BASE + framesize*8.
1806 | | str CARG3, L->top | |
1807 | | bl extern lj_trace_hot // (jit_State *J, const BCIns *pc) | |
1808 | | b <3 | | // Continue at backward label 3, defined before this excerpt.
1809 | |.endif | |
1778 | | | 1810 | | |
1779 | |->vm_callhook: // Dispatch target for call hooks. | 1811 | |->vm_callhook: // Dispatch target for call hooks. |
1780 | | mov CARG2, PC | 1812 | | mov CARG2, PC |
@@ -1804,7 +1836,54 @@ static void build_subroutines(BuildCtx *ctx) | |||
1804 | | br CRET1 | 1836 | | br CRET1 |
1805 | | | 1837 | | |
1806 | |->cont_stitch: // Trace stitching. | 1838 | |->cont_stitch: // Trace stitching: move the call results into place, then continue, jump to the linked trace, or start a stitched trace. |
1807 | | NYI | 1839 | |.if JIT |
1840 | | // RA = resultptr, CARG4 = meta base | |
1841 | | ldr RB, SAVE_MULTRES // Byte count; a value of 8 means there are no results to move. | |
1842 | | ldr INSw, [PC, #-4] // 32-bit instruction preceding PC. | |
1843 | | ldr TRACE:CARG3, [CARG4, #-40] // Load previous trace; it stays in CARG3. | |
1844 | | subs RB, RB, #8 | |
1845 | | decode_RA RC, INS // Call base. | |
1846 | | and CARG3, CARG3, #LJ_GCVMASK // Apply LJ_GCVMASK to get the TRACE pointer. | |
1847 | | beq >2 // Tests the result of the subs above. | |
1848 | |1: // Move results down. | |
1849 | | ldr CARG1, [RA] | |
1850 | | add RA, RA, #8 | |
1851 | | subs RB, RB, #8 | |
1852 | | str CARG1, [BASE, RC, lsl #3] | |
1853 | | add RC, RC, #1 | |
1854 | | bne <1 | |
1855 | |2: | |
1856 | | decode_RA RA, INS | |
1857 | | decode_RB RB, INS | |
1858 | | add RA, RA, RB // RA = A + B operands of INS. | |
1859 | |3: | |
1860 | | cmp RA, RC | |
1861 | | bhi >9 // More results wanted? | |
1862 | | | |
1863 | | ldrh RAw, TRACE:CARG3->traceno | |
1864 | | ldrh RCw, TRACE:CARG3->link | |
1865 | | cmp RCw, RAw | |
1866 | | beq ->cont_nop // Blacklisted. | |
1867 | | cmp RCw, #0 | |
1868 | | bne =>BC_JLOOP // Jump to stitched trace (RC = its trace number). | |
1869 | | | |
1870 | | // Stitch a new trace to the previous trace. | |
1871 | | mov CARG1, #GL_J(exitno) | |
1872 | | str RAw, [GL, CARG1] // J->exitno = traceno. 32-bit store, as in vm_exit_handler: a 64-bit store would also overwrite the 4 bytes after exitno. | |
1873 | | mov CARG1, #GL_J(L) | |
1874 | | str L, [GL, CARG1] // J->L = L. | |
1875 | | str BASE, L->base | |
1876 | | add CARG1, GL, #GG_G2J // CARG1 = J. | |
1877 | | mov CARG2, PC | |
1878 | | bl extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) | |
1879 | | ldr BASE, L->base // Reload BASE after the call. | |
1880 | | b ->cont_nop | |
1881 | | | |
1882 | |9: // Fill up results with nil. | |
1883 | | str TISNIL, [BASE, RC, lsl #3] | |
1884 | | add RC, RC, #1 | |
1885 | | b <3 | |
1886 | |.endif | |
1808 | | | 1887 | | |
1809 | |->vm_profhook: // Dispatch target for profiler hook. | 1888 | |->vm_profhook: // Dispatch target for profiler hook. |
1810 | #if LJ_HASPROFILE | 1889 | #if LJ_HASPROFILE |
@@ -1822,10 +1901,120 @@ static void build_subroutines(BuildCtx *ctx) | |||
1822 | |//-- Trace exit handler ------------------------------------------------- | 1901 | |//-- Trace exit handler ------------------------------------------------- |
1823 | |//----------------------------------------------------------------------- | 1902 | |//----------------------------------------------------------------------- |
1824 | | | 1903 | | |
1904 | |.macro savex_, a, b | | // Save the FP pair d<a>/d<b> at sp+a*8 and the GPR pair x<a>/x<b> at sp+(32+a)*8.
1905 | | stp d..a, d..b, [sp, #a*8] | |
1906 | | stp x..a, x..b, [sp, #32*8+a*8] | |
1907 | |.endmacro | |
1908 | | | ||
1825 | |->vm_exit_handler: | 1909 | |->vm_exit_handler: | // Saves the register state below sp, computes the exit number and calls lj_trace_exit(J, ex).
1826 | | NYI | 1910 | |.if JIT |
1911 | | sub sp, sp, #(64*8) | | // Reserve 64 slots of 8 bytes: d0-d31 in slots 0-31, x0-x29 in slots 32-61, exit pc/sp in slots 62/63.
1912 | | savex_, 0, 1 | |
1913 | | savex_, 2, 3 | |
1914 | | savex_, 4, 5 | |
1915 | | savex_, 6, 7 | |
1916 | | savex_, 8, 9 | |
1917 | | savex_, 10, 11 | |
1918 | | savex_, 12, 13 | |
1919 | | savex_, 14, 15 | |
1920 | | savex_, 16, 17 | |
1921 | | savex_, 18, 19 | |
1922 | | savex_, 20, 21 | |
1923 | | savex_, 22, 23 | |
1924 | | savex_, 24, 25 | |
1925 | | savex_, 26, 27 | |
1926 | | savex_, 28, 29 | |
1927 | | stp d30, d31, [sp, #30*8] | | // Only the FP pair here; slots 62/63 are written further down.
1928 | | ldr CARG1, [sp, #64*8] // Load original value of lr. | |
1929 | | add CARG3, sp, #64*8 // Recompute original value of sp. | |
1930 | | mv_vmstate CARG4, EXIT | |
1931 | | ldr CARG2w, [CARG1, #-4]! // Get exit instruction (pre-index: CARG1 now points at it). | |
1932 | | stp CARG1, CARG3, [sp, #62*8] // Store exit pc/sp in RID_LR/RID_SP. | |
1933 | | lsl CARG2, CARG2, #38 | | // Move bit 25 of the instruction word up to bit 63 ...
1934 | | add CARG1, CARG1, CARG2, asr #36 | | // ... and add its sign-extended low 26 bits, times 4, to the instruction address.
1935 | | ldr CARG2w, [lr] // Load exit stub group offset. | |
1936 | | sub CARG1, CARG1, lr | |
1937 | | sub CARG1, CARG1, #4 | | // CARG1 = byte distance from lr+4 to the address computed above.
1938 | | ldr L, GL->cur_L | |
1939 | | add CARG1, CARG2, CARG1, lsr #2 // Compute exit number: group offset + distance/4. | |
1940 | | ldr BASE, GL->jit_base | |
1941 | | st_vmstate CARG4 | |
1942 | | str CARG1w, [GL, #GL_J(exitno)] | | // J->exitno is written as a 32-bit value.
1943 | | str BASE, L->base | |
1944 | | str L, [GL, #GL_J(L)] | | // J->L = L.
1945 | | str xzr, GL->jit_base | | // Clear GL->jit_base.
1946 | | add CARG1, GL, #GG_G2J | | // CARG1 = J.
1947 | | mov CARG2, sp | | // CARG2 = start of the save area, passed as ExitState *ex.
1948 | | bl extern lj_trace_exit // (jit_State *J, ExitState *ex) | |
1949 | | // Returns MULTRES (unscaled) or negated error code. | |
1950 | | ldr CARG2, L->cframe | |
1951 | | ldr BASE, L->base | |
1952 | | and sp, CARG2, #CFRAME_RAWMASK | | // sp = L->cframe masked with CFRAME_RAWMASK.
1953 | | ldr PC, SAVE_PC // Get SAVE_PC. | |
1954 | | str L, SAVE_L // Set SAVE_L (on-trace resume/yield). | |
1955 | | b >1 | | // Continue at label 1 inside vm_exit_interp, skipping its reload of L.
1956 | |.endif | |
1957 | | | ||
1827 | |->vm_exit_interp: | 1958 | |->vm_exit_interp: |
1828 | | NYI | 1959 | | // CARG1 = MULTRES or negated error code, BASE, PC and GL set. |
1960 | |.if JIT | ||
1961 | | ldr L, SAVE_L | ||
1962 | |1: | ||
1963 | | cmp CARG1w, #0 | ||
1964 | | blt >9 // Check for error from exit. | ||
1965 | | lsl RC, CARG1, #3 | ||
1966 | | ldr LFUNC:CARG2, [BASE, FRAME_FUNC] | ||
1967 | | movz TISNUM, #(LJ_TISNUM>>1)&0xffff, lsl #48 | ||
1968 | | movz TISNUMhi, #(LJ_TISNUM>>1)&0xffff, lsl #16 | ||
1969 | | movn TISNIL, #0 | ||
1970 | | and LFUNC:CARG2, CARG2, #LJ_GCVMASK | ||
1971 | | str RC, SAVE_MULTRES | ||
1972 | | str BASE, L->base | ||
1973 | | ldr CARG2, LFUNC:CARG2->pc | ||
1974 | | str xzr, GL->jit_base | ||
1975 | | mv_vmstate CARG4, INTERP | ||
1976 | | ldr KBASE, [CARG2, #PC2PROTO(k)] | ||
1977 | | // Modified copy of ins_next which handles function header dispatch, too. | ||
1978 | | ldrb RBw, [PC] | ||
1979 | | ldr INSw, [PC], #4 | ||
1980 | | st_vmstate CARG4 | ||
1981 | | cmp RBw, #BC_FUNCC+2 // Fast function? | ||
1982 | | add TMP1, GL, INS, uxtb #3 | ||
1983 | | bhs >4 | ||
1984 | |2: | ||
1985 | | cmp RBw, #BC_FUNCF // Function header? | ||
1986 | | add TMP0, GL, RB, uxtb #3 | ||
1987 | | ldr RB, [TMP0, #GG_G2DISP] | ||
1988 | | decode_RA RA, INS | ||
1989 | | lsr TMP0, INS, #16 | ||
1990 | | csel RC, TMP0, RC, lo | ||
1991 | | blo >5 | ||
1992 | | ldr CARG3, [BASE, FRAME_FUNC] | ||
1993 | | sub RC, RC, #8 | ||
1994 | | add RA, BASE, RA, lsl #3 // Yes: RA = BASE+framesize*8, RC = nargs*8 | ||
1995 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
1996 | |5: | ||
1997 | | br RB | ||
1998 | | | ||
1999 | |4: // Check frame below fast function. | ||
2000 | | ldr CARG1, [BASE, FRAME_PC] | ||
2001 | | ands CARG2, CARG1, #FRAME_TYPE | ||
2002 | | bne <2 // Trace stitching continuation? | ||
2003 | | // Otherwise set KBASE for Lua function below fast function. | ||
2004 | | ldr CARG3, [CARG1, #-4] | ||
2005 | | decode_RA CARG1, CARG3 | ||
2006 | | sub CARG2, BASE, CARG1, lsl #3 | ||
2007 | | ldr LFUNC:CARG3, [CARG2, #-32] | ||
2008 | | and LFUNC:CARG3, CARG3, #LJ_GCVMASK | ||
2009 | | ldr CARG3, LFUNC:CARG3->pc | ||
2010 | | ldr KBASE, [CARG3, #PC2PROTO(k)] | ||
2011 | | b <2 | ||
2012 | | | ||
2013 | |9: // Rethrow error from the right C frame. | ||
2014 | | neg CARG2, CARG1 | ||
2015 | | mov CARG1, L | ||
2016 | | bl extern lj_err_throw // (lua_State *L, int errcode) | ||
2017 | |.endif | ||
1829 | | | 2018 | | |
1830 | |//----------------------------------------------------------------------- | 2019 | |//----------------------------------------------------------------------- |
1831 | |//-- Math helper functions ---------------------------------------------- | 2020 | |//-- Math helper functions ---------------------------------------------- |
@@ -3387,6 +3576,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3387 | if (op == BC_FORI) { | 3576 | if (op == BC_FORI) { |
3388 | | csel PC, RC, PC, gt | 3577 | | csel PC, RC, PC, gt |
3389 | } else if (op == BC_JFORI) { | 3578 | } else if (op == BC_JFORI) { |
3579 | | mov PC, RC | ||
3390 | | ldrh RCw, [RC, #-2] | 3580 | | ldrh RCw, [RC, #-2] |
3391 | } else if (op == BC_IFORL) { | 3581 | } else if (op == BC_IFORL) { |
3392 | | csel PC, RC, PC, le | 3582 | | csel PC, RC, PC, le |
@@ -3488,7 +3678,16 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3488 | 3678 | ||
3489 | case BC_JLOOP: | 3679 | case BC_JLOOP: | // Looks up trace number RC in J->trace and jumps to its machine code.
3490 | |.if JIT | 3680 | |.if JIT |
3491 | | NYI | 3681 | | // RA = base (ignored), RC = traceno |
3682 | | ldr CARG1, [GL, #GL_J(trace)] | | // CARG1 = J->trace.
3683 | | mov CARG2, #0 // Traces on ARM64 don't store the trace #, so use 0. | |
3684 | | ldr TRACE:RC, [CARG1, RC, lsl #3] | | // RC = J->trace[traceno], indexed in 8-byte entries.
3685 | | st_vmstate CARG2 | | // vmstate = 0 (the value placed in CARG2 above).
3686 | | ldr RA, TRACE:RC->mcode | | // RA = the trace's mcode pointer.
3687 | | str BASE, GL->jit_base | |
3688 | | str L, GL->tmpbuf.L | |
3689 | | sub sp, sp, #16 // See SPS_FIXED. Avoids sp adjust in every root trace. | |
3690 | | br RA | |
3492 | |.endif | 3692 | |.endif |
3493 | break; | 3693 | break; |
3494 | 3693 | ||
@@ -3546,10 +3745,12 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |||
3546 | case BC_IFUNCV: | 3745 | case BC_IFUNCV: |
3547 | | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 | 3746 | | // BASE = new base, RA = BASE+framesize*8, CARG3 = LFUNC, RC = nargs*8 |
3548 | | ldr CARG1, L->maxstack | 3747 | | ldr CARG1, L->maxstack |
3748 | | movn TMP0, #~LJ_TFUNC | ||
3549 | | add TMP2, BASE, RC | 3749 | | add TMP2, BASE, RC |
3750 | | add LFUNC:CARG3, CARG3, TMP0, lsl #47 | ||
3550 | | add RA, RA, RC | 3751 | | add RA, RA, RC |
3551 | | add TMP0, RC, #16+FRAME_VARG | 3752 | | add TMP0, RC, #16+FRAME_VARG |
3552 | | str LFUNC:CARG3, [TMP2], #8 // Store (untagged) copy of LFUNC. | 3753 | | str LFUNC:CARG3, [TMP2], #8 // Store (tagged) copy of LFUNC. |
3553 | | ldr KBASE, [PC, #-4+PC2PROTO(k)] | 3754 | | ldr KBASE, [PC, #-4+PC2PROTO(k)] |
3554 | | cmp RA, CARG1 | 3755 | | cmp RA, CARG1 |
3555 | | str TMP0, [TMP2], #8 // Store delta + FRAME_VARG. | 3756 | | str TMP0, [TMP2], #8 // Store delta + FRAME_VARG. |
@@ -3736,8 +3937,8 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
3736 | "\t.uleb128 0x1\n" | 3937 | "\t.uleb128 0x1\n" |
3737 | "\t.sleb128 -8\n" | 3938 | "\t.sleb128 -8\n" |
3738 | "\t.byte 30\n" /* Return address is in lr. */ | 3939 | "\t.byte 30\n" /* Return address is in lr. */ |
3739 | "\t.uleb128 1\n" /* augmentation length */ | 3940 | "\t.uleb128 1\n" /* augmentation length */ |
3740 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ | 3941 | "\t.byte 0x1b\n" /* pcrel|sdata4 */ |
3741 | "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ | 3942 | "\t.byte 0xc\n\t.uleb128 31\n\t.uleb128 0\n" /* def_cfa sp */ |
3742 | "\t.align 3\n" | 3943 | "\t.align 3\n" |
3743 | ".LECIE2:\n\n"); | 3944 | ".LECIE2:\n\n"); |
@@ -3748,7 +3949,7 @@ static void emit_asm_debug(BuildCtx *ctx) | |||
3748 | "\t.long .LASFDE3-.Lframe2\n" | 3949 | "\t.long .LASFDE3-.Lframe2\n" |
3749 | "\t.long lj_vm_ffi_call-.\n" | 3950 | "\t.long lj_vm_ffi_call-.\n" |
3750 | "\t.long %d\n" | 3951 | "\t.long %d\n" |
3751 | "\t.uleb128 0\n" /* augmentation length */ | 3952 | "\t.uleb128 0\n" /* augmentation length */ |
3752 | "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ | 3953 | "\t.byte 0xe\n\t.uleb128 32\n" /* def_cfa_offset */ |
3753 | "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ | 3954 | "\t.byte 0x9d\n\t.uleb128 4\n" /* offset fp */ |
3754 | "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ | 3955 | "\t.byte 0x9e\n\t.uleb128 3\n" /* offset lr */ |