diff options
author | Mike Pall <mike> | 2016-04-18 11:16:13 +0200 |
---|---|---|
committer | Mike Pall <mike> | 2016-04-18 11:16:13 +0200 |
commit | 73680a5fc760cb39760e4bbfce1166ce75de237f (patch) | |
tree | c5e46cdc9144566e5b08d9fb8625165d73e68007 | |
parent | e5b5e079c364bb429a85f6c740c478e2dd820381 (diff) | |
download | luajit-73680a5fc760cb39760e4bbfce1166ce75de237f.tar.gz luajit-73680a5fc760cb39760e4bbfce1166ce75de237f.tar.bz2 luajit-73680a5fc760cb39760e4bbfce1166ce75de237f.zip |
x86/x64: Search for exit jumps with instruction length decoder.
Contributed by Peter Cawley.
-rw-r--r-- | src/lj_asm_x86.h | 113 |
1 files changed, 104 insertions, 9 deletions
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index ffd59d33..39a792c2 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h | |||
@@ -2776,6 +2776,106 @@ static void asm_setup_target(ASMState *as) | |||
2776 | 2776 | ||
2777 | /* -- Trace patching ------------------------------------------------------ */ | 2777 | /* -- Trace patching ------------------------------------------------------ */ |
2778 | 2778 | ||
/*
** Classification table for the first opcode byte, indexed by byte value
** (one row per 16 byte values). Consumed by asm_x86_inslen():
**   high nibble = decoding class (the value switched on there),
**   low nibble  = length contribution in bytes, or, for class 1 (prefix
**                 bytes), flag bits that get OR'ed into the prefix mask.
** Classes as handled by the decoder:
**   0 fixed length (+4 if prefix flag 4 was seen)   5 fixed length
**   1 prefix byte                                   6 Group 3 (0xf6/0xf7)
**   2 escape to map_op2                             7 VEX (0xc4/0xc5)
**   3 one extra opcode byte, then ModR/M            8 ModR/M (-2 if flag 2)
**   4 fixed length (-2 if prefix flag 2 was seen)   9 ModR/M
** The rows for bytes 0x40-0x4f and 0xa0-0xaf differ between LJ_64 and
** 32 bit builds.
*/
2779 | static const uint8_t map_op1[256] = { | ||
2780 | 0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x20, | ||
2781 | 0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51, | ||
2782 | 0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51, | ||
2783 | 0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51, | ||
2784 | #if LJ_64 | ||
2785 | 0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x14,0x14,0x14,0x14,0x14,0x14,0x14,0x14, | ||
2786 | #else | ||
2787 | 0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51, | ||
2788 | #endif | ||
2789 | 0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51, | ||
2790 | 0x51,0x51,0x92,0x92,0x10,0x10,0x12,0x11,0x45,0x86,0x52,0x93,0x51,0x51,0x51,0x51, | ||
2791 | 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52, | ||
2792 | 0x93,0x86,0x93,0x93,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92, | ||
2793 | 0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x47,0x51,0x51,0x51,0x51,0x51, | ||
2794 | #if LJ_64 | ||
2795 | 0x59,0x59,0x59,0x59,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51, | ||
2796 | #else | ||
2797 | 0x55,0x55,0x55,0x55,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51, | ||
2798 | #endif | ||
2799 | 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05, | ||
2800 | 0x93,0x93,0x53,0x51,0x70,0x71,0x93,0x86,0x54,0x51,0x53,0x51,0x51,0x52,0x51,0x51, | ||
2801 | 0x92,0x92,0x92,0x92,0x52,0x52,0x51,0x51,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92, | ||
2802 | 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x45,0x45,0x47,0x52,0x51,0x51,0x51,0x51, | ||
2803 | 0x10,0x51,0x10,0x10,0x51,0x51,0x63,0x66,0x51,0x51,0x51,0x51,0x51,0x51,0x92,0x92 | ||
2804 | }; | ||
2805 | |||
/*
** Classification table for the second opcode byte, indexed by byte value
** (one row per 16 byte values). asm_x86_inslen() consults it for the byte
** following a class-2 entry of map_op1 (byte 0x0f) and for the opcode byte
** reached through its VEX (0xc4/0xc5) path.
** Entries use the same encoding as map_op1: high nibble = decoding class,
** low nibble = length contribution in bytes.
*/
2806 | static const uint8_t map_op2[256] = { | ||
2807 | 0x93,0x93,0x93,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x51,0x52,0x51,0x93,0x52,0x94, | ||
2808 | 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, | ||
2809 | 0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, | ||
2810 | 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x34,0x51,0x35,0x51,0x51,0x51,0x51,0x51, | ||
2811 | 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, | ||
2812 | 0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, | ||
2813 | 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, | ||
2814 | 0x94,0x54,0x54,0x54,0x93,0x93,0x93,0x52,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, | ||
2815 | 0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46, | ||
2816 | 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, | ||
2817 | 0x52,0x52,0x52,0x93,0x94,0x93,0x51,0x51,0x52,0x52,0x52,0x93,0x94,0x93,0x93,0x93, | ||
2818 | 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x94,0x93,0x93,0x93,0x93,0x93, | ||
2819 | 0x93,0x93,0x94,0x93,0x94,0x94,0x94,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52, | ||
2820 | 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, | ||
2821 | 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93, | ||
2822 | 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x52 | ||
2823 | }; | ||
2824 | |||
2825 | static uint32_t asm_x86_inslen(const uint8_t* p) | ||
2826 | { | ||
2827 | uint32_t result = 0; | ||
2828 | uint32_t prefixes = 0; | ||
2829 | uint32_t x = map_op1[*p]; | ||
2830 | for (;;) { | ||
2831 | switch (x >> 4) { | ||
2832 | case 0: return result + x + (prefixes & 4); | ||
2833 | case 1: prefixes |= x; x = map_op1[*++p]; result++; break; | ||
2834 | case 2: x = map_op2[*++p]; break; | ||
2835 | case 3: p++; goto mrm; | ||
2836 | case 4: result -= (prefixes & 2); /* fallthrough */ | ||
2837 | case 5: return result + (x & 15); | ||
2838 | case 6: /* Group 3. */ | ||
2839 | if (p[1] & 0x38) return result + 2; | ||
2840 | if ((prefixes & 2) && (x == 0x66)) return result + 4; | ||
2841 | return result + (x & 15); | ||
2842 | case 7: /* VEX c4/c5. */ | ||
2843 | if (LJ_32 && p[1] < 0xc0) { | ||
2844 | x = 2; | ||
2845 | goto mrm; | ||
2846 | } | ||
2847 | if (x == 0x70) { | ||
2848 | x = *++p & 0x1f; | ||
2849 | result++; | ||
2850 | if (x >= 2) { | ||
2851 | p += 2; | ||
2852 | result += 2; | ||
2853 | goto mrm; | ||
2854 | } | ||
2855 | } | ||
2856 | p++; | ||
2857 | result++; | ||
2858 | x = map_op2[*++p]; | ||
2859 | break; | ||
2860 | case 8: result -= (prefixes & 2); /* fallthrough */ | ||
2861 | case 9: mrm: /* ModR/M and possibly SIB. */ | ||
2862 | result += (x & 15); | ||
2863 | x = *++p; | ||
2864 | switch (x >> 6) { | ||
2865 | case 0: if ((x & 7) == 5) return result + 4; break; | ||
2866 | case 1: result++; break; | ||
2867 | case 2: result += 4; break; | ||
2868 | case 3: return result; | ||
2869 | } | ||
2870 | if ((x & 7) == 4) { | ||
2871 | result++; | ||
2872 | if (x < 0x40 && (p[1] & 7) == 5) result += 4; | ||
2873 | } | ||
2874 | return result; | ||
2875 | } | ||
2876 | } | ||
2877 | } | ||
2878 | |||
2779 | /* Patch exit jumps of existing machine code to a new target. */ | 2879 | /* Patch exit jumps of existing machine code to a new target. */ |
2780 | void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) | 2880 | void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) |
2781 | { | 2881 | { |
@@ -2788,18 +2888,13 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) | |||
2788 | if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) | 2888 | if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) |
2789 | *(int32_t *)(p+len-4) = jmprel(p+len, target); | 2889 | *(int32_t *)(p+len-4) = jmprel(p+len, target); |
2790 | /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ | 2890 | /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ |
2791 | for (; p < pe; p++) | 2891 | for (; p < pe; p += asm_x86_inslen(p)) |
2792 | if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) { | 2892 | if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) |
2793 | p += LJ_64 ? 11 : 10; | ||
2794 | break; | 2893 | break; |
2795 | } | ||
2796 | lua_assert(p < pe); | 2894 | lua_assert(p < pe); |
2797 | for (; p < pe; p++) { | 2895 | for (; p < pe; p += asm_x86_inslen(p)) |
2798 | if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) { | 2896 | if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) |
2799 | *(int32_t *)(p+2) = jmprel(p+6, target); | 2897 | *(int32_t *)(p+2) = jmprel(p+6, target); |
2800 | p += 5; | ||
2801 | } | ||
2802 | } | ||
2803 | lj_mcode_sync(T->mcode, T->mcode + T->szmcode); | 2898 | lj_mcode_sync(T->mcode, T->mcode + T->szmcode); |
2804 | lj_mcode_patch(J, mcarea, 1); | 2899 | lj_mcode_patch(J, mcarea, 1); |
2805 | } | 2900 | } |