diff options
| author | Mike Pall <mike> | 2016-04-18 11:16:13 +0200 |
|---|---|---|
| committer | Mike Pall <mike> | 2016-04-18 11:16:13 +0200 |
| commit | 73680a5fc760cb39760e4bbfce1166ce75de237f (patch) | |
| tree | c5e46cdc9144566e5b08d9fb8625165d73e68007 /src | |
| parent | e5b5e079c364bb429a85f6c740c478e2dd820381 (diff) | |
| download | luajit-73680a5fc760cb39760e4bbfce1166ce75de237f.tar.gz luajit-73680a5fc760cb39760e4bbfce1166ce75de237f.tar.bz2 luajit-73680a5fc760cb39760e4bbfce1166ce75de237f.zip | |
x86/x64: Search for exit jumps with instruction length decoder.
Contributed by Peter Cawley.
Diffstat (limited to 'src')
| -rw-r--r-- | src/lj_asm_x86.h | 113 |
1 files changed, 104 insertions, 9 deletions
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h index ffd59d33..39a792c2 100644 --- a/src/lj_asm_x86.h +++ b/src/lj_asm_x86.h | |||
| @@ -2776,6 +2776,106 @@ static void asm_setup_target(ASMState *as) | |||
| 2776 | 2776 | ||
| 2777 | /* -- Trace patching ------------------------------------------------------ */ | 2777 | /* -- Trace patching ------------------------------------------------------ */ |
| 2778 | 2778 | ||
/*
** Instruction length decoder tables, indexed by opcode byte.
**
** map_op1 is indexed by the first opcode byte. map_op2 is indexed by the
** opcode byte that follows a 0x0f escape (and by the opcode byte of a
** VEX-encoded instruction that selects the 0x0f map).
**
** Each entry packs a decoding class in the high nibble and a small number
** in the low nibble. For most classes the low nibble is a byte count; for
** class 1 (prefixes) the whole entry is OR-ed into the prefix flags.
** See asm_x86_inslen() for the meaning of each class.
*/
static const uint8_t map_op1[256] = {
0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x20,
0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,
0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,
0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,
#if LJ_64
0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x14,0x14,0x14,0x14,0x14,0x14,0x14,0x14,
#else
0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,
#endif
0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,
0x51,0x51,0x92,0x92,0x10,0x10,0x12,0x11,0x45,0x86,0x52,0x93,0x51,0x51,0x51,0x51,
0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
0x93,0x86,0x93,0x93,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,
0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x47,0x51,0x51,0x51,0x51,0x51,
#if LJ_64
0x59,0x59,0x59,0x59,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51,
#else
0x55,0x55,0x55,0x55,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51,
#endif
0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,
0x93,0x93,0x53,0x51,0x70,0x71,0x93,0x86,0x54,0x51,0x53,0x51,0x51,0x52,0x51,0x51,
0x92,0x92,0x92,0x92,0x52,0x52,0x51,0x51,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,
0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x45,0x45,0x47,0x52,0x51,0x51,0x51,0x51,
0x10,0x51,0x10,0x10,0x51,0x51,0x63,0x66,0x51,0x51,0x51,0x51,0x51,0x51,0x92,0x92
};

static const uint8_t map_op2[256] = {
0x93,0x93,0x93,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x51,0x52,0x51,0x93,0x52,0x94,
0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x34,0x51,0x35,0x51,0x51,0x51,0x51,0x51,
0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
0x94,0x54,0x54,0x54,0x93,0x93,0x93,0x52,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,
0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
0x52,0x52,0x52,0x93,0x94,0x93,0x51,0x51,0x52,0x52,0x52,0x93,0x94,0x93,0x93,0x93,
0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x94,0x93,0x93,0x93,0x93,0x93,
0x93,0x93,0x94,0x93,0x94,0x94,0x94,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x52
};

/*
** Return the length in bytes of the x86/x64 instruction starting at p.
**
** The decoder is table-driven: x holds the current table entry, result
** accumulates bytes already accounted for (prefixes, VEX bytes) and
** prefixes accumulates the entries of all prefix bytes seen so far.
** Only bit 2 (operand-size override, 0x66) and bit 4 (the 0x14 entries
** of the LJ_64 table, i.e. 0x48..0x4f) of prefixes are tested.
**
** Decoding classes (high nibble of a table entry):
**   0  Fixed length, plus 4 extra bytes if prefix bit 4 is set.
**   1  Prefix byte: record it and decode the next byte.
**   2  0x0f escape: continue with map_op2.
**   3  Second escape byte from map_op2, followed by opcode and ModR/M.
**   4  Fixed length, minus 2 if an operand-size prefix was seen.
**   5  Fixed length.
**   6  Group 3 (0xf6/0xf7): immediate only present if ModR/M reg is 0.
**   7  VEX prefix 0xc4/0xc5.
**   8  ModR/M follows; length shrinks by 2 with an operand-size prefix.
**   9  ModR/M follows.
**
** The caller must guarantee that p points to the start of a complete
** instruction; bytes beyond p[0] are read without bounds checking.
*/
static uint32_t asm_x86_inslen(const uint8_t *p)
{
  uint32_t result = 0;
  uint32_t prefixes = 0;
  uint32_t x = map_op1[*p];
  for (;;) {
    switch (x >> 4) {
    case 0: return result + x + (prefixes & 4);
    case 1: prefixes |= x; x = map_op1[*++p]; result++; break;
    case 2: x = map_op2[*++p]; break;
    case 3: p++; goto mrm;
    case 4: result -= (prefixes & 2);  /* fallthrough */
    case 5: return result + (x & 15);
    case 6:  /* Group 3. */
      /*
      ** Select the byte count for opcode + ModR/M + immediate, then decode
      ** the ModR/M byte like any other instruction. Returning a fixed
      ** length here would ignore SIB and displacement bytes of memory
      ** operands.
      */
      if (p[1] & 0x38) x = 2;  /* reg != 0: no immediate operand. */
      else if ((prefixes & 2) && (x == 0x66)) x = 4;  /* 16 bit immediate. */
      goto mrm;
    case 7:  /* VEX c4/c5. */
#if LJ_32
      /* In 32 bit mode these are only VEX prefixes if mod == 3. */
      if (p[1] < 0xc0) {
	x = 2;
	goto mrm;
      }
#endif
      if (x == 0x70) {  /* Three-byte VEX (0xc4). */
	x = *++p & 0x1f;  /* Opcode map select field. */
	result++;
	if (x >= 2) {
	  /* Here x doubles as the count for opcode + ModR/M (+ imm8). */
	  p += 2;
	  result += 2;
	  goto mrm;
	}
      }
      p++;
      result++;
      x = map_op2[*++p];
      break;
    case 8: result -= (prefixes & 2);  /* fallthrough */
    case 9: mrm:  /* ModR/M and possibly SIB. */
      result += (x & 15);
      x = *++p;  /* From here on x is the ModR/M byte. */
      switch (x >> 6) {
      case 0: if ((x & 7) == 5) return result + 4; break;  /* disp32 only. */
      case 1: result++; break;  /* disp8. */
      case 2: result += 4; break;  /* disp32. */
      case 3: return result;  /* Register operand. */
      }
      if ((x & 7) == 4) {  /* SIB byte follows. */
	result++;
	/* mod == 0 with SIB base 5 has a disp32 instead of a base register. */
	if (x < 0x40 && (p[1] & 7) == 5) result += 4;
      }
      return result;
    }
  }
}
| 2878 | |||
| 2779 | /* Patch exit jumps of existing machine code to a new target. */ | 2879 | /* Patch exit jumps of existing machine code to a new target. */ |
| 2780 | void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) | 2880 | void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) |
| 2781 | { | 2881 | { |
| @@ -2788,18 +2888,13 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) | |||
| 2788 | if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) | 2888 | if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) |
| 2789 | *(int32_t *)(p+len-4) = jmprel(p+len, target); | 2889 | *(int32_t *)(p+len-4) = jmprel(p+len, target); |
| 2790 | /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ | 2890 | /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ |
| 2791 | for (; p < pe; p++) | 2891 | for (; p < pe; p += asm_x86_inslen(p)) |
| 2792 | if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) { | 2892 | if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) |
| 2793 | p += LJ_64 ? 11 : 10; | ||
| 2794 | break; | 2893 | break; |
| 2795 | } | ||
| 2796 | lua_assert(p < pe); | 2894 | lua_assert(p < pe); |
| 2797 | for (; p < pe; p++) { | 2895 | for (; p < pe; p += asm_x86_inslen(p)) |
| 2798 | if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) { | 2896 | if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) |
| 2799 | *(int32_t *)(p+2) = jmprel(p+6, target); | 2897 | *(int32_t *)(p+2) = jmprel(p+6, target); |
| 2800 | p += 5; | ||
| 2801 | } | ||
| 2802 | } | ||
| 2803 | lj_mcode_sync(T->mcode, T->mcode + T->szmcode); | 2898 | lj_mcode_sync(T->mcode, T->mcode + T->szmcode); |
| 2804 | lj_mcode_patch(J, mcarea, 1); | 2899 | lj_mcode_patch(J, mcarea, 1); |
| 2805 | } | 2900 | } |
