summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Mike Pall <mike> 2016-04-18 11:16:13 +0200
committer: Mike Pall <mike> 2016-04-18 11:16:13 +0200
commit: 73680a5fc760cb39760e4bbfce1166ce75de237f (patch)
tree: c5e46cdc9144566e5b08d9fb8625165d73e68007
parent: e5b5e079c364bb429a85f6c740c478e2dd820381 (diff)
downloadluajit-73680a5fc760cb39760e4bbfce1166ce75de237f.tar.gz
luajit-73680a5fc760cb39760e4bbfce1166ce75de237f.tar.bz2
luajit-73680a5fc760cb39760e4bbfce1166ce75de237f.zip
x86/x64: Search for exit jumps with instruction length decoder.
Contributed by Peter Cawley.
-rw-r--r-- src/lj_asm_x86.h 113
1 files changed, 104 insertions, 9 deletions
diff --git a/src/lj_asm_x86.h b/src/lj_asm_x86.h
index ffd59d33..39a792c2 100644
--- a/src/lj_asm_x86.h
+++ b/src/lj_asm_x86.h
@@ -2776,6 +2776,106 @@ static void asm_setup_target(ASMState *as)
2776 2776
2777/* -- Trace patching ------------------------------------------------------ */ 2777/* -- Trace patching ------------------------------------------------------ */
2778 2778
/*
** Instruction length descriptors, indexed by the first opcode byte.
**
** Each descriptor is 0xCN: the high nibble C selects a decode class (the
** switch in asm_x86_inslen), the low nibble N is a byte count or a set of
** prefix flags:
**   0x0N  N bytes, plus 4 more if prefix flag 4 is set.
**   0x1N  Prefix byte: N is OR'ed into the prefix flags, decoding restarts
**         with the next byte.
**   0x2N  Escape byte: the next byte is looked up in map_op2.
**   0x3N  One more opcode byte follows, then a ModR/M byte; N bytes up to
**         and including the ModR/M byte.
**   0x4N  N bytes, minus 2 if prefix flag 2 is set.
**   0x5N  Exactly N bytes.
**   0x6N  Group 3 (f6/f7): length depends on the ModR/M reg field.
**   0x7N  VEX c4/c5 (or the legacy opcode with a ModR/M byte on x86).
**   0x8N  Like 0x9N, minus 2 if prefix flag 2 is set.
**   0x9N  N bytes up to and including the ModR/M byte, plus whatever
**         SIB/displacement bytes the ModR/M byte asks for.
*/
static const uint8_t map_op1[256] = {
0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x20,
0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x51,0x51,
0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,
0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,0x92,0x92,0x92,0x92,0x52,0x45,0x10,0x51,
#if LJ_64
0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x14,0x14,0x14,0x14,0x14,0x14,0x14,0x14,
#else
0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,
#endif
0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,
0x51,0x51,0x92,0x92,0x10,0x10,0x12,0x11,0x45,0x86,0x52,0x93,0x51,0x51,0x51,0x51,
0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
0x93,0x86,0x93,0x93,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,
0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x51,0x47,0x51,0x51,0x51,0x51,0x51,
#if LJ_64
0x59,0x59,0x59,0x59,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51,
#else
0x55,0x55,0x55,0x55,0x51,0x51,0x51,0x51,0x52,0x45,0x51,0x51,0x51,0x51,0x51,0x51,
#endif
0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x05,0x05,0x05,0x05,0x05,0x05,0x05,0x05,
0x93,0x93,0x53,0x51,0x70,0x71,0x93,0x86,0x54,0x51,0x53,0x51,0x51,0x52,0x51,0x51,
0x92,0x92,0x92,0x92,0x52,0x52,0x51,0x51,0x92,0x92,0x92,0x92,0x92,0x92,0x92,0x92,
0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x45,0x45,0x47,0x52,0x51,0x51,0x51,0x51,
0x10,0x51,0x10,0x10,0x51,0x51,0x63,0x66,0x51,0x51,0x51,0x51,0x51,0x51,0x92,0x92
};

/*
** Instruction length descriptors for the second opcode map, indexed by the
** byte following the escape byte. Same 0xCN encoding as map_op1. The byte
** counts already include the escape byte, since the decoder does not count
** it separately when switching tables.
*/
static const uint8_t map_op2[256] = {
0x93,0x93,0x93,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x51,0x52,0x51,0x93,0x52,0x94,
0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x34,0x51,0x35,0x51,0x51,0x51,0x51,0x51,
0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
0x53,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
0x94,0x54,0x54,0x54,0x93,0x93,0x93,0x52,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,
0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
0x52,0x52,0x52,0x93,0x94,0x93,0x51,0x51,0x52,0x52,0x52,0x93,0x94,0x93,0x93,0x93,
0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x94,0x93,0x93,0x93,0x93,0x93,
0x93,0x93,0x94,0x93,0x94,0x94,0x94,0x93,0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x52
};

/*
** Compute the length in bytes of the x86/x64 instruction starting at p.
**
** The decoder is table-driven (see map_op1/map_op2) and reads ahead of p
** without any bounds check: the caller must guarantee that p points to a
** complete, valid instruction. There is no error return.
*/
static uint32_t asm_x86_inslen(const uint8_t *p)
{
  uint32_t result = 0;  /* Bytes accounted for so far. */
  uint32_t prefixes = 0;  /* Accumulated low nibbles of prefix descriptors. */
  uint32_t x = map_op1[*p];
  for (;;) {
    switch (x >> 4) {
    case 0: return result + x + (prefixes & 4);
    case 1: prefixes |= x; x = map_op1[*++p]; result++; break;
    case 2: x = map_op2[*++p]; break;
    case 3: p++; goto mrm;
    case 4: result -= (prefixes & 2);  /* fallthrough */
    case 5: return result + (x & 15);
    case 6:  /* Group 3. */
      /*
      ** Only the /0 form (reg field == 0) carries an immediate, whose size
      ** is given by the table entry (or 2 bytes for f7 with prefix flag 2).
      ** All forms have a ModR/M byte which may add SIB/displacement bytes,
      ** so always continue with the common ModR/M decoding instead of
      ** returning a fixed length.
      */
      if (p[1] & 0x38) x = 2;
      else if ((prefixes & 2) && (x == 0x66)) x = 4;
      goto mrm;
    case 7:  /* VEX c4/c5. */
#if LJ_32
      /* A second byte below 0xc0 means this is not VEX: opcode + ModR/M. */
      if (p[1] < 0xc0) {
        x = 2;
        goto mrm;
      }
#endif
      if (x == 0x70) {  /* First of two payload bytes after c4. */
        x = *++p & 0x1f;
        result++;
        if (x >= 2) {
          /* x is used directly as the base length for the ModR/M path. */
          p += 2;
          result += 2;
          goto mrm;
        }
      }
      p++;
      result++;
      x = map_op2[*++p];
      break;
    case 8: result -= (prefixes & 2);  /* fallthrough */
    case 9: mrm:  /* ModR/M and possibly SIB. */
      result += (x & 15);
      x = *++p;
      switch (x >> 6) {
      case 0: if ((x & 7) == 5) return result + 4; break;  /* disp32 only. */
      case 1: result++; break;  /* disp8. */
      case 2: result += 4; break;  /* disp32. */
      case 3: return result;  /* Register operand. */
      }
      if ((x & 7) == 4) {  /* SIB byte follows. */
        result++;
        /* mod == 0 with SIB base == 5: disp32 follows. */
        if (x < 0x40 && (p[1] & 7) == 5) result += 4;
      }
      return result;
    }
  }
}
2878
2779/* Patch exit jumps of existing machine code to a new target. */ 2879/* Patch exit jumps of existing machine code to a new target. */
2780void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) 2880void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
2781{ 2881{
@@ -2788,18 +2888,13 @@ void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
2788 if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px) 2888 if (len > 5 && p[len-5] == XI_JMP && p+len-6 + *(int32_t *)(p+len-4) == px)
2789 *(int32_t *)(p+len-4) = jmprel(p+len, target); 2889 *(int32_t *)(p+len-4) = jmprel(p+len, target);
2790 /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */ 2890 /* Do not patch parent exit for a stack check. Skip beyond vmstate update. */
2791 for (; p < pe; p++) 2891 for (; p < pe; p += asm_x86_inslen(p))
2792 if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi) { 2892 if (*(uint32_t *)(p+(LJ_64 ? 3 : 2)) == stateaddr && p[0] == XI_MOVmi)
2793 p += LJ_64 ? 11 : 10;
2794 break; 2893 break;
2795 }
2796 lua_assert(p < pe); 2894 lua_assert(p < pe);
2797 for (; p < pe; p++) { 2895 for (; p < pe; p += asm_x86_inslen(p))
2798 if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px) { 2896 if ((*(uint16_t *)p & 0xf0ff) == 0x800f && p + *(int32_t *)(p+2) == px)
2799 *(int32_t *)(p+2) = jmprel(p+6, target); 2897 *(int32_t *)(p+2) = jmprel(p+6, target);
2800 p += 5;
2801 }
2802 }
2803 lj_mcode_sync(T->mcode, T->mcode + T->szmcode); 2898 lj_mcode_sync(T->mcode, T->mcode + T->szmcode);
2804 lj_mcode_patch(J, mcarea, 1); 2899 lj_mcode_patch(J, mcarea, 1);
2805} 2900}