diff options
Diffstat (limited to '')
-rw-r--r-- | src/jit/dis_x86.lua | 275 |
1 files changed, 185 insertions, 90 deletions
diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua index deb2f304..d564988e 100644 --- a/src/jit/dis_x86.lua +++ b/src/jit/dis_x86.lua | |||
@@ -15,19 +15,20 @@ | |||
15 | -- Intel and AMD manuals. The supported instruction set is quite extensive | 15 | -- Intel and AMD manuals. The supported instruction set is quite extensive |
16 | -- and reflects what a current generation Intel or AMD CPU implements in | 16 | -- and reflects what a current generation Intel or AMD CPU implements in |
17 | -- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3, | 17 | -- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3, |
18 | -- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM) | 18 | -- SSE4.1, SSE4.2, SSE4a, AVX, AVX2 and even privileged and hypervisor |
19 | -- instructions. | 19 | -- (VMX/SVM) instructions. |
20 | -- | 20 | -- |
21 | -- Notes: | 21 | -- Notes: |
22 | -- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported. | 22 | -- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported. |
23 | -- * No attempt at optimization has been made -- it's fast enough for my needs. | 23 | -- * No attempt at optimization has been made -- it's fast enough for my needs. |
24 | -- * The public API may change when more architectures are added. | ||
25 | ------------------------------------------------------------------------------ | 24 | ------------------------------------------------------------------------------ |
26 | 25 | ||
27 | local type = type | 26 | local type = type |
28 | local sub, byte, format = string.sub, string.byte, string.format | 27 | local sub, byte, format = string.sub, string.byte, string.format |
29 | local match, gmatch, gsub = string.match, string.gmatch, string.gsub | 28 | local match, gmatch, gsub = string.match, string.gmatch, string.gsub |
30 | local lower, rep = string.lower, string.rep | 29 | local lower, rep = string.lower, string.rep |
30 | local bit = require("bit") | ||
31 | local tohex = bit.tohex | ||
31 | 32 | ||
32 | -- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on. | 33 | -- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on. |
33 | local map_opc1_32 = { | 34 | local map_opc1_32 = { |
@@ -76,7 +77,7 @@ local map_opc1_32 = { | |||
76 | "movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi", | 77 | "movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi", |
77 | "movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI", | 78 | "movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI", |
78 | --Cx | 79 | --Cx |
79 | "shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi", | 80 | "shift!Bmu","shift!Vmu","retBw","ret","vex*3$lesVrm","vex*2$ldsVrm","movBmi","movVmi", |
80 | "enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS", | 81 | "enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS", |
81 | --Dx | 82 | --Dx |
82 | "shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb", | 83 | "shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb", |
@@ -101,7 +102,7 @@ local map_opc1_64 = setmetatable({ | |||
101 | [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb", | 102 | [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb", |
102 | [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb", | 103 | [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb", |
103 | [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb", | 104 | [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb", |
104 | [0x82]=false, [0x9a]=false, [0xc4]=false, [0xc5]=false, [0xce]=false, | 105 | [0x82]=false, [0x9a]=false, [0xc4]="vex*3", [0xc5]="vex*2", [0xce]=false, |
105 | [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false, | 106 | [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false, |
106 | }, { __index = map_opc1_32 }) | 107 | }, { __index = map_opc1_32 }) |
107 | 108 | ||
@@ -112,12 +113,12 @@ local map_opc2 = { | |||
112 | [0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret", | 113 | [0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret", |
113 | "invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu", | 114 | "invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu", |
114 | --1x | 115 | --1x |
115 | "movupsXrm|movssXrm|movupdXrm|movsdXrm", | 116 | "movupsXrm|movssXrvm|movupdXrm|movsdXrvm", |
116 | "movupsXmr|movssXmr|movupdXmr|movsdXmr", | 117 | "movupsXmr|movssXmvr|movupdXmr|movsdXmvr", |
117 | "movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm", | 118 | "movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm", |
118 | "movlpsXmr||movlpdXmr", | 119 | "movlpsXmr||movlpdXmr", |
119 | "unpcklpsXrm||unpcklpdXrm", | 120 | "unpcklpsXrvm||unpcklpdXrvm", |
120 | "unpckhpsXrm||unpckhpdXrm", | 121 | "unpckhpsXrvm||unpckhpdXrvm", |
121 | "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm", | 122 | "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm", |
122 | "movhpsXmr||movhpdXmr", | 123 | "movhpsXmr||movhpdXmr", |
123 | "$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm", | 124 | "$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm", |
@@ -126,7 +127,7 @@ local map_opc2 = { | |||
126 | "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil, | 127 | "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil, |
127 | "movapsXrm||movapdXrm", | 128 | "movapsXrm||movapdXrm", |
128 | "movapsXmr||movapdXmr", | 129 | "movapsXmr||movapdXmr", |
129 | "cvtpi2psXrMm|cvtsi2ssXrVmt|cvtpi2pdXrMm|cvtsi2sdXrVmt", | 130 | "cvtpi2psXrMm|cvtsi2ssXrvVmt|cvtpi2pdXrMm|cvtsi2sdXrvVmt", |
130 | "movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr", | 131 | "movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr", |
131 | "cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm", | 132 | "cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm", |
132 | "cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm", | 133 | "cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm", |
@@ -142,27 +143,27 @@ local map_opc2 = { | |||
142 | "cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm", | 143 | "cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm", |
143 | --5x | 144 | --5x |
144 | "movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm", | 145 | "movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm", |
145 | "rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm", | 146 | "rsqrtpsXrm|rsqrtssXrvm","rcppsXrm|rcpssXrvm", |
146 | "andpsXrm||andpdXrm","andnpsXrm||andnpdXrm", | 147 | "andpsXrvm||andpdXrvm","andnpsXrvm||andnpdXrvm", |
147 | "orpsXrm||orpdXrm","xorpsXrm||xorpdXrm", | 148 | "orpsXrvm||orpdXrvm","xorpsXrvm||xorpdXrvm", |
148 | "addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm", | 149 | "addpsXrvm|addssXrvm|addpdXrvm|addsdXrvm","mulpsXrvm|mulssXrvm|mulpdXrvm|mulsdXrvm", |
149 | "cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm", | 150 | "cvtps2pdXrm|cvtss2sdXrvm|cvtpd2psXrm|cvtsd2ssXrvm", |
150 | "cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm", | 151 | "cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm", |
151 | "subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm", | 152 | "subpsXrvm|subssXrvm|subpdXrvm|subsdXrvm","minpsXrvm|minssXrvm|minpdXrvm|minsdXrvm", |
152 | "divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm", | 153 | "divpsXrvm|divssXrvm|divpdXrvm|divsdXrvm","maxpsXrvm|maxssXrvm|maxpdXrvm|maxsdXrvm", |
153 | --6x | 154 | --6x |
154 | "punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm", | 155 | "punpcklbwPrvm","punpcklwdPrvm","punpckldqPrvm","packsswbPrvm", |
155 | "pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm", | 156 | "pcmpgtbPrvm","pcmpgtwPrvm","pcmpgtdPrvm","packuswbPrvm", |
156 | "punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm", | 157 | "punpckhbwPrvm","punpckhwdPrvm","punpckhdqPrvm","packssdwPrvm", |
157 | "||punpcklqdqXrm","||punpckhqdqXrm", | 158 | "||punpcklqdqXrvm","||punpckhqdqXrvm", |
158 | "movPrVSm","movqMrm|movdquXrm|movdqaXrm", | 159 | "movPrVSm","movqMrm|movdquXrm|movdqaXrm", |
159 | --7x | 160 | --7x |
160 | "pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu", | 161 | "pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pvmu", |
161 | "pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu", | 162 | "pshiftd!Pvmu","pshiftq!Mvmu||pshiftdq!Xvmu", |
162 | "pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|", | 163 | "pcmpeqbPrvm","pcmpeqwPrvm","pcmpeqdPrvm","emms*|", |
163 | "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$", | 164 | "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$", |
164 | nil,nil, | 165 | nil,nil, |
165 | "||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm", | 166 | "||haddpdXrvm|haddpsXrvm","||hsubpdXrvm|hsubpsXrvm", |
166 | "movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr", | 167 | "movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr", |
167 | --8x | 168 | --8x |
168 | "joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj", | 169 | "joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj", |
@@ -180,27 +181,27 @@ nil,nil, | |||
180 | "bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt", | 181 | "bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt", |
181 | --Cx | 182 | --Cx |
182 | "xaddBmr","xaddVmr", | 183 | "xaddBmr","xaddVmr", |
183 | "cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|", | 184 | "cmppsXrvmu|cmpssXrvmu|cmppdXrvmu|cmpsdXrvmu","$movntiVmr|", |
184 | "pinsrwPrWmu","pextrwDrPmu", | 185 | "pinsrwPrvWmu","pextrwDrPmu", |
185 | "shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp", | 186 | "shufpsXrvmu||shufpdXrvmu","$cmpxchg!Qmp", |
186 | "bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR", | 187 | "bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR", |
187 | --Dx | 188 | --Dx |
188 | "||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm", | 189 | "||addsubpdXrvm|addsubpsXrvm","psrlwPrvm","psrldPrvm","psrlqPrvm", |
189 | "paddqPrm","pmullwPrm", | 190 | "paddqPrvm","pmullwPrvm", |
190 | "|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm", | 191 | "|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm", |
191 | "psubusbPrm","psubuswPrm","pminubPrm","pandPrm", | 192 | "psubusbPrvm","psubuswPrvm","pminubPrvm","pandPrvm", |
192 | "paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm", | 193 | "paddusbPrvm","padduswPrvm","pmaxubPrvm","pandnPrvm", |
193 | --Ex | 194 | --Ex |
194 | "pavgbPrm","psrawPrm","psradPrm","pavgwPrm", | 195 | "pavgbPrvm","psrawPrvm","psradPrvm","pavgwPrvm", |
195 | "pmulhuwPrm","pmulhwPrm", | 196 | "pmulhuwPrvm","pmulhwPrvm", |
196 | "|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr", | 197 | "|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr", |
197 | "psubsbPrm","psubswPrm","pminswPrm","porPrm", | 198 | "psubsbPrvm","psubswPrvm","pminswPrvm","porPrvm", |
198 | "paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm", | 199 | "paddsbPrvm","paddswPrvm","pmaxswPrvm","pxorPrvm", |
199 | --Fx | 200 | --Fx |
200 | "|||lddquXrm","psllwPrm","pslldPrm","psllqPrm", | 201 | "|||lddquXrm","psllwPrvm","pslldPrvm","psllqPrvm", |
201 | "pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$", | 202 | "pmuludqPrvm","pmaddwdPrvm","psadbwPrvm","maskmovqMrm||maskmovdquXrm$", |
202 | "psubbPrm","psubwPrm","psubdPrm","psubqPrm", | 203 | "psubbPrvm","psubwPrvm","psubdPrvm","psubqPrvm", |
203 | "paddbPrm","paddwPrm","padddPrm","ud", | 204 | "paddbPrvm","paddwPrvm","padddPrvm","ud", |
204 | } | 205 | } |
205 | assert(map_opc2[255] == "ud") | 206 | assert(map_opc2[255] == "ud") |
206 | 207 | ||
@@ -208,49 +209,73 @@ assert(map_opc2[255] == "ud") | |||
208 | local map_opc3 = { | 209 | local map_opc3 = { |
209 | ["38"] = { -- [66] 0f 38 xx | 210 | ["38"] = { -- [66] 0f 38 xx |
210 | --0x | 211 | --0x |
211 | [0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm", | 212 | [0]="pshufbPrvm","phaddwPrvm","phadddPrvm","phaddswPrvm", |
212 | "pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm", | 213 | "pmaddubswPrvm","phsubwPrvm","phsubdPrvm","phsubswPrvm", |
213 | "psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm", | 214 | "psignbPrvm","psignwPrvm","psigndPrvm","pmulhrswPrvm", |
214 | nil,nil,nil,nil, | 215 | "||permilpsXrvm","||permilpdXrvm",nil,nil, |
215 | --1x | 216 | --1x |
216 | "||pblendvbXrma",nil,nil,nil, | 217 | "||pblendvbXrma",nil,nil,nil, |
217 | "||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm", | 218 | "||blendvpsXrma","||blendvpdXrma","||permpsXrvm","||ptestXrm", |
218 | nil,nil,nil,nil, | 219 | "||broadcastssXrm","||broadcastsdXrm","||broadcastf128XrlXm",nil, |
219 | "pabsbPrm","pabswPrm","pabsdPrm",nil, | 220 | "pabsbPrm","pabswPrm","pabsdPrm",nil, |
220 | --2x | 221 | --2x |
221 | "||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm", | 222 | "||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm", |
222 | "||pmovsxwqXrm","||pmovsxdqXrm",nil,nil, | 223 | "||pmovsxwqXrm","||pmovsxdqXrm",nil,nil, |
223 | "||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm", | 224 | "||pmuldqXrvm","||pcmpeqqXrvm","||$movntdqaXrm","||packusdwXrvm", |
224 | nil,nil,nil,nil, | 225 | "||maskmovpsXrvm","||maskmovpdXrvm","||maskmovpsXmvr","||maskmovpdXmvr", |
225 | --3x | 226 | --3x |
226 | "||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm", | 227 | "||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm", |
227 | "||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm", | 228 | "||pmovzxwqXrm","||pmovzxdqXrm","||permdXrvm","||pcmpgtqXrvm", |
228 | "||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm", | 229 | "||pminsbXrvm","||pminsdXrvm","||pminuwXrvm","||pminudXrvm", |
229 | "||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm", | 230 | "||pmaxsbXrvm","||pmaxsdXrvm","||pmaxuwXrvm","||pmaxudXrvm", |
230 | --4x | 231 | --4x |
231 | "||pmulddXrm","||phminposuwXrm", | 232 | "||pmulddXrvm","||phminposuwXrm",nil,nil, |
233 | nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm", | ||
234 | --5x | ||
235 | [0x58] = "||pbroadcastdXrlXm",[0x59] = "||pbroadcastqXrlXm", | ||
236 | [0x5a] = "||broadcasti128XrlXm", | ||
237 | --7x | ||
238 | [0x78] = "||pbroadcastbXrlXm",[0x79] = "||pbroadcastwXrlXm", | ||
239 | --8x | ||
240 | [0x8c] = "||pmaskmovXrvVSm", | ||
241 | [0x8e] = "||pmaskmovVSmXvr", | ||
242 | --Dx | ||
243 | [0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm", | ||
244 | [0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm", | ||
232 | --Fx | 245 | --Fx |
233 | [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt", | 246 | [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt", |
247 | [0xf7] = "|sarxVrmv|shlxVrmv|shrxVrmv", | ||
234 | }, | 248 | }, |
235 | 249 | ||
236 | ["3a"] = { -- [66] 0f 3a xx | 250 | ["3a"] = { -- [66] 0f 3a xx |
237 | --0x | 251 | --0x |
238 | [0x00]=nil,nil,nil,nil,nil,nil,nil,nil, | 252 | [0x00]="||permqXrmu","||permpdXrmu","||pblenddXrvmu",nil, |
239 | "||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu", | 253 | "||permilpsXrmu","||permilpdXrmu","||perm2f128Xrvmu",nil, |
240 | "||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu", | 254 | "||roundpsXrmu","||roundpdXrmu","||roundssXrvmu","||roundsdXrvmu", |
255 | "||blendpsXrvmu","||blendpdXrvmu","||pblendwXrvmu","palignrPrvmu", | ||
241 | --1x | 256 | --1x |
242 | nil,nil,nil,nil, | 257 | nil,nil,nil,nil, |
243 | "||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru", | 258 | "||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru", |
244 | nil,nil,nil,nil,nil,nil,nil,nil, | 259 | "||insertf128XrvlXmu","||extractf128XlXmYru",nil,nil, |
260 | nil,nil,nil,nil, | ||
245 | --2x | 261 | --2x |
246 | "||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil, | 262 | "||pinsrbXrvVmu","||insertpsXrvmu","||pinsrXrvVmuS",nil, |
263 | --3x | ||
264 | [0x38] = "||inserti128Xrvmu",[0x39] = "||extracti128XlXmYru", | ||
247 | --4x | 265 | --4x |
248 | [0x40] = "||dppsXrmu", | 266 | [0x40] = "||dppsXrvmu", |
249 | [0x41] = "||dppdXrmu", | 267 | [0x41] = "||dppdXrvmu", |
250 | [0x42] = "||mpsadbwXrmu", | 268 | [0x42] = "||mpsadbwXrvmu", |
269 | [0x44] = "||pclmulqdqXrvmu", | ||
270 | [0x46] = "||perm2i128Xrvmu", | ||
271 | [0x4a] = "||blendvpsXrvmb",[0x4b] = "||blendvpdXrvmb", | ||
272 | [0x4c] = "||pblendvbXrvmb", | ||
251 | --6x | 273 | --6x |
252 | [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu", | 274 | [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu", |
253 | [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu", | 275 | [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu", |
276 | [0xdf] = "||aeskeygenassistXrmu", | ||
277 | --Fx | ||
278 | [0xf0] = "|||rorxVrmu", | ||
254 | }, | 279 | }, |
255 | } | 280 | } |
256 | 281 | ||
@@ -354,17 +379,19 @@ local map_regs = { | |||
354 | "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext! | 379 | "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext! |
355 | X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", | 380 | X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", |
356 | "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" }, | 381 | "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" }, |
382 | Y = { "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", | ||
383 | "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15" }, | ||
357 | } | 384 | } |
358 | local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" } | 385 | local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" } |
359 | 386 | ||
360 | -- Maps for size names. | 387 | -- Maps for size names. |
361 | local map_sz2n = { | 388 | local map_sz2n = { |
362 | B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, | 389 | B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, Y = 32, |
363 | } | 390 | } |
364 | local map_sz2prefix = { | 391 | local map_sz2prefix = { |
365 | B = "byte", W = "word", D = "dword", | 392 | B = "byte", W = "word", D = "dword", |
366 | Q = "qword", | 393 | Q = "qword", |
367 | M = "qword", X = "xword", | 394 | M = "qword", X = "xword", Y = "yword", |
368 | F = "dword", G = "qword", -- No need for sizes/register names for these two. | 395 | F = "dword", G = "qword", -- No need for sizes/register names for these two. |
369 | } | 396 | } |
370 | 397 | ||
@@ -387,10 +414,13 @@ local function putop(ctx, text, operands) | |||
387 | if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end | 414 | if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end |
388 | if ctx.rex then | 415 | if ctx.rex then |
389 | local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "").. | 416 | local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "").. |
390 | (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "") | 417 | (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "").. |
391 | if t ~= "" then text = "rex."..t.." "..text end | 418 | (ctx.vexl and "l" or "") |
419 | if ctx.vexv and ctx.vexv ~= 0 then t = t.."v"..ctx.vexv end | ||
420 | if t ~= "" then text = ctx.rex.."."..t.." "..text | ||
421 | elseif ctx.rex == "vex" then text = "v"..text end | ||
392 | ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false | 422 | ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false |
393 | ctx.rex = false | 423 | ctx.rex = false; ctx.vexl = false; ctx.vexv = false |
394 | end | 424 | end |
395 | if ctx.seg then | 425 | if ctx.seg then |
396 | local text2, n = gsub(text, "%[", "["..ctx.seg..":") | 426 | local text2, n = gsub(text, "%[", "["..ctx.seg..":") |
@@ -405,6 +435,7 @@ local function putop(ctx, text, operands) | |||
405 | end | 435 | end |
406 | ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text)) | 436 | ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text)) |
407 | ctx.mrm = false | 437 | ctx.mrm = false |
438 | ctx.vexv = false | ||
408 | ctx.start = pos | 439 | ctx.start = pos |
409 | ctx.imm = nil | 440 | ctx.imm = nil |
410 | end | 441 | end |
@@ -413,7 +444,7 @@ end | |||
413 | local function clearprefixes(ctx) | 444 | local function clearprefixes(ctx) |
414 | ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false | 445 | ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false |
415 | ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false | 446 | ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false |
416 | ctx.rex = false; ctx.a32 = false | 447 | ctx.rex = false; ctx.a32 = false; ctx.vexl = false |
417 | end | 448 | end |
418 | 449 | ||
419 | -- Fallback for incomplete opcodes at the end. | 450 | -- Fallback for incomplete opcodes at the end. |
@@ -450,9 +481,9 @@ end | |||
450 | -- Process pattern string and generate the operands. | 481 | -- Process pattern string and generate the operands. |
451 | local function putpat(ctx, name, pat) | 482 | local function putpat(ctx, name, pat) |
452 | local operands, regs, sz, mode, sp, rm, sc, rx, sdisp | 483 | local operands, regs, sz, mode, sp, rm, sc, rx, sdisp |
453 | local code, pos, stop = ctx.code, ctx.pos, ctx.stop | 484 | local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl |
454 | 485 | ||
455 | -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz | 486 | -- Chars used: 1DFGIMPQRSTUVWXYabcdfgijlmoprstuvwxyz |
456 | for p in gmatch(pat, ".") do | 487 | for p in gmatch(pat, ".") do |
457 | local x = nil | 488 | local x = nil |
458 | if p == "V" or p == "U" then | 489 | if p == "V" or p == "U" then |
@@ -467,11 +498,13 @@ local function putpat(ctx, name, pat) | |||
467 | elseif p == "B" then | 498 | elseif p == "B" then |
468 | sz = "B" | 499 | sz = "B" |
469 | regs = ctx.rex and map_regs.B64 or map_regs.B | 500 | regs = ctx.rex and map_regs.B64 or map_regs.B |
470 | elseif match(p, "[WDQMXFG]") then | 501 | elseif match(p, "[WDQMXYFG]") then |
471 | sz = p | 502 | sz = p |
503 | if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end | ||
472 | regs = map_regs[sz] | 504 | regs = map_regs[sz] |
473 | elseif p == "P" then | 505 | elseif p == "P" then |
474 | sz = ctx.o16 and "X" or "M"; ctx.o16 = false | 506 | sz = ctx.o16 and "X" or "M"; ctx.o16 = false |
507 | if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end | ||
475 | regs = map_regs[sz] | 508 | regs = map_regs[sz] |
476 | elseif p == "S" then | 509 | elseif p == "S" then |
477 | name = name..lower(sz) | 510 | name = name..lower(sz) |
@@ -484,6 +517,10 @@ local function putpat(ctx, name, pat) | |||
484 | local imm = getimm(ctx, pos, 1); if not imm then return end | 517 | local imm = getimm(ctx, pos, 1); if not imm then return end |
485 | x = format("0x%02x", imm) | 518 | x = format("0x%02x", imm) |
486 | pos = pos+1 | 519 | pos = pos+1 |
520 | elseif p == "b" then | ||
521 | local imm = getimm(ctx, pos, 1); if not imm then return end | ||
522 | x = regs[imm/16+1] | ||
523 | pos = pos+1 | ||
487 | elseif p == "w" then | 524 | elseif p == "w" then |
488 | local imm = getimm(ctx, pos, 2); if not imm then return end | 525 | local imm = getimm(ctx, pos, 2); if not imm then return end |
489 | x = format("0x%x", imm) | 526 | x = format("0x%x", imm) |
@@ -532,7 +569,7 @@ local function putpat(ctx, name, pat) | |||
532 | local lo = imm % 0x1000000 | 569 | local lo = imm % 0x1000000 |
533 | x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo) | 570 | x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo) |
534 | else | 571 | else |
535 | x = format("0x%08x", imm) | 572 | x = "0x"..tohex(imm) |
536 | end | 573 | end |
537 | elseif p == "R" then | 574 | elseif p == "R" then |
538 | local r = byte(code, pos-1, pos-1)%8 | 575 | local r = byte(code, pos-1, pos-1)%8 |
@@ -616,8 +653,13 @@ local function putpat(ctx, name, pat) | |||
616 | else | 653 | else |
617 | x = "CR"..sp | 654 | x = "CR"..sp |
618 | end | 655 | end |
656 | elseif p == "v" then | ||
657 | if ctx.vexv then | ||
658 | x = regs[ctx.vexv+1]; ctx.vexv = false | ||
659 | end | ||
619 | elseif p == "y" then x = "DR"..sp | 660 | elseif p == "y" then x = "DR"..sp |
620 | elseif p == "z" then x = "TR"..sp | 661 | elseif p == "z" then x = "TR"..sp |
662 | elseif p == "l" then vexl = false | ||
621 | elseif p == "t" then | 663 | elseif p == "t" then |
622 | else | 664 | else |
623 | error("bad pattern `"..pat.."'") | 665 | error("bad pattern `"..pat.."'") |
@@ -692,7 +734,7 @@ map_act = { | |||
692 | B = putpat, W = putpat, D = putpat, Q = putpat, | 734 | B = putpat, W = putpat, D = putpat, Q = putpat, |
693 | V = putpat, U = putpat, T = putpat, | 735 | V = putpat, U = putpat, T = putpat, |
694 | M = putpat, X = putpat, P = putpat, | 736 | M = putpat, X = putpat, P = putpat, |
695 | F = putpat, G = putpat, | 737 | F = putpat, G = putpat, Y = putpat, |
696 | 738 | ||
697 | -- Collect prefixes. | 739 | -- Collect prefixes. |
698 | [":"] = function(ctx, name, pat) | 740 | [":"] = function(ctx, name, pat) |
@@ -753,15 +795,68 @@ map_act = { | |||
753 | 795 | ||
754 | -- REX prefix. | 796 | -- REX prefix. |
755 | rex = function(ctx, name, pat) | 797 | rex = function(ctx, name, pat) |
756 | if ctx.rex then return unknown(ctx) end -- Only 1 REX prefix allowed. | 798 | if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed. |
757 | for p in gmatch(pat, ".") do ctx["rex"..p] = true end | 799 | for p in gmatch(pat, ".") do ctx["rex"..p] = true end |
758 | ctx.rex = true | 800 | ctx.rex = "rex" |
801 | end, | ||
802 | |||
803 | -- VEX prefix. | ||
804 | vex = function(ctx, name, pat) | ||
805 | if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed. | ||
806 | ctx.rex = "vex" | ||
807 | local pos = ctx.pos | ||
808 | if ctx.mrm then | ||
809 | ctx.mrm = nil | ||
810 | pos = pos-1 | ||
811 | end | ||
812 | local b = byte(ctx.code, pos, pos) | ||
813 | if not b then return incomplete(ctx) end | ||
814 | pos = pos+1 | ||
815 | if b < 128 then ctx.rexr = true end | ||
816 | local m = 1 | ||
817 | if pat == "3" then | ||
818 | m = b%32; b = (b-m)/32 | ||
819 | local nb = b%2; b = (b-nb)/2 | ||
820 | if nb == 0 then ctx.rexb = true end | ||
821 | local nx = b%2; b = (b-nx)/2 | ||
822 | if nx == 0 then ctx.rexx = true end | ||
823 | b = byte(ctx.code, pos, pos) | ||
824 | if not b then return incomplete(ctx) end | ||
825 | pos = pos+1 | ||
826 | if b >= 128 then ctx.rexw = true end | ||
827 | end | ||
828 | ctx.pos = pos | ||
829 | local map | ||
830 | if m == 1 then map = map_opc2 | ||
831 | elseif m == 2 then map = map_opc3["38"] | ||
832 | elseif m == 3 then map = map_opc3["3a"] | ||
833 | else return unknown(ctx) end | ||
834 | local p = b%4; b = (b-p)/4 | ||
835 | if p == 1 then ctx.o16 = "o16" | ||
836 | elseif p == 2 then ctx.rep = "rep" | ||
837 | elseif p == 3 then ctx.rep = "repne" end | ||
838 | local l = b%2; b = (b-l)/2 | ||
839 | if l ~= 0 then ctx.vexl = true end | ||
840 | ctx.vexv = (-1-b)%16 | ||
841 | return dispatchmap(ctx, map) | ||
759 | end, | 842 | end, |
760 | 843 | ||
761 | -- Special case for nop with REX prefix. | 844 | -- Special case for nop with REX prefix. |
762 | nop = function(ctx, name, pat) | 845 | nop = function(ctx, name, pat) |
763 | return dispatch(ctx, ctx.rex and pat or "nop") | 846 | return dispatch(ctx, ctx.rex and pat or "nop") |
764 | end, | 847 | end, |
848 | |||
849 | -- Special case for 0F 77. | ||
850 | emms = function(ctx, name, pat) | ||
851 | if ctx.rex ~= "vex" then | ||
852 | return putop(ctx, "emms") | ||
853 | elseif ctx.vexl then | ||
854 | ctx.vexl = false | ||
855 | return putop(ctx, "zeroall") | ||
856 | else | ||
857 | return putop(ctx, "zeroupper") | ||
858 | end | ||
859 | end, | ||
765 | } | 860 | } |
766 | 861 | ||
767 | ------------------------------------------------------------------------------ | 862 | ------------------------------------------------------------------------------ |
@@ -782,7 +877,7 @@ local function disass_block(ctx, ofs, len) | |||
782 | end | 877 | end |
783 | 878 | ||
784 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). | 879 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). |
785 | local function create_(code, addr, out) | 880 | local function create(code, addr, out) |
786 | local ctx = {} | 881 | local ctx = {} |
787 | ctx.code = code | 882 | ctx.code = code |
788 | ctx.addr = (addr or 0) - 1 | 883 | ctx.addr = (addr or 0) - 1 |
@@ -796,8 +891,8 @@ local function create_(code, addr, out) | |||
796 | return ctx | 891 | return ctx |
797 | end | 892 | end |
798 | 893 | ||
799 | local function create64_(code, addr, out) | 894 | local function create64(code, addr, out) |
800 | local ctx = create_(code, addr, out) | 895 | local ctx = create(code, addr, out) |
801 | ctx.x64 = true | 896 | ctx.x64 = true |
802 | ctx.map1 = map_opc1_64 | 897 | ctx.map1 = map_opc1_64 |
803 | ctx.aregs = map_regs.Q | 898 | ctx.aregs = map_regs.Q |
@@ -805,32 +900,32 @@ local function create64_(code, addr, out) | |||
805 | end | 900 | end |
806 | 901 | ||
807 | -- Simple API: disassemble code (a string) at address and output via out. | 902 | -- Simple API: disassemble code (a string) at address and output via out. |
808 | local function disass_(code, addr, out) | 903 | local function disass(code, addr, out) |
809 | create_(code, addr, out):disass() | 904 | create(code, addr, out):disass() |
810 | end | 905 | end |
811 | 906 | ||
812 | local function disass64_(code, addr, out) | 907 | local function disass64(code, addr, out) |
813 | create64_(code, addr, out):disass() | 908 | create64(code, addr, out):disass() |
814 | end | 909 | end |
815 | 910 | ||
816 | -- Return register name for RID. | 911 | -- Return register name for RID. |
817 | local function regname_(r) | 912 | local function regname(r) |
818 | if r < 8 then return map_regs.D[r+1] end | 913 | if r < 8 then return map_regs.D[r+1] end |
819 | return map_regs.X[r-7] | 914 | return map_regs.X[r-7] |
820 | end | 915 | end |
821 | 916 | ||
822 | local function regname64_(r) | 917 | local function regname64(r) |
823 | if r < 16 then return map_regs.Q[r+1] end | 918 | if r < 16 then return map_regs.Q[r+1] end |
824 | return map_regs.X[r-15] | 919 | return map_regs.X[r-15] |
825 | end | 920 | end |
826 | 921 | ||
827 | -- Public module functions. | 922 | -- Public module functions. |
828 | module(...) | 923 | return { |
829 | 924 | create = create, | |
830 | create = create_ | 925 | create64 = create64, |
831 | create64 = create64_ | 926 | disass = disass, |
832 | disass = disass_ | 927 | disass64 = disass64, |
833 | disass64 = disass64_ | 928 | regname = regname, |
834 | regname = regname_ | 929 | regname64 = regname64 |
835 | regname64 = regname64_ | 930 | } |
836 | 931 | ||