diff options
| author | Mike Pall <mike> | 2015-10-24 18:43:47 +0200 |
|---|---|---|
| committer | Mike Pall <mike> | 2015-10-24 18:43:47 +0200 |
| commit | 8a13c9cebf368de9338c3c3c8c30c48d45d717bd (patch) | |
| tree | 85480a97cf3b02dcf2412c7540b0f463726f4b42 | |
| parent | 7e22082480028a467c27d9c32852ec7a12f8235f (diff) | |
| download | luajit-8a13c9cebf368de9338c3c3c8c30c48d45d717bd.tar.gz luajit-8a13c9cebf368de9338c3c3c8c30c48d45d717bd.tar.bz2 luajit-8a13c9cebf368de9338c3c3c8c30c48d45d717bd.zip | |
DynASM/x86: Add AVX and AVX2 opcodes.
Thanks to Peter Cawley.
| -rw-r--r-- | dynasm/dasm_x86.h | 7 | ||||
| -rw-r--r-- | dynasm/dasm_x86.lua | 402 |
2 files changed, 338 insertions, 71 deletions
diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h index 652e8c99..175febe0 100644 --- a/dynasm/dasm_x86.h +++ b/dynasm/dasm_x86.h | |||
| @@ -391,7 +391,12 @@ int dasm_encode(Dst_DECL, void *buffer) | |||
| 391 | case DASM_IMM_D: wd: dasmd(n); break; | 391 | case DASM_IMM_D: wd: dasmd(n); break; |
| 392 | case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL; | 392 | case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL; |
| 393 | case DASM_IMM_W: dasmw(n); break; | 393 | case DASM_IMM_W: dasmw(n); break; |
| 394 | case DASM_VREG: { int t = *p++; if (t >= 2) n<<=3; cp[-1] |= n; break; } | 394 | case DASM_VREG: { |
| 395 | int t = *p++; | ||
| 396 | if (t >= 5) n <<= 4; else if (t >= 2) n <<= 3; | ||
| 397 | cp[-1] ^= n; | ||
| 398 | break; | ||
| 399 | } | ||
| 395 | case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; | 400 | case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; |
| 396 | b++; n = (int)(ptrdiff_t)D->globals[-n]; | 401 | b++; n = (int)(ptrdiff_t)D->globals[-n]; |
| 397 | case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ | 402 | case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ |
diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua index d8203e3d..d85dfec4 100644 --- a/dynasm/dasm_x86.lua +++ b/dynasm/dasm_x86.lua | |||
| @@ -27,9 +27,9 @@ local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatabl | |||
| 27 | local _s = string | 27 | local _s = string |
| 28 | local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char | 28 | local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char |
| 29 | local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub | 29 | local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub |
| 30 | local concat, sort = table.concat, table.sort | 30 | local concat, sort, remove = table.concat, table.sort, table.remove |
| 31 | local bit = bit or require("bit") | 31 | local bit = bit or require("bit") |
| 32 | local band, shl, shr = bit.band, bit.lshift, bit.rshift | 32 | local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift |
| 33 | 33 | ||
| 34 | -- Inherited tables and callbacks. | 34 | -- Inherited tables and callbacks. |
| 35 | local g_opt, g_arch | 35 | local g_opt, g_arch |
| @@ -299,7 +299,7 @@ local function mkrmap(sz, cl, names) | |||
| 299 | local iname = format("@%s%x%s", sz, i, needrex and "R" or "") | 299 | local iname = format("@%s%x%s", sz, i, needrex and "R" or "") |
| 300 | if needrex then map_reg_needrex[iname] = true end | 300 | if needrex then map_reg_needrex[iname] = true end |
| 301 | local name | 301 | local name |
| 302 | if sz == "o" then name = format("xmm%d", i) | 302 | if sz == "o" or sz == "y" then name = format("%s%d", cl, i) |
| 303 | elseif sz == "f" then name = format("st%d", i) | 303 | elseif sz == "f" then name = format("st%d", i) |
| 304 | else name = format("r%d%s", i, sz == addrsize and "" or sz) end | 304 | else name = format("r%d%s", i, sz == addrsize and "" or sz) end |
| 305 | map_archdef[name] = iname | 305 | map_archdef[name] = iname |
| @@ -334,21 +334,24 @@ mkrmap("f", "Rf") | |||
| 334 | -- SSE registers (oword sized, but qword and dword accessible). | 334 | -- SSE registers (oword sized, but qword and dword accessible). |
| 335 | mkrmap("o", "xmm") | 335 | mkrmap("o", "xmm") |
| 336 | 336 | ||
| 337 | -- AVX registers (yword sized, but oword, qword and dword accessible). | ||
| 338 | mkrmap("y", "ymm") | ||
| 339 | |||
| 337 | -- Operand size prefixes to codes. | 340 | -- Operand size prefixes to codes. |
| 338 | local map_opsize = { | 341 | local map_opsize = { |
| 339 | byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t", | 342 | byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y", |
| 340 | aword = addrsize, | 343 | tword = "t", aword = addrsize, |
| 341 | } | 344 | } |
| 342 | 345 | ||
| 343 | -- Operand size code to number. | 346 | -- Operand size code to number. |
| 344 | local map_opsizenum = { | 347 | local map_opsizenum = { |
| 345 | b = 1, w = 2, d = 4, q = 8, o = 16, t = 10, | 348 | b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10, |
| 346 | } | 349 | } |
| 347 | 350 | ||
| 348 | -- Operand size code to name. | 351 | -- Operand size code to name. |
| 349 | local map_opsizename = { | 352 | local map_opsizename = { |
| 350 | b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword", | 353 | b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword", |
| 351 | f = "fpword", | 354 | t = "tword", f = "fpword", |
| 352 | } | 355 | } |
| 353 | 356 | ||
| 354 | -- Valid index register scale factors. | 357 | -- Valid index register scale factors. |
| @@ -460,7 +463,29 @@ local function wputszarg(sz, n) | |||
| 460 | end | 463 | end |
| 461 | 464 | ||
| 462 | -- Put multi-byte opcode with operand-size dependent modifications. | 465 | -- Put multi-byte opcode with operand-size dependent modifications. |
| 463 | local function wputop(sz, op, rex) | 466 | local function wputop(sz, op, rex, vex) |
| 467 | if vex then | ||
| 468 | local tail | ||
| 469 | if vex.m == 1 and band(rex, 11) == 0 then | ||
| 470 | wputb(0xc5) | ||
| 471 | tail = shl(bxor(band(rex, 4), 4), 5) | ||
| 472 | else | ||
| 473 | wputb(0xc4) | ||
| 474 | wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m) | ||
| 475 | tail = shl(band(rex, 8), 4) | ||
| 476 | end | ||
| 477 | local reg, vreg = 0, nil | ||
| 478 | if vex.v then | ||
| 479 | reg = vex.v.reg | ||
| 480 | if not reg then werror("bad vex operand") end | ||
| 481 | if reg < 0 then reg = 0; vreg = vex.v.vreg end | ||
| 482 | end | ||
| 483 | if sz == "y" or vex.l then tail = tail + 4 end | ||
| 484 | wputb(tail + shl(bxor(reg, 15), 3) + vex.p) | ||
| 485 | if vreg then waction("VREG", vreg); wputxb(4) end | ||
| 486 | rex = 0 | ||
| 487 | if op >= 256 then werror("bad vex opcode") end | ||
| 488 | end | ||
| 464 | local r | 489 | local r |
| 465 | if rex ~= 0 and not x64 then werror("bad operand size") end | 490 | if rex ~= 0 and not x64 then werror("bad operand size") end |
| 466 | if sz == "w" then wputb(102) end | 491 | if sz == "w" then wputb(102) end |
| @@ -881,9 +906,15 @@ end | |||
| 881 | -- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. | 906 | -- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. |
| 882 | -- The spare 3 bits are either filled with the last hex digit or | 907 | -- The spare 3 bits are either filled with the last hex digit or |
| 883 | -- the result from a previous "r"/"R". The opcode is restored. | 908 | -- the result from a previous "r"/"R". The opcode is restored. |
| 909 | -- "u" Use VEX encoding, vvvv unused. | ||
| 910 | -- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is | ||
| 911 | -- removed from the list used by future characters). | ||
| 912 | -- "L" Force VEX.L | ||
| 884 | -- | 913 | -- |
| 885 | -- All of the following characters force a flush of the opcode: | 914 | -- All of the following characters force a flush of the opcode: |
| 886 | -- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand. | 915 | -- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand. |
| 916 | -- "s" stores a 4 bit immediate from the last register operand, | ||
| 917 | -- followed by 4 zero bits. | ||
| 887 | -- "S" stores a signed 8 bit immediate from the last operand. | 918 | -- "S" stores a signed 8 bit immediate from the last operand. |
| 888 | -- "U" stores an unsigned 8 bit immediate from the last operand. | 919 | -- "U" stores an unsigned 8 bit immediate from the last operand. |
| 889 | -- "W" stores an unsigned 16 bit immediate from the last operand. | 920 | -- "W" stores an unsigned 16 bit immediate from the last operand. |
| @@ -1225,46 +1256,14 @@ local map_op = { | |||
| 1225 | movups_2 = "rmo:0F10rM|mro:0F11Rm", | 1256 | movups_2 = "rmo:0F10rM|mro:0F11Rm", |
| 1226 | orpd_2 = "rmo:660F56rM", | 1257 | orpd_2 = "rmo:660F56rM", |
| 1227 | orps_2 = "rmo:0F56rM", | 1258 | orps_2 = "rmo:0F56rM", |
| 1228 | packssdw_2 = "rmo:660F6BrM", | ||
| 1229 | packsswb_2 = "rmo:660F63rM", | ||
| 1230 | packuswb_2 = "rmo:660F67rM", | ||
| 1231 | paddb_2 = "rmo:660FFCrM", | ||
| 1232 | paddd_2 = "rmo:660FFErM", | ||
| 1233 | paddq_2 = "rmo:660FD4rM", | ||
| 1234 | paddsb_2 = "rmo:660FECrM", | ||
| 1235 | paddsw_2 = "rmo:660FEDrM", | ||
| 1236 | paddusb_2 = "rmo:660FDCrM", | ||
| 1237 | paddusw_2 = "rmo:660FDDrM", | ||
| 1238 | paddw_2 = "rmo:660FFDrM", | ||
| 1239 | pand_2 = "rmo:660FDBrM", | ||
| 1240 | pandn_2 = "rmo:660FDFrM", | ||
| 1241 | pause_0 = "F390", | 1259 | pause_0 = "F390", |
| 1242 | pavgb_2 = "rmo:660FE0rM", | ||
| 1243 | pavgw_2 = "rmo:660FE3rM", | ||
| 1244 | pcmpeqb_2 = "rmo:660F74rM", | ||
| 1245 | pcmpeqd_2 = "rmo:660F76rM", | ||
| 1246 | pcmpeqw_2 = "rmo:660F75rM", | ||
| 1247 | pcmpgtb_2 = "rmo:660F64rM", | ||
| 1248 | pcmpgtd_2 = "rmo:660F66rM", | ||
| 1249 | pcmpgtw_2 = "rmo:660F65rM", | ||
| 1250 | pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only. | 1260 | pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only. |
| 1251 | pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:", | 1261 | pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:", |
| 1252 | pmaddwd_2 = "rmo:660FF5rM", | ||
| 1253 | pmaxsw_2 = "rmo:660FEErM", | ||
| 1254 | pmaxub_2 = "rmo:660FDErM", | ||
| 1255 | pminsw_2 = "rmo:660FEArM", | ||
| 1256 | pminub_2 = "rmo:660FDArM", | ||
| 1257 | pmovmskb_2 = "rr/do:660FD7rM", | 1262 | pmovmskb_2 = "rr/do:660FD7rM", |
| 1258 | pmulhuw_2 = "rmo:660FE4rM", | ||
| 1259 | pmulhw_2 = "rmo:660FE5rM", | ||
| 1260 | pmullw_2 = "rmo:660FD5rM", | ||
| 1261 | pmuludq_2 = "rmo:660FF4rM", | ||
| 1262 | por_2 = "rmo:660FEBrM", | ||
| 1263 | prefetchnta_1 = "xb:n0F180m", | 1263 | prefetchnta_1 = "xb:n0F180m", |
| 1264 | prefetcht0_1 = "xb:n0F181m", | 1264 | prefetcht0_1 = "xb:n0F181m", |
| 1265 | prefetcht1_1 = "xb:n0F182m", | 1265 | prefetcht1_1 = "xb:n0F182m", |
| 1266 | prefetcht2_1 = "xb:n0F183m", | 1266 | prefetcht2_1 = "xb:n0F183m", |
| 1267 | psadbw_2 = "rmo:660FF6rM", | ||
| 1268 | pshufd_3 = "rmio:660F70rMU", | 1267 | pshufd_3 = "rmio:660F70rMU", |
| 1269 | pshufhw_3 = "rmio:F30F70rMU", | 1268 | pshufhw_3 = "rmio:F30F70rMU", |
| 1270 | pshuflw_3 = "rmio:F20F70rMU", | 1269 | pshuflw_3 = "rmio:F20F70rMU", |
| @@ -1278,23 +1277,6 @@ local map_op = { | |||
| 1278 | psrldq_2 = "rio:660F733mU", | 1277 | psrldq_2 = "rio:660F733mU", |
| 1279 | psrlq_2 = "rmo:660FD3rM|rio:660F732mU", | 1278 | psrlq_2 = "rmo:660FD3rM|rio:660F732mU", |
| 1280 | psrlw_2 = "rmo:660FD1rM|rio:660F712mU", | 1279 | psrlw_2 = "rmo:660FD1rM|rio:660F712mU", |
| 1281 | psubb_2 = "rmo:660FF8rM", | ||
| 1282 | psubd_2 = "rmo:660FFArM", | ||
| 1283 | psubq_2 = "rmo:660FFBrM", | ||
| 1284 | psubsb_2 = "rmo:660FE8rM", | ||
| 1285 | psubsw_2 = "rmo:660FE9rM", | ||
| 1286 | psubusb_2 = "rmo:660FD8rM", | ||
| 1287 | psubusw_2 = "rmo:660FD9rM", | ||
| 1288 | psubw_2 = "rmo:660FF9rM", | ||
| 1289 | punpckhbw_2 = "rmo:660F68rM", | ||
| 1290 | punpckhdq_2 = "rmo:660F6ArM", | ||
| 1291 | punpckhqdq_2 = "rmo:660F6DrM", | ||
| 1292 | punpckhwd_2 = "rmo:660F69rM", | ||
| 1293 | punpcklbw_2 = "rmo:660F60rM", | ||
| 1294 | punpckldq_2 = "rmo:660F62rM", | ||
| 1295 | punpcklqdq_2 = "rmo:660F6CrM", | ||
| 1296 | punpcklwd_2 = "rmo:660F61rM", | ||
| 1297 | pxor_2 = "rmo:660FEFrM", | ||
| 1298 | rcpps_2 = "rmo:0F53rM", | 1280 | rcpps_2 = "rmo:0F53rM", |
| 1299 | rcpss_2 = "rro:F30F53rM|rx/od:", | 1281 | rcpss_2 = "rro:F30F53rM|rx/od:", |
| 1300 | rsqrtps_2 = "rmo:0F52rM", | 1282 | rsqrtps_2 = "rmo:0F52rM", |
| @@ -1421,6 +1403,223 @@ local map_op = { | |||
| 1421 | aesimc_2 = "rmo:660F38DBrM", | 1403 | aesimc_2 = "rmo:660F38DBrM", |
| 1422 | aeskeygenassist_3 = "rmio:660F3ADFrMU", | 1404 | aeskeygenassist_3 = "rmio:660F3ADFrMU", |
| 1423 | pclmulqdq_3 = "rmio:660F3A44rMU", | 1405 | pclmulqdq_3 = "rmio:660F3A44rMU", |
| 1406 | |||
| 1407 | -- AVX FP ops | ||
| 1408 | vaddsubpd_3 = "rrmoy:660FVD0rM", | ||
| 1409 | vaddsubps_3 = "rrmoy:F20FVD0rM", | ||
| 1410 | vandpd_3 = "rrmoy:660FV54rM", | ||
| 1411 | vandps_3 = "rrmoy:0FV54rM", | ||
| 1412 | vandnpd_3 = "rrmoy:660FV55rM", | ||
| 1413 | vandnps_3 = "rrmoy:0FV55rM", | ||
| 1414 | vblendpd_4 = "rrmioy:660F3AV0DrMU", | ||
| 1415 | vblendps_4 = "rrmioy:660F3AV0CrMU", | ||
| 1416 | vblendvpd_4 = "rrmroy:660F3AV4BrMs", | ||
| 1417 | vblendvps_4 = "rrmroy:660F3AV4ArMs", | ||
| 1418 | vbroadcastf128_2 = "rx/yo:660F38u1ArM", | ||
| 1419 | vcmppd_4 = "rrmioy:660FVC2rMU", | ||
| 1420 | vcmpps_4 = "rrmioy:0FVC2rMU", | ||
| 1421 | vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:", | ||
| 1422 | vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:", | ||
| 1423 | vcomisd_2 = "rro:660Fu2FrM|rx/oq:", | ||
| 1424 | vcomiss_2 = "rro:0Fu2FrM|rx/od:", | ||
| 1425 | vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:", | ||
| 1426 | vcvtdq2ps_2 = "rmoy:0Fu5BrM", | ||
| 1427 | vcvtpd2dq_2 = "rmoy:F20FuE6rM", | ||
| 1428 | vcvtpd2ps_2 = "rmoy:660Fu5ArM", | ||
| 1429 | vcvtps2dq_2 = "rmoy:660Fu5BrM", | ||
| 1430 | vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:", | ||
| 1431 | vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:", | ||
| 1432 | vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:", | ||
| 1433 | vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM", | ||
| 1434 | vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM", | ||
| 1435 | vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:", | ||
| 1436 | vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:", | ||
| 1437 | vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM", | ||
| 1438 | vcvttps2dq_2 = "rmoy:F30Fu5BrM", | ||
| 1439 | vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:", | ||
| 1440 | vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:", | ||
| 1441 | vdppd_4 = "rrmio:660F3AV41rMU", | ||
| 1442 | vdpps_4 = "rrmioy:660F3AV40rMU", | ||
| 1443 | vextractf128_3 = "mri/oy:660F3AuL19RmU", | ||
| 1444 | vextractps_3 = "mri/do:660F3Au17RmU", | ||
| 1445 | vhaddpd_3 = "rrmoy:660FV7CrM", | ||
| 1446 | vhaddps_3 = "rrmoy:F20FV7CrM", | ||
| 1447 | vhsubpd_3 = "rrmoy:660FV7DrM", | ||
| 1448 | vhsubps_3 = "rrmoy:F20FV7DrM", | ||
| 1449 | vinsertf128_4 = "rrmi/yyo:660F3AV18rMU", | ||
| 1450 | vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:", | ||
| 1451 | vldmxcsr_1 = "xd:0FuAE2m", | ||
| 1452 | vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm", | ||
| 1453 | vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm", | ||
| 1454 | vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm", | ||
| 1455 | vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm", | ||
| 1456 | vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:", | ||
| 1457 | vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm", | ||
| 1458 | vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:", | ||
| 1459 | vmovhlps_3 = "rrro:0FV12rM", | ||
| 1460 | vmovhpd_2 = "xr/qo:660Fu17Rm", | ||
| 1461 | vmovhpd_3 = "rrx/ooq:660FV16rM", | ||
| 1462 | vmovhps_2 = "xr/qo:0Fu17Rm", | ||
| 1463 | vmovhps_3 = "rrx/ooq:0FV16rM", | ||
| 1464 | vmovlhps_3 = "rrro:0FV16rM", | ||
| 1465 | vmovlpd_2 = "xr/qo:660Fu13Rm", | ||
| 1466 | vmovlpd_3 = "rrx/ooq:660FV12rM", | ||
| 1467 | vmovlps_2 = "xr/qo:0Fu13Rm", | ||
| 1468 | vmovlps_3 = "rrx/ooq:0FV12rM", | ||
| 1469 | vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM", | ||
| 1470 | vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM", | ||
| 1471 | vmovntpd_2 = "xroy:660Fu2BRm", | ||
| 1472 | vmovntps_2 = "xroy:0Fu2BRm", | ||
| 1473 | vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm", | ||
| 1474 | vmovsd_3 = "rrro:F20FV10rM", | ||
| 1475 | vmovshdup_2 = "rmoy:F30Fu16rM", | ||
| 1476 | vmovsldup_2 = "rmoy:F30Fu12rM", | ||
| 1477 | vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm", | ||
| 1478 | vmovss_3 = "rrro:F30FV10rM", | ||
| 1479 | vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm", | ||
| 1480 | vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm", | ||
| 1481 | vorpd_3 = "rrmoy:660FV56rM", | ||
| 1482 | vorps_3 = "rrmoy:0FV56rM", | ||
| 1483 | vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU", | ||
| 1484 | vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU", | ||
| 1485 | vperm2f128_4 = "rrmiy:660F3AV06rMU", | ||
| 1486 | vptestpd_2 = "rmoy:660F38u0FrM", | ||
| 1487 | vptestps_2 = "rmoy:660F38u0ErM", | ||
| 1488 | vrcpps_2 = "rmoy:0Fu53rM", | ||
| 1489 | vrcpss_3 = "rrro:F30FV53rM|rrx/ood:", | ||
| 1490 | vrsqrtps_2 = "rmoy:0Fu52rM", | ||
| 1491 | vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:", | ||
| 1492 | vroundpd_3 = "rmioy:660F3Au09rMU", -- "u": vvvv unused (operands are r, m, i). | ||
| 1493 | vroundps_3 = "rmioy:660F3Au08rMU", | ||
| 1494 | vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:", | ||
| 1495 | vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:", | ||
| 1496 | vshufpd_4 = "rrmioy:660FVC6rMU", | ||
| 1497 | vshufps_4 = "rrmioy:0FVC6rMU", | ||
| 1498 | vsqrtps_2 = "rmoy:0Fu51rM", | ||
| 1499 | vsqrtss_2 = "rro:F30Fu51rM|rx/od:", | ||
| 1500 | vsqrtpd_2 = "rmoy:660Fu51rM", | ||
| 1501 | vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:", | ||
| 1502 | vstmxcsr_1 = "xd:0FuAE3m", | ||
| 1503 | vucomisd_2 = "rro:660Fu2ErM|rx/oq:", | ||
| 1504 | vucomiss_2 = "rro:0Fu2ErM|rx/od:", | ||
| 1505 | vunpckhpd_3 = "rrmoy:660FV15rM", | ||
| 1506 | vunpckhps_3 = "rrmoy:0FV15rM", | ||
| 1507 | vunpcklpd_3 = "rrmoy:660FV14rM", | ||
| 1508 | vunpcklps_3 = "rrmoy:0FV14rM", | ||
| 1509 | vxorpd_3 = "rrmoy:660FV57rM", | ||
| 1510 | vxorps_3 = "rrmoy:0FV57rM", | ||
| 1511 | vzeroall_0 = "0FuL77", | ||
| 1512 | vzeroupper_0 = "0Fu77", | ||
| 1513 | |||
| 1514 | -- AVX2 FP ops | ||
| 1515 | vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:", | ||
| 1516 | vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:", | ||
| 1517 | -- *vgather* (!vsib) | ||
| 1518 | vpermpd_3 = "rmiy:660F3AuX01rMU", | ||
| 1519 | vpermps_3 = "rrmy:660F38V16rM", | ||
| 1520 | |||
| 1521 | -- AVX, AVX2 integer ops | ||
| 1522 | -- In general, xmm requires AVX, ymm requires AVX2. | ||
| 1523 | vlddqu_2 = "rxoy:F20FuF0rM", | ||
| 1524 | vmaskmovdqu_2 = "rro:660FuF7rM", | ||
| 1525 | vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm", | ||
| 1526 | vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm", | ||
| 1527 | vmovntdq_2 = "xroy:660FuE7Rm", | ||
| 1528 | vmovntdqa_2 = "rxoy:660F38u2ArM", | ||
| 1529 | vmpsadbw_4 = "rrmioy:660F3AV42rMU", | ||
| 1530 | vpabsb_2 = "rmoy:660F38u1CrM", | ||
| 1531 | vpabsd_2 = "rmoy:660F38u1ErM", | ||
| 1532 | vpabsw_2 = "rmoy:660F38u1DrM", | ||
| 1533 | vpackusdw_3 = "rrmoy:660F38V2BrM", | ||
| 1534 | vpalignr_4 = "rrmioy:660F3AV0FrMU", | ||
| 1535 | vpblendvb_4 = "rrmroy:660F3AV4CrMs", | ||
| 1536 | vpblendw_4 = "rrmioy:660F3AV0ErMU", | ||
| 1537 | vpclmulqdq_4 = "rrmio:660F3AV44rMU", | ||
| 1538 | vpcmpeqq_3 = "rrmoy:660F38V29rM", | ||
| 1539 | vpcmpestri_3 = "rmio:660F3Au61rMU", | ||
| 1540 | vpcmpestrm_3 = "rmio:660F3Au60rMU", | ||
| 1541 | vpcmpgtq_3 = "rrmoy:660F38V37rM", | ||
| 1542 | vpcmpistri_3 = "rmio:660F3Au63rMU", | ||
| 1543 | vpcmpistrm_3 = "rmio:660F3Au62rMU", | ||
| 1544 | vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:", | ||
| 1545 | vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU", | ||
| 1546 | vpextrd_3 = "mri/do:660F3Au16RmU", | ||
| 1547 | vpextrq_3 = "mri/qo:660F3Au16RmU", | ||
| 1548 | vphaddw_3 = "rrmoy:660F38V01rM", | ||
| 1549 | vphaddd_3 = "rrmoy:660F38V02rM", | ||
| 1550 | vphaddsw_3 = "rrmoy:660F38V03rM", | ||
| 1551 | vphminposuw_2 = "rmo:660F38u41rM", | ||
| 1552 | vphsubw_3 = "rrmoy:660F38V05rM", | ||
| 1553 | vphsubd_3 = "rrmoy:660F38V06rM", | ||
| 1554 | vphsubsw_3 = "rrmoy:660F38V07rM", | ||
| 1555 | vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:", | ||
| 1556 | vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:", | ||
| 1557 | vpinsrd_4 = "rrmi/ood:660F3AV22rMU", | ||
| 1558 | vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU", | ||
| 1559 | vpmaddubsw_3 = "rrmoy:660F38V04rM", | ||
| 1560 | vpmaxsb_3 = "rrmoy:660F38V3CrM", | ||
| 1561 | vpmaxsd_3 = "rrmoy:660F38V3DrM", | ||
| 1562 | vpmaxuw_3 = "rrmoy:660F38V3ErM", | ||
| 1563 | vpmaxud_3 = "rrmoy:660F38V3FrM", | ||
| 1564 | vpminsb_3 = "rrmoy:660F38V38rM", | ||
| 1565 | vpminsd_3 = "rrmoy:660F38V39rM", | ||
| 1566 | vpminuw_3 = "rrmoy:660F38V3ArM", | ||
| 1567 | vpminud_3 = "rrmoy:660F38V3BrM", | ||
| 1568 | vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM", | ||
| 1569 | vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:", | ||
| 1570 | vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:", | ||
| 1571 | vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:", | ||
| 1572 | vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:", | ||
| 1573 | vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:", | ||
| 1574 | vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:", | ||
| 1575 | vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:", | ||
| 1576 | vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:", | ||
| 1577 | vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:", | ||
| 1578 | vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:", | ||
| 1579 | vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:", | ||
| 1580 | vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:", | ||
| 1581 | vpmuldq_3 = "rrmoy:660F38V28rM", | ||
| 1582 | vpmulhrsw_3 = "rrmoy:660F38V0BrM", | ||
| 1583 | vpmulld_3 = "rrmoy:660F38V40rM", | ||
| 1584 | vpshufb_3 = "rrmoy:660F38V00rM", | ||
| 1585 | vpshufd_3 = "rmioy:660Fu70rMU", | ||
| 1586 | vpshufhw_3 = "rmioy:F30Fu70rMU", | ||
| 1587 | vpshuflw_3 = "rmioy:F20Fu70rMU", | ||
| 1588 | vpsignb_3 = "rrmoy:660F38V08rM", | ||
| 1589 | vpsignw_3 = "rrmoy:660F38V09rM", | ||
| 1590 | vpsignd_3 = "rrmoy:660F38V0ArM", | ||
| 1591 | vpslldq_3 = "rrioy:660Fv737mU", | ||
| 1592 | vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU", | ||
| 1593 | vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU", | ||
| 1594 | vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU", | ||
| 1595 | vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU", | ||
| 1596 | vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU", | ||
| 1597 | vpsrldq_3 = "rrioy:660Fv733mU", | ||
| 1598 | vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU", | ||
| 1599 | vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU", | ||
| 1600 | vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU", | ||
| 1601 | vptest_2 = "rmoy:660F38u17rM", | ||
| 1602 | |||
| 1603 | -- AVX2 integer ops | ||
| 1604 | vbroadcasti128_2 = "rx/yo:660F38u5ArM", | ||
| 1605 | vinserti128_4 = "rrmi/yyo:660F3AV38rMU", | ||
| 1606 | vextracti128_3 = "mri/oy:660F3AuL39RmU", | ||
| 1607 | vpblendd_4 = "rrmioy:660F3AV02rMU", | ||
| 1608 | vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:", | ||
| 1609 | vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:", | ||
| 1610 | vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:", | ||
| 1611 | vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:", | ||
| 1612 | vpermd_3 = "rrmy:660F38V36rM", | ||
| 1613 | vpermq_3 = "rmiy:660F3AuX00rMU", | ||
| 1614 | -- *vpgather* (!vsib) | ||
| 1615 | vperm2i128_4 = "rrmiy:660F3AV46rMU", | ||
| 1616 | vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm", | ||
| 1617 | vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm", | ||
| 1618 | vpsllvd_3 = "rrmoy:660F38V47rM", | ||
| 1619 | vpsllvq_3 = "rrmoy:660F38VX47rM", | ||
| 1620 | vpsravd_3 = "rrmoy:660F38V46rM", | ||
| 1621 | vpsrlvd_3 = "rrmoy:660F38V45rM", | ||
| 1622 | vpsrlvq_3 = "rrmoy:660F38VX45rM", | ||
| 1424 | } | 1623 | } |
| 1425 | 1624 | ||
| 1426 | ------------------------------------------------------------------------------ | 1625 | ------------------------------------------------------------------------------ |
| @@ -1471,28 +1670,58 @@ for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do | |||
| 1471 | map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+ | 1670 | map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+ |
| 1472 | end | 1671 | end |
| 1473 | 1672 | ||
| 1474 | -- SSE FP arithmetic ops. | 1673 | -- SSE / AVX FP arithmetic ops. |
| 1475 | for name,n in pairs{ sqrt = 1, add = 8, mul = 9, | 1674 | for name,n in pairs{ sqrt = 1, add = 8, mul = 9, |
| 1476 | sub = 12, min = 13, div = 14, max = 15 } do | 1675 | sub = 12, min = 13, div = 14, max = 15 } do |
| 1477 | map_op[name.."ps_2"] = format("rmo:0F5%XrM", n) | 1676 | map_op[name.."ps_2"] = format("rmo:0F5%XrM", n) |
| 1478 | map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n) | 1677 | map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n) |
| 1479 | map_op[name.."pd_2"] = format("rmo:660F5%XrM", n) | 1678 | map_op[name.."pd_2"] = format("rmo:660F5%XrM", n) |
| 1480 | map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n) | 1679 | map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n) |
| 1680 | if n ~= 1 then | ||
| 1681 | map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n) | ||
| 1682 | map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n) | ||
| 1683 | map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n) | ||
| 1684 | map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n) | ||
| 1685 | end | ||
| 1686 | end | ||
| 1687 | |||
| 1688 | -- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf). | ||
| 1689 | for name,n in pairs{ | ||
| 1690 | paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4, | ||
| 1691 | paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B, | ||
| 1692 | packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC, | ||
| 1693 | paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0, | ||
| 1694 | pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76, | ||
| 1695 | pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66, | ||
| 1696 | pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE, | ||
| 1697 | pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA, | ||
| 1698 | pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5, | ||
| 1699 | pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8, | ||
| 1700 | psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8, | ||
| 1701 | psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9, | ||
| 1702 | punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A, | ||
| 1703 | punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61, | ||
| 1704 | punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF | ||
| 1705 | } do | ||
| 1706 | map_op[name.."_2"] = format("rmo:660F%02XrM", n) | ||
| 1707 | map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n) | ||
| 1481 | end | 1708 | end |
| 1482 | 1709 | ||
| 1483 | ------------------------------------------------------------------------------ | 1710 | ------------------------------------------------------------------------------ |
| 1484 | 1711 | ||
| 1712 | local map_vexarg = { u = false, v = 1, V = 2 } | ||
| 1713 | |||
| 1485 | -- Process pattern string. | 1714 | -- Process pattern string. |
| 1486 | local function dopattern(pat, args, sz, op, needrex) | 1715 | local function dopattern(pat, args, sz, op, needrex) |
| 1487 | local digit, addin | 1716 | local digit, addin, vex |
| 1488 | local opcode = 0 | 1717 | local opcode = 0 |
| 1489 | local szov = sz | 1718 | local szov = sz |
| 1490 | local narg = 1 | 1719 | local narg = 1 |
| 1491 | local rex = 0 | 1720 | local rex = 0 |
| 1492 | 1721 | ||
| 1493 | -- Limit number of section buffer positions used by a single dasm_put(). | 1722 | -- Limit number of section buffer positions used by a single dasm_put(). |
| 1494 | -- A single opcode needs a maximum of 5 positions. | 1723 | -- A single opcode needs a maximum of 6 positions. |
| 1495 | if secpos+5 > maxsecpos then wflush() end | 1724 | if secpos+6 > maxsecpos then wflush() end |
| 1496 | 1725 | ||
| 1497 | -- Process each character. | 1726 | -- Process each character. |
| 1498 | for c in gmatch(pat.."|", ".") do | 1727 | for c in gmatch(pat.."|", ".") do |
| @@ -1506,6 +1735,8 @@ local function dopattern(pat, args, sz, op, needrex) | |||
| 1506 | szov = nil | 1735 | szov = nil |
| 1507 | elseif c == "X" then -- Force REX.W. | 1736 | elseif c == "X" then -- Force REX.W. |
| 1508 | rex = 8 | 1737 | rex = 8 |
| 1738 | elseif c == "L" then -- Force VEX.L. | ||
| 1739 | vex.l = true | ||
| 1509 | elseif c == "r" then -- Merge 1st operand regno. into opcode. | 1740 | elseif c == "r" then -- Merge 1st operand regno. into opcode. |
| 1510 | addin = args[1]; opcode = opcode + (addin.reg % 8) | 1741 | addin = args[1]; opcode = opcode + (addin.reg % 8) |
| 1511 | if narg < 2 then narg = 2 end | 1742 | if narg < 2 then narg = 2 end |
| @@ -1529,21 +1760,41 @@ local function dopattern(pat, args, sz, op, needrex) | |||
| 1529 | if t.xreg and t.xreg > 7 then rex = rex + 2 end | 1760 | if t.xreg and t.xreg > 7 then rex = rex + 2 end |
| 1530 | if s > 7 then rex = rex + 4 end | 1761 | if s > 7 then rex = rex + 4 end |
| 1531 | if needrex then rex = rex + 16 end | 1762 | if needrex then rex = rex + 16 end |
| 1532 | wputop(szov, opcode, rex); opcode = nil | 1763 | wputop(szov, opcode, rex, vex); opcode = nil |
| 1533 | local imark = sub(pat, -1) -- Force a mark (ugly). | 1764 | local imark = sub(pat, -1) -- Force a mark (ugly). |
| 1534 | -- Put ModRM/SIB with regno/last digit as spare. | 1765 | -- Put ModRM/SIB with regno/last digit as spare. |
| 1535 | wputmrmsib(t, imark, s, addin and addin.vreg) | 1766 | wputmrmsib(t, imark, s, addin and addin.vreg) |
| 1536 | addin = nil | 1767 | addin = nil |
| 1768 | elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix | ||
| 1769 | local b = band(opcode, 255); opcode = shr(opcode, 8) | ||
| 1770 | local m = 1 | ||
| 1771 | if b == 0x38 then m = 2 | ||
| 1772 | elseif b == 0x3a then m = 3 end | ||
| 1773 | if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end | ||
| 1774 | if b ~= 0x0f then | ||
| 1775 | werror("expected `0F', `0F38', or `0F3A' to precede `"..c.. | ||
| 1776 | "' in pattern `"..pat.."' for `"..op.."'") | ||
| 1777 | end | ||
| 1778 | local v = map_vexarg[c] | ||
| 1779 | if v then v = remove(args, v) end | ||
| 1780 | b = band(opcode, 255) | ||
| 1781 | local p = 0 | ||
| 1782 | if b == 0x66 then p = 1 | ||
| 1783 | elseif b == 0xf3 then p = 2 | ||
| 1784 | elseif b == 0xf2 then p = 3 end | ||
| 1785 | if p ~= 0 then opcode = shr(opcode, 8) end | ||
| 1786 | if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end | ||
| 1787 | vex = { m = m, p = p, v = v } | ||
| 1537 | else | 1788 | else |
| 1538 | if opcode then -- Flush opcode. | 1789 | if opcode then -- Flush opcode. |
| 1539 | if szov == "q" and rex == 0 then rex = rex + 8 end | 1790 | if szov == "q" and rex == 0 then rex = rex + 8 end |
| 1540 | if needrex then rex = rex + 16 end | 1791 | if needrex then rex = rex + 16 end |
| 1541 | if addin and addin.reg == -1 then | 1792 | if addin and addin.reg == -1 then |
| 1542 | wputop(szov, opcode - 7, rex) | 1793 | wputop(szov, opcode - 7, rex, vex) |
| 1543 | waction("VREG", addin.vreg); wputxb(0) | 1794 | waction("VREG", addin.vreg); wputxb(0) |
| 1544 | else | 1795 | else |
| 1545 | if addin and addin.reg > 7 then rex = rex + 1 end | 1796 | if addin and addin.reg > 7 then rex = rex + 1 end |
| 1546 | wputop(szov, opcode, rex) | 1797 | wputop(szov, opcode, rex, vex) |
| 1547 | end | 1798 | end |
| 1548 | opcode = nil | 1799 | opcode = nil |
| 1549 | end | 1800 | end |
| @@ -1580,6 +1831,14 @@ local function dopattern(pat, args, sz, op, needrex) | |||
| 1580 | else | 1831 | else |
| 1581 | wputlabel("REL_", imm, 2) | 1832 | wputlabel("REL_", imm, 2) |
| 1582 | end | 1833 | end |
| 1834 | elseif c == "s" then | ||
| 1835 | local reg = a.reg | ||
| 1836 | if reg < 0 then | ||
| 1837 | wputb(0) | ||
| 1838 | waction("VREG", a.vreg); wputxb(5) | ||
| 1839 | else | ||
| 1840 | wputb(shl(reg, 4)) | ||
| 1841 | end | ||
| 1583 | else | 1842 | else |
| 1584 | werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'") | 1843 | werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'") |
| 1585 | end | 1844 | end |
| @@ -1656,11 +1915,14 @@ map_op[".template__"] = function(params, template, nparams) | |||
| 1656 | if pat == "" then pat = lastpat else lastpat = pat end | 1915 | if pat == "" then pat = lastpat else lastpat = pat end |
| 1657 | if matchtm(tm, args) then | 1916 | if matchtm(tm, args) then |
| 1658 | local prefix = sub(szm, 1, 1) | 1917 | local prefix = sub(szm, 1, 1) |
| 1659 | if prefix == "/" then -- Match both operand sizes. | 1918 | if prefix == "/" then -- Exactly match leading operand sizes. |
| 1660 | if args[1].opsize == sub(szm, 2, 2) and | 1919 | for i = #szm, 1, -1 do |
| 1661 | args[2].opsize == sub(szm, 3, 3) then | 1920 | if i == 1 then |
| 1662 | dopattern(pat, args, sz, params.op, needrex) -- Process pattern. | 1921 | dopattern(pat, args, sz, params.op, needrex) -- Process pattern. |
| 1663 | return | 1922 | return |
| 1923 | elseif args[i-1].opsize ~= sub(szm, i, i) then | ||
| 1924 | break | ||
| 1925 | end | ||
| 1664 | end | 1926 | end |
| 1665 | else -- Match common operand size. | 1927 | else -- Match common operand size. |
| 1666 | local szp = sz | 1928 | local szp = sz |
