diff options
Diffstat (limited to 'dynasm/dasm_x86.lua')
| -rw-r--r-- | dynasm/dasm_x86.lua | 660 |
1 files changed, 554 insertions, 106 deletions
diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua index c2211125..0794e180 100644 --- a/dynasm/dasm_x86.lua +++ b/dynasm/dasm_x86.lua | |||
| @@ -11,9 +11,9 @@ local x64 = x64 | |||
| 11 | local _info = { | 11 | local _info = { |
| 12 | arch = x64 and "x64" or "x86", | 12 | arch = x64 and "x64" or "x86", |
| 13 | description = "DynASM x86/x64 module", | 13 | description = "DynASM x86/x64 module", |
| 14 | version = "1.3.0", | 14 | version = "1.5.0", |
| 15 | vernum = 10300, | 15 | vernum = 10500, |
| 16 | release = "2011-05-05", | 16 | release = "2021-05-02", |
| 17 | author = "Mike Pall", | 17 | author = "Mike Pall", |
| 18 | license = "MIT", | 18 | license = "MIT", |
| 19 | } | 19 | } |
| @@ -27,9 +27,9 @@ local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatabl | |||
| 27 | local _s = string | 27 | local _s = string |
| 28 | local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char | 28 | local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char |
| 29 | local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub | 29 | local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub |
| 30 | local concat, sort = table.concat, table.sort | 30 | local concat, sort, remove = table.concat, table.sort, table.remove |
| 31 | local bit = bit or require("bit") | 31 | local bit = bit or require("bit") |
| 32 | local band, shl, shr = bit.band, bit.lshift, bit.rshift | 32 | local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift |
| 33 | 33 | ||
| 34 | -- Inherited tables and callbacks. | 34 | -- Inherited tables and callbacks. |
| 35 | local g_opt, g_arch | 35 | local g_opt, g_arch |
| @@ -41,7 +41,7 @@ local action_names = { | |||
| 41 | -- int arg, 1 buffer pos: | 41 | -- int arg, 1 buffer pos: |
| 42 | "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB", | 42 | "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB", |
| 43 | -- action arg (1 byte), int arg, 1 buffer pos (reg/num): | 43 | -- action arg (1 byte), int arg, 1 buffer pos (reg/num): |
| 44 | "VREG", "SPACE", -- !x64: VREG support NYI. | 44 | "VREG", "SPACE", |
| 45 | -- ptrdiff_t arg, 1 buffer pos (address): !x64 | 45 | -- ptrdiff_t arg, 1 buffer pos (address): !x64 |
| 46 | "SETLABEL", "REL_A", | 46 | "SETLABEL", "REL_A", |
| 47 | -- action arg (1 byte) or int arg, 2 buffer pos (link, offset): | 47 | -- action arg (1 byte) or int arg, 2 buffer pos (link, offset): |
| @@ -83,6 +83,21 @@ local actargs = { 0 } | |||
| 83 | -- Current number of section buffer positions for dasm_put(). | 83 | -- Current number of section buffer positions for dasm_put(). |
| 84 | local secpos = 1 | 84 | local secpos = 1 |
| 85 | 85 | ||
| 86 | -- VREG kind encodings, pre-shifted by 5 bits. | ||
| 87 | local map_vreg = { | ||
| 88 | ["modrm.rm.m"] = 0x00, | ||
| 89 | ["modrm.rm.r"] = 0x20, | ||
| 90 | ["opcode"] = 0x20, | ||
| 91 | ["sib.base"] = 0x20, | ||
| 92 | ["sib.index"] = 0x40, | ||
| 93 | ["modrm.reg"] = 0x80, | ||
| 94 | ["vex.v"] = 0xa0, | ||
| 95 | ["imm.hi"] = 0xc0, | ||
| 96 | } | ||
| 97 | |||
| 98 | -- Current number of VREG actions contributing to REX/VEX shrinkage. | ||
| 99 | local vreg_shrink_count = 0 | ||
| 100 | |||
| 86 | ------------------------------------------------------------------------------ | 101 | ------------------------------------------------------------------------------ |
| 87 | 102 | ||
| 88 | -- Compute action numbers for action names. | 103 | -- Compute action numbers for action names. |
| @@ -134,6 +149,21 @@ local function waction(action, a, num) | |||
| 134 | if a or num then secpos = secpos + (num or 1) end | 149 | if a or num then secpos = secpos + (num or 1) end |
| 135 | end | 150 | end |
| 136 | 151 | ||
| 152 | -- Optionally add a VREG action. | ||
| 153 | local function wvreg(kind, vreg, psz, sk, defer) | ||
| 154 | if not vreg then return end | ||
| 155 | waction("VREG", vreg) | ||
| 156 | local b = assert(map_vreg[kind], "bad vreg kind `"..vreg.."'") | ||
| 157 | if b < (sk or 0) then | ||
| 158 | vreg_shrink_count = vreg_shrink_count + 1 | ||
| 159 | end | ||
| 160 | if not defer then | ||
| 161 | b = b + vreg_shrink_count * 8 | ||
| 162 | vreg_shrink_count = 0 | ||
| 163 | end | ||
| 164 | wputxb(b + (psz or 0)) | ||
| 165 | end | ||
| 166 | |||
| 137 | -- Add call to embedded DynASM C code. | 167 | -- Add call to embedded DynASM C code. |
| 138 | local function wcall(func, args) | 168 | local function wcall(func, args) |
| 139 | wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true) | 169 | wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true) |
| @@ -299,7 +329,7 @@ local function mkrmap(sz, cl, names) | |||
| 299 | local iname = format("@%s%x%s", sz, i, needrex and "R" or "") | 329 | local iname = format("@%s%x%s", sz, i, needrex and "R" or "") |
| 300 | if needrex then map_reg_needrex[iname] = true end | 330 | if needrex then map_reg_needrex[iname] = true end |
| 301 | local name | 331 | local name |
| 302 | if sz == "o" then name = format("xmm%d", i) | 332 | if sz == "o" or sz == "y" then name = format("%s%d", cl, i) |
| 303 | elseif sz == "f" then name = format("st%d", i) | 333 | elseif sz == "f" then name = format("st%d", i) |
| 304 | else name = format("r%d%s", i, sz == addrsize and "" or sz) end | 334 | else name = format("r%d%s", i, sz == addrsize and "" or sz) end |
| 305 | map_archdef[name] = iname | 335 | map_archdef[name] = iname |
| @@ -326,6 +356,7 @@ mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"}) | |||
| 326 | mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"}) | 356 | mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"}) |
| 327 | map_reg_valid_index[map_archdef.esp] = false | 357 | map_reg_valid_index[map_archdef.esp] = false |
| 328 | if x64 then map_reg_valid_index[map_archdef.rsp] = false end | 358 | if x64 then map_reg_valid_index[map_archdef.rsp] = false end |
| 359 | if x64 then map_reg_needrex[map_archdef.Rb] = true end | ||
| 329 | map_archdef["Ra"] = "@"..addrsize | 360 | map_archdef["Ra"] = "@"..addrsize |
| 330 | 361 | ||
| 331 | -- FP registers (internally tword sized, but use "f" as operand size). | 362 | -- FP registers (internally tword sized, but use "f" as operand size). |
| @@ -334,21 +365,24 @@ mkrmap("f", "Rf") | |||
| 334 | -- SSE registers (oword sized, but qword and dword accessible). | 365 | -- SSE registers (oword sized, but qword and dword accessible). |
| 335 | mkrmap("o", "xmm") | 366 | mkrmap("o", "xmm") |
| 336 | 367 | ||
| 368 | -- AVX registers (yword sized, but oword, qword and dword accessible). | ||
| 369 | mkrmap("y", "ymm") | ||
| 370 | |||
| 337 | -- Operand size prefixes to codes. | 371 | -- Operand size prefixes to codes. |
| 338 | local map_opsize = { | 372 | local map_opsize = { |
| 339 | byte = "b", word = "w", dword = "d", qword = "q", oword = "o", tword = "t", | 373 | byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y", |
| 340 | aword = addrsize, | 374 | tword = "t", aword = addrsize, |
| 341 | } | 375 | } |
| 342 | 376 | ||
| 343 | -- Operand size code to number. | 377 | -- Operand size code to number. |
| 344 | local map_opsizenum = { | 378 | local map_opsizenum = { |
| 345 | b = 1, w = 2, d = 4, q = 8, o = 16, t = 10, | 379 | b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10, |
| 346 | } | 380 | } |
| 347 | 381 | ||
| 348 | -- Operand size code to name. | 382 | -- Operand size code to name. |
| 349 | local map_opsizename = { | 383 | local map_opsizename = { |
| 350 | b = "byte", w = "word", d = "dword", q = "qword", o = "oword", t = "tword", | 384 | b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword", |
| 351 | f = "fpword", | 385 | t = "tword", f = "fpword", |
| 352 | } | 386 | } |
| 353 | 387 | ||
| 354 | -- Valid index register scale factors. | 388 | -- Valid index register scale factors. |
| @@ -450,6 +484,22 @@ local function wputdarg(n) | |||
| 450 | end | 484 | end |
| 451 | end | 485 | end |
| 452 | 486 | ||
| 487 | -- Put signed or unsigned qword or arg. | ||
| 488 | local function wputqarg(n) | ||
| 489 | local tn = type(n) | ||
| 490 | if tn == "number" then -- This is only used for numbers from -2^31..2^32-1. | ||
| 491 | wputb(band(n, 255)) | ||
| 492 | wputb(band(shr(n, 8), 255)) | ||
| 493 | wputb(band(shr(n, 16), 255)) | ||
| 494 | wputb(shr(n, 24)) | ||
| 495 | local sign = n < 0 and 255 or 0 | ||
| 496 | wputb(sign); wputb(sign); wputb(sign); wputb(sign) | ||
| 497 | else | ||
| 498 | waction("IMM_D", format("(unsigned int)(%s)", n)) | ||
| 499 | waction("IMM_D", format("(unsigned int)((unsigned long long)(%s)>>32)", n)) | ||
| 500 | end | ||
| 501 | end | ||
| 502 | |||
| 453 | -- Put operand-size dependent number or arg (defaults to dword). | 503 | -- Put operand-size dependent number or arg (defaults to dword). |
| 454 | local function wputszarg(sz, n) | 504 | local function wputszarg(sz, n) |
| 455 | if not sz or sz == "d" or sz == "q" then wputdarg(n) | 505 | if not sz or sz == "d" or sz == "q" then wputdarg(n) |
| @@ -460,9 +510,45 @@ local function wputszarg(sz, n) | |||
| 460 | end | 510 | end |
| 461 | 511 | ||
| 462 | -- Put multi-byte opcode with operand-size dependent modifications. | 512 | -- Put multi-byte opcode with operand-size dependent modifications. |
| 463 | local function wputop(sz, op, rex) | 513 | local function wputop(sz, op, rex, vex, vregr, vregxb) |
| 514 | local psz, sk = 0, nil | ||
| 515 | if vex then | ||
| 516 | local tail | ||
| 517 | if vex.m == 1 and band(rex, 11) == 0 then | ||
| 518 | if x64 and vregxb then | ||
| 519 | sk = map_vreg["modrm.reg"] | ||
| 520 | else | ||
| 521 | wputb(0xc5) | ||
| 522 | tail = shl(bxor(band(rex, 4), 4), 5) | ||
| 523 | psz = 3 | ||
| 524 | end | ||
| 525 | end | ||
| 526 | if not tail then | ||
| 527 | wputb(0xc4) | ||
| 528 | wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m) | ||
| 529 | tail = shl(band(rex, 8), 4) | ||
| 530 | psz = 4 | ||
| 531 | end | ||
| 532 | local reg, vreg = 0, nil | ||
| 533 | if vex.v then | ||
| 534 | reg = vex.v.reg | ||
| 535 | if not reg then werror("bad vex operand") end | ||
| 536 | if reg < 0 then reg = 0; vreg = vex.v.vreg end | ||
| 537 | end | ||
| 538 | if sz == "y" or vex.l then tail = tail + 4 end | ||
| 539 | wputb(tail + shl(bxor(reg, 15), 3) + vex.p) | ||
| 540 | wvreg("vex.v", vreg) | ||
| 541 | rex = 0 | ||
| 542 | if op >= 256 then werror("bad vex opcode") end | ||
| 543 | else | ||
| 544 | if rex ~= 0 then | ||
| 545 | if not x64 then werror("bad operand size") end | ||
| 546 | elseif (vregr or vregxb) and x64 then | ||
| 547 | rex = 0x10 | ||
| 548 | sk = map_vreg["vex.v"] | ||
| 549 | end | ||
| 550 | end | ||
| 464 | local r | 551 | local r |
| 465 | if rex ~= 0 and not x64 then werror("bad operand size") end | ||
| 466 | if sz == "w" then wputb(102) end | 552 | if sz == "w" then wputb(102) end |
| 467 | -- Needs >32 bit numbers, but only for crc32 eax, word [ebx] | 553 | -- Needs >32 bit numbers, but only for crc32 eax, word [ebx] |
| 468 | if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end | 554 | if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end |
| @@ -471,20 +557,20 @@ local function wputop(sz, op, rex) | |||
| 471 | if rex ~= 0 then | 557 | if rex ~= 0 then |
| 472 | local opc3 = band(op, 0xffff00) | 558 | local opc3 = band(op, 0xffff00) |
| 473 | if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then | 559 | if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then |
| 474 | wputb(64 + band(rex, 15)); rex = 0 | 560 | wputb(64 + band(rex, 15)); rex = 0; psz = 2 |
| 475 | end | 561 | end |
| 476 | end | 562 | end |
| 477 | wputb(shr(op, 16)); op = band(op, 0xffff) | 563 | wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1 |
| 478 | end | 564 | end |
| 479 | if op >= 256 then | 565 | if op >= 256 then |
| 480 | local b = shr(op, 8) | 566 | local b = shr(op, 8) |
| 481 | if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end | 567 | if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end |
| 482 | wputb(b) | 568 | wputb(b); op = band(op, 255); psz = psz + 1 |
| 483 | op = band(op, 255) | ||
| 484 | end | 569 | end |
| 485 | if rex ~= 0 then wputb(64 + band(rex, 15)) end | 570 | if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end |
| 486 | if sz == "b" then op = op - 1 end | 571 | if sz == "b" then op = op - 1 end |
| 487 | wputb(op) | 572 | wputb(op) |
| 573 | return psz, sk | ||
| 488 | end | 574 | end |
| 489 | 575 | ||
| 490 | -- Put ModRM or SIB formatted byte. | 576 | -- Put ModRM or SIB formatted byte. |
| @@ -494,7 +580,7 @@ local function wputmodrm(m, s, rm, vs, vrm) | |||
| 494 | end | 580 | end |
| 495 | 581 | ||
| 496 | -- Put ModRM/SIB plus optional displacement. | 582 | -- Put ModRM/SIB plus optional displacement. |
| 497 | local function wputmrmsib(t, imark, s, vsreg) | 583 | local function wputmrmsib(t, imark, s, vsreg, psz, sk) |
| 498 | local vreg, vxreg | 584 | local vreg, vxreg |
| 499 | local reg, xreg = t.reg, t.xreg | 585 | local reg, xreg = t.reg, t.xreg |
| 500 | if reg and reg < 0 then reg = 0; vreg = t.vreg end | 586 | if reg and reg < 0 then reg = 0; vreg = t.vreg end |
| @@ -504,8 +590,8 @@ local function wputmrmsib(t, imark, s, vsreg) | |||
| 504 | -- Register mode. | 590 | -- Register mode. |
| 505 | if sub(t.mode, 1, 1) == "r" then | 591 | if sub(t.mode, 1, 1) == "r" then |
| 506 | wputmodrm(3, s, reg) | 592 | wputmodrm(3, s, reg) |
| 507 | if vsreg then waction("VREG", vsreg); wputxb(2) end | 593 | wvreg("modrm.reg", vsreg, psz+1, sk, vreg) |
| 508 | if vreg then waction("VREG", vreg); wputxb(0) end | 594 | wvreg("modrm.rm.r", vreg, psz+1, sk) |
| 509 | return | 595 | return |
| 510 | end | 596 | end |
| 511 | 597 | ||
| @@ -519,28 +605,33 @@ local function wputmrmsib(t, imark, s, vsreg) | |||
| 519 | -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp) | 605 | -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp) |
| 520 | wputmodrm(0, s, 4) | 606 | wputmodrm(0, s, 4) |
| 521 | if imark == "I" then waction("MARK") end | 607 | if imark == "I" then waction("MARK") end |
| 522 | if vsreg then waction("VREG", vsreg); wputxb(2) end | 608 | wvreg("modrm.reg", vsreg, psz+1, sk, vxreg) |
| 523 | wputmodrm(t.xsc, xreg, 5) | 609 | wputmodrm(t.xsc, xreg, 5) |
| 524 | if vxreg then waction("VREG", vxreg); wputxb(3) end | 610 | wvreg("sib.index", vxreg, psz+2, sk) |
| 525 | else | 611 | else |
| 526 | -- Pure 32 bit displacement. | 612 | -- Pure 32 bit displacement. |
| 527 | if x64 and tdisp ~= "table" then | 613 | if x64 and tdisp ~= "table" then |
| 528 | wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp) | 614 | wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp) |
| 615 | wvreg("modrm.reg", vsreg, psz+1, sk) | ||
| 529 | if imark == "I" then waction("MARK") end | 616 | if imark == "I" then waction("MARK") end |
| 530 | wputmodrm(0, 4, 5) | 617 | wputmodrm(0, 4, 5) |
| 531 | else | 618 | else |
| 532 | riprel = x64 | 619 | riprel = x64 |
| 533 | wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp) | 620 | wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp) |
| 621 | wvreg("modrm.reg", vsreg, psz+1, sk) | ||
| 534 | if imark == "I" then waction("MARK") end | 622 | if imark == "I" then waction("MARK") end |
| 535 | end | 623 | end |
| 536 | if vsreg then waction("VREG", vsreg); wputxb(2) end | ||
| 537 | end | 624 | end |
| 538 | if riprel then -- Emit rip-relative displacement. | 625 | if riprel then -- Emit rip-relative displacement. |
| 539 | if match("UWSiI", imark) then | 626 | if match("UWSiI", imark) then |
| 540 | werror("NYI: rip-relative displacement followed by immediate") | 627 | werror("NYI: rip-relative displacement followed by immediate") |
| 541 | end | 628 | end |
| 542 | -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f. | 629 | -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f. |
| 543 | wputlabel("REL_", disp[1], 2) | 630 | if disp[2] == "iPJ" then |
| 631 | waction("REL_A", disp[1]) | ||
| 632 | else | ||
| 633 | wputlabel("REL_", disp[1], 2) | ||
| 634 | end | ||
| 544 | else | 635 | else |
| 545 | wputdarg(disp) | 636 | wputdarg(disp) |
| 546 | end | 637 | end |
| @@ -561,16 +652,16 @@ local function wputmrmsib(t, imark, s, vsreg) | |||
| 561 | if xreg or band(reg, 7) == 4 then | 652 | if xreg or band(reg, 7) == 4 then |
| 562 | wputmodrm(m or 2, s, 4) -- ModRM. | 653 | wputmodrm(m or 2, s, 4) -- ModRM. |
| 563 | if m == nil or imark == "I" then waction("MARK") end | 654 | if m == nil or imark == "I" then waction("MARK") end |
| 564 | if vsreg then waction("VREG", vsreg); wputxb(2) end | 655 | wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg) |
| 565 | wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB. | 656 | wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB. |
| 566 | if vxreg then waction("VREG", vxreg); wputxb(3) end | 657 | wvreg("sib.index", vxreg, psz+2, sk, vreg) |
| 567 | if vreg then waction("VREG", vreg); wputxb(1) end | 658 | wvreg("sib.base", vreg, psz+2, sk) |
| 568 | else | 659 | else |
| 569 | wputmodrm(m or 2, s, reg) -- ModRM. | 660 | wputmodrm(m or 2, s, reg) -- ModRM. |
| 570 | if (imark == "I" and (m == 1 or m == 2)) or | 661 | if (imark == "I" and (m == 1 or m == 2)) or |
| 571 | (m == nil and (vsreg or vreg)) then waction("MARK") end | 662 | (m == nil and (vsreg or vreg)) then waction("MARK") end |
| 572 | if vsreg then waction("VREG", vsreg); wputxb(2) end | 663 | wvreg("modrm.reg", vsreg, psz+1, sk, vreg) |
| 573 | if vreg then waction("VREG", vreg); wputxb(1) end | 664 | wvreg("modrm.rm.m", vreg, psz+1, sk) |
| 574 | end | 665 | end |
| 575 | 666 | ||
| 576 | -- Put displacement. | 667 | -- Put displacement. |
| @@ -592,10 +683,16 @@ local function opmodestr(op, args) | |||
| 592 | end | 683 | end |
| 593 | 684 | ||
| 594 | -- Convert number to valid integer or nil. | 685 | -- Convert number to valid integer or nil. |
| 595 | local function toint(expr) | 686 | local function toint(expr, isqword) |
| 596 | local n = tonumber(expr) | 687 | local n = tonumber(expr) |
| 597 | if n then | 688 | if n then |
| 598 | if n % 1 ~= 0 or n < -2147483648 or n > 4294967295 then | 689 | if n % 1 ~= 0 then |
| 690 | werror("not an integer number `"..expr.."'") | ||
| 691 | elseif isqword then | ||
| 692 | if n < -2147483648 or n > 2147483647 then | ||
| 693 | n = nil -- Handle it as an expression to avoid precision loss. | ||
| 694 | end | ||
| 695 | elseif n < -2147483648 or n > 4294967295 then | ||
| 599 | werror("bad integer number `"..expr.."'") | 696 | werror("bad integer number `"..expr.."'") |
| 600 | end | 697 | end |
| 601 | return n | 698 | return n |
| @@ -651,9 +748,9 @@ local function dispexpr(expr) | |||
| 651 | return imm*map_opsizenum[ops] | 748 | return imm*map_opsizenum[ops] |
| 652 | end | 749 | end |
| 653 | local mode, iexpr = immexpr(dispt) | 750 | local mode, iexpr = immexpr(dispt) |
| 654 | if mode == "iJ" then | 751 | if mode == "iJ" or mode == "iPJ" then |
| 655 | if c == "-" then werror("cannot invert label reference") end | 752 | if c == "-" then werror("cannot invert label reference") end |
| 656 | return { iexpr } | 753 | return { iexpr, mode } |
| 657 | end | 754 | end |
| 658 | return expr -- Need to return original signed expression. | 755 | return expr -- Need to return original signed expression. |
| 659 | end | 756 | end |
| @@ -678,7 +775,7 @@ local function rtexpr(expr) | |||
| 678 | end | 775 | end |
| 679 | 776 | ||
| 680 | -- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }. | 777 | -- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }. |
| 681 | local function parseoperand(param) | 778 | local function parseoperand(param, isqword) |
| 682 | local t = {} | 779 | local t = {} |
| 683 | 780 | ||
| 684 | local expr = param | 781 | local expr = param |
| @@ -766,7 +863,7 @@ local function parseoperand(param) | |||
| 766 | t.disp = dispexpr(tailx) | 863 | t.disp = dispexpr(tailx) |
| 767 | else | 864 | else |
| 768 | -- imm or opsize*imm | 865 | -- imm or opsize*imm |
| 769 | local imm = toint(expr) | 866 | local imm = toint(expr, isqword) |
| 770 | if not imm and sub(expr, 1, 1) == "*" and t.opsize then | 867 | if not imm and sub(expr, 1, 1) == "*" and t.opsize then |
| 771 | imm = toint(sub(expr, 2)) | 868 | imm = toint(sub(expr, 2)) |
| 772 | if imm then | 869 | if imm then |
| @@ -881,9 +978,16 @@ end | |||
| 881 | -- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. | 978 | -- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. |
| 882 | -- The spare 3 bits are either filled with the last hex digit or | 979 | -- The spare 3 bits are either filled with the last hex digit or |
| 883 | -- the result from a previous "r"/"R". The opcode is restored. | 980 | -- the result from a previous "r"/"R". The opcode is restored. |
| 981 | -- "u" Use VEX encoding, vvvv unused. | ||
| 982 | -- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is | ||
| 983 | -- removed from the list used by future characters). | ||
| 984 | -- "w" Use VEX encoding, vvvv from 3rd operand. | ||
| 985 | -- "L" Force VEX.L | ||
| 884 | -- | 986 | -- |
| 885 | -- All of the following characters force a flush of the opcode: | 987 | -- All of the following characters force a flush of the opcode: |
| 886 | -- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand. | 988 | -- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand. |
| 989 | -- "s" stores a 4 bit immediate from the last register operand, | ||
| 990 | -- followed by 4 zero bits. | ||
| 887 | -- "S" stores a signed 8 bit immediate from the last operand. | 991 | -- "S" stores a signed 8 bit immediate from the last operand. |
| 888 | -- "U" stores an unsigned 8 bit immediate from the last operand. | 992 | -- "U" stores an unsigned 8 bit immediate from the last operand. |
| 889 | -- "W" stores an unsigned 16 bit immediate from the last operand. | 993 | -- "W" stores an unsigned 16 bit immediate from the last operand. |
| @@ -1047,6 +1151,8 @@ local map_op = { | |||
| 1047 | rep_0 = "F3", | 1151 | rep_0 = "F3", |
| 1048 | repe_0 = "F3", | 1152 | repe_0 = "F3", |
| 1049 | repz_0 = "F3", | 1153 | repz_0 = "F3", |
| 1154 | endbr32_0 = "F30F1EFB", | ||
| 1155 | endbr64_0 = "F30F1EFA", | ||
| 1050 | -- F4: *hlt | 1156 | -- F4: *hlt |
| 1051 | cmc_0 = "F5", | 1157 | cmc_0 = "F5", |
| 1052 | -- F6: test... mb,i; div... mb | 1158 | -- F6: test... mb,i; div... mb |
| @@ -1226,46 +1332,14 @@ local map_op = { | |||
| 1226 | movups_2 = "rmo:0F10rM|mro:0F11Rm", | 1332 | movups_2 = "rmo:0F10rM|mro:0F11Rm", |
| 1227 | orpd_2 = "rmo:660F56rM", | 1333 | orpd_2 = "rmo:660F56rM", |
| 1228 | orps_2 = "rmo:0F56rM", | 1334 | orps_2 = "rmo:0F56rM", |
| 1229 | packssdw_2 = "rmo:660F6BrM", | ||
| 1230 | packsswb_2 = "rmo:660F63rM", | ||
| 1231 | packuswb_2 = "rmo:660F67rM", | ||
| 1232 | paddb_2 = "rmo:660FFCrM", | ||
| 1233 | paddd_2 = "rmo:660FFErM", | ||
| 1234 | paddq_2 = "rmo:660FD4rM", | ||
| 1235 | paddsb_2 = "rmo:660FECrM", | ||
| 1236 | paddsw_2 = "rmo:660FEDrM", | ||
| 1237 | paddusb_2 = "rmo:660FDCrM", | ||
| 1238 | paddusw_2 = "rmo:660FDDrM", | ||
| 1239 | paddw_2 = "rmo:660FFDrM", | ||
| 1240 | pand_2 = "rmo:660FDBrM", | ||
| 1241 | pandn_2 = "rmo:660FDFrM", | ||
| 1242 | pause_0 = "F390", | 1335 | pause_0 = "F390", |
| 1243 | pavgb_2 = "rmo:660FE0rM", | ||
| 1244 | pavgw_2 = "rmo:660FE3rM", | ||
| 1245 | pcmpeqb_2 = "rmo:660F74rM", | ||
| 1246 | pcmpeqd_2 = "rmo:660F76rM", | ||
| 1247 | pcmpeqw_2 = "rmo:660F75rM", | ||
| 1248 | pcmpgtb_2 = "rmo:660F64rM", | ||
| 1249 | pcmpgtd_2 = "rmo:660F66rM", | ||
| 1250 | pcmpgtw_2 = "rmo:660F65rM", | ||
| 1251 | pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only. | 1336 | pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only. |
| 1252 | pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:", | 1337 | pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:", |
| 1253 | pmaddwd_2 = "rmo:660FF5rM", | ||
| 1254 | pmaxsw_2 = "rmo:660FEErM", | ||
| 1255 | pmaxub_2 = "rmo:660FDErM", | ||
| 1256 | pminsw_2 = "rmo:660FEArM", | ||
| 1257 | pminub_2 = "rmo:660FDArM", | ||
| 1258 | pmovmskb_2 = "rr/do:660FD7rM", | 1338 | pmovmskb_2 = "rr/do:660FD7rM", |
| 1259 | pmulhuw_2 = "rmo:660FE4rM", | ||
| 1260 | pmulhw_2 = "rmo:660FE5rM", | ||
| 1261 | pmullw_2 = "rmo:660FD5rM", | ||
| 1262 | pmuludq_2 = "rmo:660FF4rM", | ||
| 1263 | por_2 = "rmo:660FEBrM", | ||
| 1264 | prefetchnta_1 = "xb:n0F180m", | 1339 | prefetchnta_1 = "xb:n0F180m", |
| 1265 | prefetcht0_1 = "xb:n0F181m", | 1340 | prefetcht0_1 = "xb:n0F181m", |
| 1266 | prefetcht1_1 = "xb:n0F182m", | 1341 | prefetcht1_1 = "xb:n0F182m", |
| 1267 | prefetcht2_1 = "xb:n0F183m", | 1342 | prefetcht2_1 = "xb:n0F183m", |
| 1268 | psadbw_2 = "rmo:660FF6rM", | ||
| 1269 | pshufd_3 = "rmio:660F70rMU", | 1343 | pshufd_3 = "rmio:660F70rMU", |
| 1270 | pshufhw_3 = "rmio:F30F70rMU", | 1344 | pshufhw_3 = "rmio:F30F70rMU", |
| 1271 | pshuflw_3 = "rmio:F20F70rMU", | 1345 | pshuflw_3 = "rmio:F20F70rMU", |
| @@ -1279,23 +1353,6 @@ local map_op = { | |||
| 1279 | psrldq_2 = "rio:660F733mU", | 1353 | psrldq_2 = "rio:660F733mU", |
| 1280 | psrlq_2 = "rmo:660FD3rM|rio:660F732mU", | 1354 | psrlq_2 = "rmo:660FD3rM|rio:660F732mU", |
| 1281 | psrlw_2 = "rmo:660FD1rM|rio:660F712mU", | 1355 | psrlw_2 = "rmo:660FD1rM|rio:660F712mU", |
| 1282 | psubb_2 = "rmo:660FF8rM", | ||
| 1283 | psubd_2 = "rmo:660FFArM", | ||
| 1284 | psubq_2 = "rmo:660FFBrM", | ||
| 1285 | psubsb_2 = "rmo:660FE8rM", | ||
| 1286 | psubsw_2 = "rmo:660FE9rM", | ||
| 1287 | psubusb_2 = "rmo:660FD8rM", | ||
| 1288 | psubusw_2 = "rmo:660FD9rM", | ||
| 1289 | psubw_2 = "rmo:660FF9rM", | ||
| 1290 | punpckhbw_2 = "rmo:660F68rM", | ||
| 1291 | punpckhdq_2 = "rmo:660F6ArM", | ||
| 1292 | punpckhqdq_2 = "rmo:660F6DrM", | ||
| 1293 | punpckhwd_2 = "rmo:660F69rM", | ||
| 1294 | punpcklbw_2 = "rmo:660F60rM", | ||
| 1295 | punpckldq_2 = "rmo:660F62rM", | ||
| 1296 | punpcklqdq_2 = "rmo:660F6CrM", | ||
| 1297 | punpcklwd_2 = "rmo:660F61rM", | ||
| 1298 | pxor_2 = "rmo:660FEFrM", | ||
| 1299 | rcpps_2 = "rmo:0F53rM", | 1356 | rcpps_2 = "rmo:0F53rM", |
| 1300 | rcpss_2 = "rro:F30F53rM|rx/od:", | 1357 | rcpss_2 = "rro:F30F53rM|rx/od:", |
| 1301 | rsqrtps_2 = "rmo:0F52rM", | 1358 | rsqrtps_2 = "rmo:0F52rM", |
| @@ -1413,6 +1470,327 @@ local map_op = { | |||
| 1413 | movntsd_2 = "xr/qo:nF20F2BRm", | 1470 | movntsd_2 = "xr/qo:nF20F2BRm", |
| 1414 | movntss_2 = "xr/do:F30F2BRm", | 1471 | movntss_2 = "xr/do:F30F2BRm", |
| 1415 | -- popcnt is also in SSE4.2 | 1472 | -- popcnt is also in SSE4.2 |
| 1473 | |||
| 1474 | -- AES-NI | ||
| 1475 | aesdec_2 = "rmo:660F38DErM", | ||
| 1476 | aesdeclast_2 = "rmo:660F38DFrM", | ||
| 1477 | aesenc_2 = "rmo:660F38DCrM", | ||
| 1478 | aesenclast_2 = "rmo:660F38DDrM", | ||
| 1479 | aesimc_2 = "rmo:660F38DBrM", | ||
| 1480 | aeskeygenassist_3 = "rmio:660F3ADFrMU", | ||
| 1481 | pclmulqdq_3 = "rmio:660F3A44rMU", | ||
| 1482 | |||
| 1483 | -- AVX FP ops | ||
| 1484 | vaddsubpd_3 = "rrmoy:660FVD0rM", | ||
| 1485 | vaddsubps_3 = "rrmoy:F20FVD0rM", | ||
| 1486 | vandpd_3 = "rrmoy:660FV54rM", | ||
| 1487 | vandps_3 = "rrmoy:0FV54rM", | ||
| 1488 | vandnpd_3 = "rrmoy:660FV55rM", | ||
| 1489 | vandnps_3 = "rrmoy:0FV55rM", | ||
| 1490 | vblendpd_4 = "rrmioy:660F3AV0DrMU", | ||
| 1491 | vblendps_4 = "rrmioy:660F3AV0CrMU", | ||
| 1492 | vblendvpd_4 = "rrmroy:660F3AV4BrMs", | ||
| 1493 | vblendvps_4 = "rrmroy:660F3AV4ArMs", | ||
| 1494 | vbroadcastf128_2 = "rx/yo:660F38u1ArM", | ||
| 1495 | vcmppd_4 = "rrmioy:660FVC2rMU", | ||
| 1496 | vcmpps_4 = "rrmioy:0FVC2rMU", | ||
| 1497 | vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:", | ||
| 1498 | vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:", | ||
| 1499 | vcomisd_2 = "rro:660Fu2FrM|rx/oq:", | ||
| 1500 | vcomiss_2 = "rro:0Fu2FrM|rx/od:", | ||
| 1501 | vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:", | ||
| 1502 | vcvtdq2ps_2 = "rmoy:0Fu5BrM", | ||
| 1503 | vcvtpd2dq_2 = "rmoy:F20FuE6rM", | ||
| 1504 | vcvtpd2ps_2 = "rmoy:660Fu5ArM", | ||
| 1505 | vcvtps2dq_2 = "rmoy:660Fu5BrM", | ||
| 1506 | vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:", | ||
| 1507 | vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:", | ||
| 1508 | vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:", | ||
| 1509 | vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM", | ||
| 1510 | vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM", | ||
| 1511 | vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:", | ||
| 1512 | vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:", | ||
| 1513 | vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM", | ||
| 1514 | vcvttps2dq_2 = "rmoy:F30Fu5BrM", | ||
| 1515 | vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:", | ||
| 1516 | vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:", | ||
| 1517 | vdppd_4 = "rrmio:660F3AV41rMU", | ||
| 1518 | vdpps_4 = "rrmioy:660F3AV40rMU", | ||
| 1519 | vextractf128_3 = "mri/oy:660F3AuL19RmU", | ||
| 1520 | vextractps_3 = "mri/do:660F3Au17RmU", | ||
| 1521 | vhaddpd_3 = "rrmoy:660FV7CrM", | ||
| 1522 | vhaddps_3 = "rrmoy:F20FV7CrM", | ||
| 1523 | vhsubpd_3 = "rrmoy:660FV7DrM", | ||
| 1524 | vhsubps_3 = "rrmoy:F20FV7DrM", | ||
| 1525 | vinsertf128_4 = "rrmi/yyo:660F3AV18rMU", | ||
| 1526 | vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:", | ||
| 1527 | vldmxcsr_1 = "xd:0FuAE2m", | ||
| 1528 | vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm", | ||
| 1529 | vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm", | ||
| 1530 | vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm", | ||
| 1531 | vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm", | ||
| 1532 | vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:", | ||
| 1533 | vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm", | ||
| 1534 | vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:", | ||
| 1535 | vmovhlps_3 = "rrro:0FV12rM", | ||
| 1536 | vmovhpd_2 = "xr/qo:660Fu17Rm", | ||
| 1537 | vmovhpd_3 = "rrx/ooq:660FV16rM", | ||
| 1538 | vmovhps_2 = "xr/qo:0Fu17Rm", | ||
| 1539 | vmovhps_3 = "rrx/ooq:0FV16rM", | ||
| 1540 | vmovlhps_3 = "rrro:0FV16rM", | ||
| 1541 | vmovlpd_2 = "xr/qo:660Fu13Rm", | ||
| 1542 | vmovlpd_3 = "rrx/ooq:660FV12rM", | ||
| 1543 | vmovlps_2 = "xr/qo:0Fu13Rm", | ||
| 1544 | vmovlps_3 = "rrx/ooq:0FV12rM", | ||
| 1545 | vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM", | ||
| 1546 | vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM", | ||
| 1547 | vmovntpd_2 = "xroy:660Fu2BRm", | ||
| 1548 | vmovntps_2 = "xroy:0Fu2BRm", | ||
| 1549 | vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm", | ||
| 1550 | vmovsd_3 = "rrro:F20FV10rM", | ||
| 1551 | vmovshdup_2 = "rmoy:F30Fu16rM", | ||
| 1552 | vmovsldup_2 = "rmoy:F30Fu12rM", | ||
| 1553 | vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm", | ||
| 1554 | vmovss_3 = "rrro:F30FV10rM", | ||
| 1555 | vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm", | ||
| 1556 | vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm", | ||
| 1557 | vorpd_3 = "rrmoy:660FV56rM", | ||
| 1558 | vorps_3 = "rrmoy:0FV56rM", | ||
| 1559 | vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU", | ||
| 1560 | vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU", | ||
| 1561 | vperm2f128_4 = "rrmiy:660F3AV06rMU", | ||
| 1562 | vptestpd_2 = "rmoy:660F38u0FrM", | ||
| 1563 | vptestps_2 = "rmoy:660F38u0ErM", | ||
| 1564 | vrcpps_2 = "rmoy:0Fu53rM", | ||
| 1565 | vrcpss_3 = "rrro:F30FV53rM|rrx/ood:", | ||
| 1566 | vrsqrtps_2 = "rmoy:0Fu52rM", | ||
| 1567 | vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:", | ||
| 1568 | vroundpd_3 = "rmioy:660F3Au09rMU", | ||
| 1569 | vroundps_3 = "rmioy:660F3Au08rMU", | ||
| 1570 | vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:", | ||
| 1571 | vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:", | ||
| 1572 | vshufpd_4 = "rrmioy:660FVC6rMU", | ||
| 1573 | vshufps_4 = "rrmioy:0FVC6rMU", | ||
| 1574 | vsqrtps_2 = "rmoy:0Fu51rM", | ||
| 1575 | vsqrtss_2 = "rro:F30Fu51rM|rx/od:", | ||
| 1576 | vsqrtpd_2 = "rmoy:660Fu51rM", | ||
| 1577 | vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:", | ||
| 1578 | vstmxcsr_1 = "xd:0FuAE3m", | ||
| 1579 | vucomisd_2 = "rro:660Fu2ErM|rx/oq:", | ||
| 1580 | vucomiss_2 = "rro:0Fu2ErM|rx/od:", | ||
| 1581 | vunpckhpd_3 = "rrmoy:660FV15rM", | ||
| 1582 | vunpckhps_3 = "rrmoy:0FV15rM", | ||
| 1583 | vunpcklpd_3 = "rrmoy:660FV14rM", | ||
| 1584 | vunpcklps_3 = "rrmoy:0FV14rM", | ||
| 1585 | vxorpd_3 = "rrmoy:660FV57rM", | ||
| 1586 | vxorps_3 = "rrmoy:0FV57rM", | ||
| 1587 | vzeroall_0 = "0FuL77", | ||
| 1588 | vzeroupper_0 = "0Fu77", | ||
| 1589 | |||
| 1590 | -- AVX2 FP ops | ||
| 1591 | vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:", | ||
| 1592 | vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:", | ||
| 1593 | -- *vgather* (!vsib) | ||
| 1594 | vpermpd_3 = "rmiy:660F3AuX01rMU", | ||
| 1595 | vpermps_3 = "rrmy:660F38V16rM", | ||
| 1596 | |||
| 1597 | -- AVX, AVX2 integer ops | ||
| 1598 | -- In general, xmm requires AVX, ymm requires AVX2. | ||
| 1599 | vaesdec_3 = "rrmo:660F38VDErM", | ||
| 1600 | vaesdeclast_3 = "rrmo:660F38VDFrM", | ||
| 1601 | vaesenc_3 = "rrmo:660F38VDCrM", | ||
| 1602 | vaesenclast_3 = "rrmo:660F38VDDrM", | ||
| 1603 | vaesimc_2 = "rmo:660F38uDBrM", | ||
| 1604 | vaeskeygenassist_3 = "rmio:660F3AuDFrMU", | ||
| 1605 | vlddqu_2 = "rxoy:F20FuF0rM", | ||
| 1606 | vmaskmovdqu_2 = "rro:660FuF7rM", | ||
| 1607 | vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm", | ||
| 1608 | vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm", | ||
| 1609 | vmovntdq_2 = "xroy:660FuE7Rm", | ||
| 1610 | vmovntdqa_2 = "rxoy:660F38u2ArM", | ||
| 1611 | vmpsadbw_4 = "rrmioy:660F3AV42rMU", | ||
| 1612 | vpabsb_2 = "rmoy:660F38u1CrM", | ||
| 1613 | vpabsd_2 = "rmoy:660F38u1ErM", | ||
| 1614 | vpabsw_2 = "rmoy:660F38u1DrM", | ||
| 1615 | vpackusdw_3 = "rrmoy:660F38V2BrM", | ||
| 1616 | vpalignr_4 = "rrmioy:660F3AV0FrMU", | ||
| 1617 | vpblendvb_4 = "rrmroy:660F3AV4CrMs", | ||
| 1618 | vpblendw_4 = "rrmioy:660F3AV0ErMU", | ||
| 1619 | vpclmulqdq_4 = "rrmio:660F3AV44rMU", | ||
| 1620 | vpcmpeqq_3 = "rrmoy:660F38V29rM", | ||
| 1621 | vpcmpestri_3 = "rmio:660F3Au61rMU", | ||
| 1622 | vpcmpestrm_3 = "rmio:660F3Au60rMU", | ||
| 1623 | vpcmpgtq_3 = "rrmoy:660F38V37rM", | ||
| 1624 | vpcmpistri_3 = "rmio:660F3Au63rMU", | ||
| 1625 | vpcmpistrm_3 = "rmio:660F3Au62rMU", | ||
| 1626 | vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:", | ||
| 1627 | vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU", | ||
| 1628 | vpextrd_3 = "mri/do:660F3Au16RmU", | ||
| 1629 | vpextrq_3 = "mri/qo:660F3Au16RmU", | ||
| 1630 | vphaddw_3 = "rrmoy:660F38V01rM", | ||
| 1631 | vphaddd_3 = "rrmoy:660F38V02rM", | ||
| 1632 | vphaddsw_3 = "rrmoy:660F38V03rM", | ||
| 1633 | vphminposuw_2 = "rmo:660F38u41rM", | ||
| 1634 | vphsubw_3 = "rrmoy:660F38V05rM", | ||
| 1635 | vphsubd_3 = "rrmoy:660F38V06rM", | ||
| 1636 | vphsubsw_3 = "rrmoy:660F38V07rM", | ||
| 1637 | vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:", | ||
| 1638 | vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:", | ||
| 1639 | vpinsrd_4 = "rrmi/ood:660F3AV22rMU", | ||
| 1640 | vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU", | ||
| 1641 | vpmaddubsw_3 = "rrmoy:660F38V04rM", | ||
| 1642 | vpmaxsb_3 = "rrmoy:660F38V3CrM", | ||
| 1643 | vpmaxsd_3 = "rrmoy:660F38V3DrM", | ||
| 1644 | vpmaxuw_3 = "rrmoy:660F38V3ErM", | ||
| 1645 | vpmaxud_3 = "rrmoy:660F38V3FrM", | ||
| 1646 | vpminsb_3 = "rrmoy:660F38V38rM", | ||
| 1647 | vpminsd_3 = "rrmoy:660F38V39rM", | ||
| 1648 | vpminuw_3 = "rrmoy:660F38V3ArM", | ||
| 1649 | vpminud_3 = "rrmoy:660F38V3BrM", | ||
| 1650 | vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM", | ||
| 1651 | vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:", | ||
| 1652 | vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:", | ||
| 1653 | vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:", | ||
| 1654 | vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:", | ||
| 1655 | vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:", | ||
| 1656 | vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:", | ||
| 1657 | vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:", | ||
| 1658 | vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:", | ||
| 1659 | vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:", | ||
| 1660 | vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:", | ||
| 1661 | vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:", | ||
| 1662 | vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:", | ||
| 1663 | vpmuldq_3 = "rrmoy:660F38V28rM", | ||
| 1664 | vpmulhrsw_3 = "rrmoy:660F38V0BrM", | ||
| 1665 | vpmulld_3 = "rrmoy:660F38V40rM", | ||
| 1666 | vpshufb_3 = "rrmoy:660F38V00rM", | ||
| 1667 | vpshufd_3 = "rmioy:660Fu70rMU", | ||
| 1668 | vpshufhw_3 = "rmioy:F30Fu70rMU", | ||
| 1669 | vpshuflw_3 = "rmioy:F20Fu70rMU", | ||
| 1670 | vpsignb_3 = "rrmoy:660F38V08rM", | ||
| 1671 | vpsignw_3 = "rrmoy:660F38V09rM", | ||
| 1672 | vpsignd_3 = "rrmoy:660F38V0ArM", | ||
| 1673 | vpslldq_3 = "rrioy:660Fv737mU", | ||
| 1674 | vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU", | ||
| 1675 | vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU", | ||
| 1676 | vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU", | ||
| 1677 | vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU", | ||
| 1678 | vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU", | ||
| 1679 | vpsrldq_3 = "rrioy:660Fv733mU", | ||
| 1680 | vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU", | ||
| 1681 | vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU", | ||
| 1682 | vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU", | ||
| 1683 | vptest_2 = "rmoy:660F38u17rM", | ||
| 1684 | |||
| 1685 | -- AVX2 integer ops | ||
| 1686 | vbroadcasti128_2 = "rx/yo:660F38u5ArM", | ||
| 1687 | vinserti128_4 = "rrmi/yyo:660F3AV38rMU", | ||
| 1688 | vextracti128_3 = "mri/oy:660F3AuL39RmU", | ||
| 1689 | vpblendd_4 = "rrmioy:660F3AV02rMU", | ||
| 1690 | vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:", | ||
| 1691 | vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:", | ||
| 1692 | vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:", | ||
| 1693 | vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:", | ||
| 1694 | vpermd_3 = "rrmy:660F38V36rM", | ||
| 1695 | vpermq_3 = "rmiy:660F3AuX00rMU", | ||
| 1696 | -- *vpgather* (!vsib) | ||
| 1697 | vperm2i128_4 = "rrmiy:660F3AV46rMU", | ||
| 1698 | vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm", | ||
| 1699 | vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm", | ||
| 1700 | vpsllvd_3 = "rrmoy:660F38V47rM", | ||
| 1701 | vpsllvq_3 = "rrmoy:660F38VX47rM", | ||
| 1702 | vpsravd_3 = "rrmoy:660F38V46rM", | ||
| 1703 | vpsrlvd_3 = "rrmoy:660F38V45rM", | ||
| 1704 | vpsrlvq_3 = "rrmoy:660F38VX45rM", | ||
| 1705 | |||
| 1706 | -- Intel ADX | ||
| 1707 | adcx_2 = "rmqd:660F38F6rM", | ||
| 1708 | adox_2 = "rmqd:F30F38F6rM", | ||
| 1709 | |||
| 1710 | -- BMI1 | ||
| 1711 | andn_3 = "rrmqd:0F38VF2rM", | ||
| 1712 | bextr_3 = "rmrqd:0F38wF7rM", | ||
| 1713 | blsi_2 = "rmqd:0F38vF33m", | ||
| 1714 | blsmsk_2 = "rmqd:0F38vF32m", | ||
| 1715 | blsr_2 = "rmqd:0F38vF31m", | ||
| 1716 | tzcnt_2 = "rmqdw:F30FBCrM", | ||
| 1717 | |||
| 1718 | -- BMI2 | ||
| 1719 | bzhi_3 = "rmrqd:0F38wF5rM", | ||
| 1720 | mulx_3 = "rrmqd:F20F38VF6rM", | ||
| 1721 | pdep_3 = "rrmqd:F20F38VF5rM", | ||
| 1722 | pext_3 = "rrmqd:F30F38VF5rM", | ||
| 1723 | rorx_3 = "rmSqd:F20F3AuF0rMS", | ||
| 1724 | sarx_3 = "rmrqd:F30F38wF7rM", | ||
| 1725 | shrx_3 = "rmrqd:F20F38wF7rM", | ||
| 1726 | shlx_3 = "rmrqd:660F38wF7rM", | ||
| 1727 | |||
| 1728 | -- FMA3 | ||
| 1729 | vfmaddsub132pd_3 = "rrmoy:660F38VX96rM", | ||
| 1730 | vfmaddsub132ps_3 = "rrmoy:660F38V96rM", | ||
| 1731 | vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM", | ||
| 1732 | vfmaddsub213ps_3 = "rrmoy:660F38VA6rM", | ||
| 1733 | vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM", | ||
| 1734 | vfmaddsub231ps_3 = "rrmoy:660F38VB6rM", | ||
| 1735 | |||
| 1736 | vfmsubadd132pd_3 = "rrmoy:660F38VX97rM", | ||
| 1737 | vfmsubadd132ps_3 = "rrmoy:660F38V97rM", | ||
| 1738 | vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM", | ||
| 1739 | vfmsubadd213ps_3 = "rrmoy:660F38VA7rM", | ||
| 1740 | vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM", | ||
| 1741 | vfmsubadd231ps_3 = "rrmoy:660F38VB7rM", | ||
| 1742 | |||
| 1743 | vfmadd132pd_3 = "rrmoy:660F38VX98rM", | ||
| 1744 | vfmadd132ps_3 = "rrmoy:660F38V98rM", | ||
| 1745 | vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:", | ||
| 1746 | vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:", | ||
| 1747 | vfmadd213pd_3 = "rrmoy:660F38VXA8rM", | ||
| 1748 | vfmadd213ps_3 = "rrmoy:660F38VA8rM", | ||
| 1749 | vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:", | ||
| 1750 | vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:", | ||
| 1751 | vfmadd231pd_3 = "rrmoy:660F38VXB8rM", | ||
| 1752 | vfmadd231ps_3 = "rrmoy:660F38VB8rM", | ||
| 1753 | vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:", | ||
| 1754 | vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:", | ||
| 1755 | |||
| 1756 | vfmsub132pd_3 = "rrmoy:660F38VX9ArM", | ||
| 1757 | vfmsub132ps_3 = "rrmoy:660F38V9ArM", | ||
| 1758 | vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:", | ||
| 1759 | vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:", | ||
| 1760 | vfmsub213pd_3 = "rrmoy:660F38VXAArM", | ||
| 1761 | vfmsub213ps_3 = "rrmoy:660F38VAArM", | ||
| 1762 | vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:", | ||
| 1763 | vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:", | ||
| 1764 | vfmsub231pd_3 = "rrmoy:660F38VXBArM", | ||
| 1765 | vfmsub231ps_3 = "rrmoy:660F38VBArM", | ||
| 1766 | vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:", | ||
| 1767 | vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:", | ||
| 1768 | |||
| 1769 | vfnmadd132pd_3 = "rrmoy:660F38VX9CrM", | ||
| 1770 | vfnmadd132ps_3 = "rrmoy:660F38V9CrM", | ||
| 1771 | vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:", | ||
| 1772 | vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:", | ||
| 1773 | vfnmadd213pd_3 = "rrmoy:660F38VXACrM", | ||
| 1774 | vfnmadd213ps_3 = "rrmoy:660F38VACrM", | ||
| 1775 | vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:", | ||
| 1776 | vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:", | ||
| 1777 | vfnmadd231pd_3 = "rrmoy:660F38VXBCrM", | ||
| 1778 | vfnmadd231ps_3 = "rrmoy:660F38VBCrM", | ||
| 1779 | vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:", | ||
| 1780 | vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:", | ||
| 1781 | |||
| 1782 | vfnmsub132pd_3 = "rrmoy:660F38VX9ErM", | ||
| 1783 | vfnmsub132ps_3 = "rrmoy:660F38V9ErM", | ||
| 1784 | vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:", | ||
| 1785 | vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:", | ||
| 1786 | vfnmsub213pd_3 = "rrmoy:660F38VXAErM", | ||
| 1787 | vfnmsub213ps_3 = "rrmoy:660F38VAErM", | ||
| 1788 | vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:", | ||
| 1789 | vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:", | ||
| 1790 | vfnmsub231pd_3 = "rrmoy:660F38VXBErM", | ||
| 1791 | vfnmsub231ps_3 = "rrmoy:660F38VBErM", | ||
| 1792 | vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:", | ||
| 1793 | vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:", | ||
| 1416 | } | 1794 | } |
| 1417 | 1795 | ||
| 1418 | ------------------------------------------------------------------------------ | 1796 | ------------------------------------------------------------------------------ |
| @@ -1463,28 +1841,58 @@ for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do | |||
| 1463 | map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+ | 1841 | map_op["fcmov"..cc.."_2"] = format("Fff:%04XR", nc) -- P6+ |
| 1464 | end | 1842 | end |
| 1465 | 1843 | ||
| 1466 | -- SSE FP arithmetic ops. | 1844 | -- SSE / AVX FP arithmetic ops. |
-- Generate the FP arithmetic templates from a name -> n table. n is formatted
-- as a single hex digit directly after "0F5" in each pattern (e.g. add = 8
-- produces the opcode text "0F58").
-- Every name gets four two-operand entries (ps/ss/pd/sd); their patterns
-- differ only in the leading prefix text: none, F3, 66 and F2 respectively.
| 1467 | for name,n in pairs{ sqrt = 1, add = 8, mul = 9, | 1845 | for name,n in pairs{ sqrt = 1, add = 8, mul = 9, |
| 1468 | sub = 12, min = 13, div = 14, max = 15 } do | 1846 | sub = 12, min = 13, div = 14, max = 15 } do |
| 1469 | map_op[name.."ps_2"] = format("rmo:0F5%XrM", n) | 1847 | map_op[name.."ps_2"] = format("rmo:0F5%XrM", n) |
| 1470 | map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n) | 1848 | map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n) |
| 1471 | map_op[name.."pd_2"] = format("rmo:660F5%XrM", n) | 1849 | map_op[name.."pd_2"] = format("rmo:660F5%XrM", n) |
| 1472 | map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n) | 1850 | map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n) |
-- The "v"-prefixed three-operand entries (pattern marker `V') are generated
-- for every name except sqrt (n == 1). The vsqrt* forms appear instead as
-- explicit two-operand `_2' entries in the template table above
-- (presumably map_op -- its header is outside this view).
| 1851 | if n ~= 1 then | ||
| 1852 | map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n) | ||
| 1853 | map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n) | ||
| 1854 | map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n) | ||
| 1855 | map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n) | ||
| 1856 | end | ||
| 1857 | end | ||
| 1858 | |||
| 1859 | -- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf). | ||
-- Generate the integer arithmetic templates from a name -> opcode byte table.
-- n is formatted as two uppercase hex digits (%02X) after "660F".
-- Each name yields two map_op entries:
--   <name>_2   "rmo:660F<nn>rM"      two-operand form
--   v<name>_3  "rrmoy:660FV<nn>rM"   three-operand form using the `V' marker
-- NOTE(review): the size letters `o' and `y' presumably select xmm and ymm
-- operands (cf. the "xmm requires AVX, ymm requires AVX2" remark) -- confirm.
| 1860 | for name,n in pairs{ | ||
| 1861 | paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4, | ||
| 1862 | paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B, | ||
| 1863 | packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC, | ||
| 1864 | paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0, | ||
| 1865 | pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76, | ||
| 1866 | pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66, | ||
| 1867 | pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE, | ||
| 1868 | pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA, | ||
| 1869 | pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5, | ||
| 1870 | pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8, | ||
| 1871 | psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8, | ||
| 1872 | psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9, | ||
| 1873 | punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A, | ||
| 1874 | punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61, | ||
| 1875 | punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF | ||
| 1876 | } do | ||
| 1877 | map_op[name.."_2"] = format("rmo:660F%02XrM", n) | ||
| 1878 | map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n) | ||
| 1473 | end | 1879 | end |
| 1474 | 1880 | ||
| 1475 | ------------------------------------------------------------------------------ | 1881 | ------------------------------------------------------------------------------ |
| 1476 | 1882 | ||
-- VEX marker characters recognized in opcode patterns. A numeric value is the
-- index of the operand that dopattern removes from args and stores as vex.v;
-- `u' maps to false, meaning no operand is removed. Because of that false
-- entry, lookups have to test against nil rather than truthiness, otherwise
-- `u' would not be recognized as a marker.
| 1883 | local map_vexarg = { u = false, v = 1, V = 2, w = 3 } | ||
| 1884 | |||
| 1477 | -- Process pattern string. | 1885 | -- Process pattern string. |
| 1478 | local function dopattern(pat, args, sz, op, needrex) | 1886 | local function dopattern(pat, args, sz, op, needrex) |
| 1479 | local digit, addin | 1887 | local digit, addin, vex |
| 1480 | local opcode = 0 | 1888 | local opcode = 0 |
| 1481 | local szov = sz | 1889 | local szov = sz |
| 1482 | local narg = 1 | 1890 | local narg = 1 |
| 1483 | local rex = 0 | 1891 | local rex = 0 |
| 1484 | 1892 | ||
| 1485 | -- Limit number of section buffer positions used by a single dasm_put(). | 1893 | -- Limit number of section buffer positions used by a single dasm_put(). |
| 1486 | -- A single opcode needs a maximum of 5 positions. | 1894 | -- A single opcode needs a maximum of 6 positions. |
| 1487 | if secpos+5 > maxsecpos then wflush() end | 1895 | if secpos+6 > maxsecpos then wflush() end |
| 1488 | 1896 | ||
| 1489 | -- Process each character. | 1897 | -- Process each character. |
| 1490 | for c in gmatch(pat.."|", ".") do | 1898 | for c in gmatch(pat.."|", ".") do |
| @@ -1498,6 +1906,8 @@ local function dopattern(pat, args, sz, op, needrex) | |||
| 1498 | szov = nil | 1906 | szov = nil |
| 1499 | elseif c == "X" then -- Force REX.W. | 1907 | elseif c == "X" then -- Force REX.W. |
| 1500 | rex = 8 | 1908 | rex = 8 |
| 1909 | elseif c == "L" then -- Force VEX.L. | ||
| 1910 | vex.l = true | ||
| 1501 | elseif c == "r" then -- Merge 1st operand regno. into opcode. | 1911 | elseif c == "r" then -- Merge 1st operand regno. into opcode. |
| 1502 | addin = args[1]; opcode = opcode + (addin.reg % 8) | 1912 | addin = args[1]; opcode = opcode + (addin.reg % 8) |
| 1503 | if narg < 2 then narg = 2 end | 1913 | if narg < 2 then narg = 2 end |
| @@ -1521,21 +1931,42 @@ local function dopattern(pat, args, sz, op, needrex) | |||
| 1521 | if t.xreg and t.xreg > 7 then rex = rex + 2 end | 1931 | if t.xreg and t.xreg > 7 then rex = rex + 2 end |
| 1522 | if s > 7 then rex = rex + 4 end | 1932 | if s > 7 then rex = rex + 4 end |
| 1523 | if needrex then rex = rex + 16 end | 1933 | if needrex then rex = rex + 16 end |
| 1524 | wputop(szov, opcode, rex); opcode = nil | 1934 | local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg) |
| 1935 | opcode = nil | ||
| 1525 | local imark = sub(pat, -1) -- Force a mark (ugly). | 1936 | local imark = sub(pat, -1) -- Force a mark (ugly). |
| 1526 | -- Put ModRM/SIB with regno/last digit as spare. | 1937 | -- Put ModRM/SIB with regno/last digit as spare. |
| 1527 | wputmrmsib(t, imark, s, addin and addin.vreg) | 1938 | wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk) |
| 1528 | addin = nil | 1939 | addin = nil |
| 1940 | elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix | ||
| 1941 | local b = band(opcode, 255); opcode = shr(opcode, 8) | ||
| 1942 | local m = 1 | ||
| 1943 | if b == 0x38 then m = 2 | ||
| 1944 | elseif b == 0x3a then m = 3 end | ||
| 1945 | if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end | ||
| 1946 | if b ~= 0x0f then | ||
| 1947 | werror("expected `0F', `0F38', or `0F3A' to precede `"..c.. | ||
| 1948 | "' in pattern `"..pat.."' for `"..op.."'") | ||
| 1949 | end | ||
| 1950 | local v = map_vexarg[c] | ||
| 1951 | if v then v = remove(args, v) end | ||
| 1952 | b = band(opcode, 255) | ||
| 1953 | local p = 0 | ||
| 1954 | if b == 0x66 then p = 1 | ||
| 1955 | elseif b == 0xf3 then p = 2 | ||
| 1956 | elseif b == 0xf2 then p = 3 end | ||
| 1957 | if p ~= 0 then opcode = shr(opcode, 8) end | ||
| 1958 | if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end | ||
| 1959 | vex = { m = m, p = p, v = v } | ||
| 1529 | else | 1960 | else |
| 1530 | if opcode then -- Flush opcode. | 1961 | if opcode then -- Flush opcode. |
| 1531 | if szov == "q" and rex == 0 then rex = rex + 8 end | 1962 | if szov == "q" and rex == 0 then rex = rex + 8 end |
| 1532 | if needrex then rex = rex + 16 end | 1963 | if needrex then rex = rex + 16 end |
| 1533 | if addin and addin.reg == -1 then | 1964 | if addin and addin.reg == -1 then |
| 1534 | wputop(szov, opcode - 7, rex) | 1965 | local psz, sk = wputop(szov, opcode - 7, rex, vex, true) |
| 1535 | waction("VREG", addin.vreg); wputxb(0) | 1966 | wvreg("opcode", addin.vreg, psz, sk) |
| 1536 | else | 1967 | else |
| 1537 | if addin and addin.reg > 7 then rex = rex + 1 end | 1968 | if addin and addin.reg > 7 then rex = rex + 1 end |
| 1538 | wputop(szov, opcode, rex) | 1969 | wputop(szov, opcode, rex, vex) |
| 1539 | end | 1970 | end |
| 1540 | opcode = nil | 1971 | opcode = nil |
| 1541 | end | 1972 | end |
| @@ -1549,7 +1980,7 @@ local function dopattern(pat, args, sz, op, needrex) | |||
| 1549 | local a = args[narg] | 1980 | local a = args[narg] |
| 1550 | narg = narg + 1 | 1981 | narg = narg + 1 |
| 1551 | local mode, imm = a.mode, a.imm | 1982 | local mode, imm = a.mode, a.imm |
| 1552 | if mode == "iJ" and not match("iIJ", c) then | 1983 | if mode == "iJ" and not match(x64 and "J" or "iIJ", c) then |
| 1553 | werror("bad operand size for label") | 1984 | werror("bad operand size for label") |
| 1554 | end | 1985 | end |
| 1555 | if c == "S" then | 1986 | if c == "S" then |
| @@ -1572,6 +2003,14 @@ local function dopattern(pat, args, sz, op, needrex) | |||
| 1572 | else | 2003 | else |
| 1573 | wputlabel("REL_", imm, 2) | 2004 | wputlabel("REL_", imm, 2) |
| 1574 | end | 2005 | end |
| 2006 | elseif c == "s" then | ||
| 2007 | local reg = a.reg | ||
| 2008 | if reg < 0 then | ||
| 2009 | wputb(0) | ||
| 2010 | wvreg("imm.hi", a.vreg) | ||
| 2011 | else | ||
| 2012 | wputb(shl(reg, 4)) | ||
| 2013 | end | ||
| 1575 | else | 2014 | else |
| 1576 | werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'") | 2015 | werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'") |
| 1577 | end | 2016 | end |
| @@ -1648,11 +2087,14 @@ map_op[".template__"] = function(params, template, nparams) | |||
| 1648 | if pat == "" then pat = lastpat else lastpat = pat end | 2087 | if pat == "" then pat = lastpat else lastpat = pat end |
| 1649 | if matchtm(tm, args) then | 2088 | if matchtm(tm, args) then |
| 1650 | local prefix = sub(szm, 1, 1) | 2089 | local prefix = sub(szm, 1, 1) |
| 1651 | if prefix == "/" then -- Match both operand sizes. | 2090 | if prefix == "/" then -- Exactly match leading operand sizes. |
| 1652 | if args[1].opsize == sub(szm, 2, 2) and | 2091 | for i = #szm,1,-1 do |
| 1653 | args[2].opsize == sub(szm, 3, 3) then | 2092 | if i == 1 then |
| 1654 | dopattern(pat, args, sz, params.op, needrex) -- Process pattern. | 2093 | dopattern(pat, args, sz, params.op, needrex) -- Process pattern. |
| 1655 | return | 2094 | return |
| 2095 | elseif args[i-1].opsize ~= sub(szm, i, i) then | ||
| 2096 | break | ||
| 2097 | end | ||
| 1656 | end | 2098 | end |
| 1657 | else -- Match common operand size. | 2099 | else -- Match common operand size. |
| 1658 | local szp = sz | 2100 | local szp = sz |
| @@ -1717,8 +2159,8 @@ if x64 then | |||
| 1717 | rex = a.reg > 7 and 9 or 8 | 2159 | rex = a.reg > 7 and 9 or 8 |
| 1718 | end | 2160 | end |
| 1719 | end | 2161 | end |
| 1720 | wputop(sz, opcode, rex) | 2162 | local psz, sk = wputop(sz, opcode, rex, nil, vreg) |
| 1721 | if vreg then waction("VREG", vreg); wputxb(0) end | 2163 | wvreg("opcode", vreg, psz, sk) |
| 1722 | waction("IMM_D", format("(unsigned int)(%s)", op64)) | 2164 | waction("IMM_D", format("(unsigned int)(%s)", op64)) |
| 1723 | waction("IMM_D", format("(unsigned int)((%s)>>32)", op64)) | 2165 | waction("IMM_D", format("(unsigned int)((%s)>>32)", op64)) |
| 1724 | end | 2166 | end |
| @@ -1730,14 +2172,16 @@ end | |||
| 1730 | local function op_data(params) | 2172 | local function op_data(params) |
| 1731 | if not params then return "imm..." end | 2173 | if not params then return "imm..." end |
| 1732 | local sz = sub(params.op, 2, 2) | 2174 | local sz = sub(params.op, 2, 2) |
| 1733 | if sz == "a" then sz = addrsize end | 2175 | if sz == "l" then sz = "d" elseif sz == "a" then sz = addrsize end |
| 1734 | for _,p in ipairs(params) do | 2176 | for _,p in ipairs(params) do |
| 1735 | local a = parseoperand(p) | 2177 | local a = parseoperand(p, sz == "q") |
| 1736 | if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then | 2178 | if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then |
| 1737 | werror("bad mode or size in `"..p.."'") | 2179 | werror("bad mode or size in `"..p.."'") |
| 1738 | end | 2180 | end |
| 1739 | if a.mode == "iJ" then | 2181 | if a.mode == "iJ" then |
| 1740 | wputlabel("IMM_", a.imm, 1) | 2182 | wputlabel("IMM_", a.imm, 1) |
| 2183 | elseif sz == "q" then | ||
| 2184 | wputqarg(a.imm) | ||
| 1741 | else | 2185 | else |
| 1742 | wputszarg(sz, a.imm) | 2186 | wputszarg(sz, a.imm) |
| 1743 | end | 2187 | end |
| @@ -1749,7 +2193,11 @@ map_op[".byte_*"] = op_data | |||
-- All data directives share the op_data handler, which derives the element
-- size from the second character of the directive name: "l" is treated as
-- "d", "a" is replaced by addrsize, and "q" values are emitted through
-- wputqarg instead of wputszarg.
| 1749 | map_op[".sbyte_*"] = op_data | 2193 | map_op[".sbyte_*"] = op_data |
| 1750 | map_op[".word_*"] = op_data | 2194 | map_op[".word_*"] = op_data |
| 1751 | map_op[".dword_*"] = op_data | 2195 | map_op[".dword_*"] = op_data |
| 2196 | map_op[".qword_*"] = op_data | ||
| 1752 | map_op[".aword_*"] = op_data | 2197 | map_op[".aword_*"] = op_data |
| 2198 | map_op[".long_*"] = op_data | ||
| 2199 | map_op[".quad_*"] = op_data | ||
| 2200 | map_op[".addr_*"] = op_data | ||
| 1753 | 2201 | ||
| 1754 | ------------------------------------------------------------------------------ | 2202 | ------------------------------------------------------------------------------ |
| 1755 | 2203 | ||
