From 0acb23426a1d08aa23271b1acac4d05e10ed389d Mon Sep 17 00:00:00 2001
From: Mike Pall <mike>
Date: Thu, 10 Dec 2009 04:01:57 +0100
Subject: Add REX encoding to DynASM x64.

---
 dynasm/dasm_x86.lua | 77 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 45 insertions(+), 32 deletions(-)

(limited to 'dynasm')

diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua
index b4eeb759..f508540e 100644
--- a/dynasm/dasm_x86.lua
+++ b/dynasm/dasm_x86.lua
@@ -455,22 +455,29 @@ local function wputszarg(sz, n)
 end
 
 -- Put multi-byte opcode with operand-size dependent modifications.
-local function wputop(sz, op)
+local function wputop(sz, op, rex)
   local r
   if sz == "w" then wputb(102) end
   -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
   if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
   if op >= 16777216 then r = op % 16777216 wputb((op-r) / 16777216) op = r end
   if op >= 65536 then r = op % 65536 wputb((op-r) / 65536) op = r end
-  if op >= 256 then r = op % 256 wputb((op-r) / 256) op = r end
+  if op >= 256 then
+    r = op % 256
+    local b = (op-r) / 256
+    if b == 15 and rex ~= 0 then wputb(64 + rex % 15); rex = 0 end
+    wputb(b)
+    op = r
+  end
+  if rex ~= 0 then wputb(64 + rex % 15) end
   if sz == "b" then op = op - 1 end
   wputb(op)
 end
 
 -- Put ModRM or SIB formatted byte.
 local function wputmodrm(m, s, rm, vs, vrm)
-  assert(m < 4 and s < 8 and rm < 8, "bad modrm operands")
-  wputb(64*m + 8*s + rm)
+  assert(m < 4 and s < 16 and rm < 16, "bad modrm operands")
+  wputb(64*m + 8*(s%8) + (rm%8))
 end
 
 -- Put ModRM/SIB plus optional displacement.
@@ -513,7 +520,7 @@ local function wputmrmsib(t, imark, s, vsreg)
 
   local m
   if tdisp == "number" then -- Check displacement size at assembly time.
-    if disp == 0 and reg ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
+    if disp == 0 and (reg%8) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
       if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0]
     elseif disp >= -128 and disp <= 127 then m = 1
     else m = 2 end
@@ -522,7 +529,7 @@ local function wputmrmsib(t, imark, s, vsreg)
   end
 
   -- Index register present or esp as base register: need SIB encoding.
-  if xreg or reg == 4 then
+  if xreg or (reg%8) == 4 then
     wputmodrm(m or 2, s, 4) -- ModRM.
     if m == nil or imark then waction("MARK") end
     if vsreg then waction("VREG", vsreg); wputxb(2) end
@@ -814,7 +821,7 @@ end
 -- (e.g. for FP memory access operations).
 --
 -- The operand size match string starts right after the mode match
--- characters and ends before the ":". "dwb" is assumed, if empty.
+-- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty.
 -- The effective data size of the operation is matched against this list.
 --
 -- If only the regular "b", "w", "d", "q", "t" operand sizes are
@@ -836,7 +843,7 @@ end
 -- Every character after the ":" is part of the pattern string:
 --   Hex chars are accumulated to form the opcode (left to right).
 --   "n"       disables the standard opcode mods
---             (otherwise: -1 for "b", o16 prefix for "w")
+--             (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q")
 --   "r"/"R"   adds the reg. number from the 1st/2nd operand to the opcode.
 --   "m"/"M"   generates ModRM/SIB from the 1st/2nd operand.
 --             The spare 3 bits are either filled with the last hex digit or
@@ -1040,15 +1047,15 @@ local map_op = {
   cpuid_0 =	"0FA2", -- P1+
 
   -- floating point ops
-  fst_1 =	"ff:DDD0r|xd:D92m|xq:DD2m",
-  fstp_1 =	"ff:DDD8r|xd:D93m|xq:DD3m|xt:DB7m",
-  fld_1 =	"ff:D9C0r|xd:D90m|xq:DD0m|xt:DB5m",
+  fst_1 =	"ff:DDD0r|xd:D92m|xq:nDD2m",
+  fstp_1 =	"ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m",
+  fld_1 =	"ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m",
 
   fpop_0 =	"DDD8", -- Alias for fstp st0.
 
   fist_1 =	"xw:nDF2m|xd:DB2m",
-  fistp_1 =	"xw:nDF3m|xd:DB3m|xq:DF7m",
-  fild_1 =	"xw:nDF0m|xd:DB0m|xq:DF5m",
+  fistp_1 =	"xw:nDF3m|xd:DB3m|xq:nDF7m",
+  fild_1 =	"xw:nDF0m|xd:DB0m|xq:nDF5m",
 
   fxch_0 =	"D9C9",
   fxch_1 =	"ff:D9C8r",
@@ -1154,19 +1161,19 @@ local map_op = {
   movdqa_2 =	"rmo:660F6FrM|mro:660F7FRm",
   movdqu_2 =	"rmo:F30F6FrM|mro:F30F7FRm",
   movhlps_2 =	"rro:0F12rM",
-  movhpd_2 =	"rx/oq:660F16rM|xr/qo:660F17Rm",
-  movhps_2 =	"rx/oq:0F16rM|xr/qo:0F17Rm",
+  movhpd_2 =	"rx/oq:660F16rM|xr/qo:n660F17Rm",
+  movhps_2 =	"rx/oq:0F16rM|xr/qo:n0F17Rm",
   movlhps_2 =	"rro:0F16rM",
-  movlpd_2 =	"rx/oq:660F12rM|xr/qo:660F13Rm",
-  movlps_2 =	"rx/oq:0F12rM|xr/qo:0F13Rm",
+  movlpd_2 =	"rx/oq:660F12rM|xr/qo:n660F13Rm",
+  movlps_2 =	"rx/oq:0F12rM|xr/qo:n0F13Rm",
   movmskpd_2 =	"rr/do:660F50rM",
   movmskps_2 =	"rr/do:0F50rM",
   movntdq_2 =	"xro:660FE7Rm",
   movnti_2 =	"xrd:0FC3Rm",
   movntpd_2 =	"xro:660F2BRm",
   movntps_2 =	"xro:0F2BRm",
-  movq_2 =	"rro:F30F7ErM|rx/oq:|xr/qo:660FD6Rm",
-  movsd_2 =	"rro:F20F10rM|rx/oq:|xr/qo:F20F11Rm",
+  movq_2 =	"rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm",
+  movsd_2 =	"rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm",
   movss_2 =	"rro:F30F10rM|rx/od:|xr/do:F30F11Rm",
   movupd_2 =	"rmo:660F10rM|mro:660F11Rm",
   movups_2 =	"rmo:0F10rM|mro:0F11Rm",
@@ -1260,7 +1267,7 @@ local map_op = {
   xorps_2 =	"rmo:0F57rM",
 
   -- SSE3 ops
-  fisttp_1 =	"xw:nDF1m|xd:DB1m|xq:DD1m",
+  fisttp_1 =	"xw:nDF1m|xd:DB1m|xq:nDD1m",
   addsubpd_2 =	"rmo:660FD0rM",
   addsubps_2 =	"rmo:F20FD0rM",
   haddpd_2 =	"rmo:660F7CrM",
@@ -1356,7 +1363,7 @@ local map_op = {
   insertq_2 =	"rro:F20F79rM",
   insertq_4 =	"rriio:F20F78rMUU",
   lzcnt_2 =	"rmdw:F30FBDrM",
-  movntsd_2 =	"xr/qo:F20F2BRm",
+  movntsd_2 =	"xr/qo:nF20F2BRm",
   movntss_2 =	"xr/do:F30F2BRm",
   -- popcnt is also in SSE4.2
 }
@@ -1391,7 +1398,7 @@ for name,n in pairs{ add = 0, mul = 1, com = 2, comp = 3,
   local nc = 192 + n * 8
   local nr = nc + (n < 4 and 0 or (n % 2 == 0 and 8 or -8))
   local fn = "f"..name
-  map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:DC%Xm", nc, n, n)
+  map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", nc, n, n)
   if n == 2 or n == 3 then
     map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:DC%XM", nc, n, n)
   else
@@ -1427,6 +1434,7 @@ local function dopattern(pat, args, sz, op)
   local opcode = 0
   local szov = sz
   local narg = 1
+  local rex = 0
 
   -- Limit number of section buffer positions used by a single dasm_put().
   -- A single opcode needs a maximum of 2 positions. !x64
@@ -1443,34 +1451,39 @@ local function dopattern(pat, args, sz, op)
     elseif c == "n" then	-- Disable operand size mods for opcode.
       szov = nil
     elseif c == "r" then	-- Merge 1st operand regno. into opcode.
-      addin = args[1]; opcode = opcode + addin.reg
+      addin = args[1]; opcode = opcode + (addin.reg % 8)
       if narg < 2 then narg = 2 end
     elseif c == "R" then	-- Merge 2nd operand regno. into opcode.
-      addin = args[2]; opcode = opcode + addin.reg
+      addin = args[2]; opcode = opcode + (addin.reg % 8)
       narg = 3
     elseif c == "m" or c == "M" then	-- Encode ModRM/SIB.
       local s
       if addin then
 	s = addin.reg
-	opcode = opcode - s	-- Undo regno opcode merge.
+	opcode = opcode - (s%8)	-- Undo regno opcode merge.
       else
 	s = opcode % 16		-- Undo last digit.
 	opcode = (opcode - s) / 16
       end
-      wputop(szov, opcode); opcode = nil
-      local imark = (sub(pat, -1) == "I") -- Force a mark (ugly).
-      -- Put ModRM/SIB with regno/last digit as spare.
       local nn = c == "m" and 1 or 2
-      wputmrmsib(args[nn], imark, s, addin and addin.vreg)
+      local t = args[nn]
       if narg <= nn then narg = nn + 1 end
+      local rex = szov == "q" and 8 or 0
+      if t.reg and t.reg > 7 then rex = rex + 1 end
+      if t.xreg and t.xreg > 7 then rex = rex + 2 end
+      if s > 7 then rex = rex + 4 end
+      wputop(szov, opcode, rex); opcode = nil
+      local imark = (sub(pat, -1) == "I") -- Force a mark (ugly).
+      -- Put ModRM/SIB with regno/last digit as spare.
+      wputmrmsib(t, imark, s, addin and addin.vreg)
       addin = nil
     else
       if opcode then -- Flush opcode.
 	if addin and addin.reg == -1 then
-	  wputop(szov, opcode + 1)
+	  wputop(szov, opcode + 1, 0)
 	  waction("VREG", addin.vreg); wputxb(0)
 	else
-	  wputop(szov, opcode)
+	  wputop(szov, opcode, (addin and addin.reg > 7) and 4 or 0)
 	end
 	opcode = nil
       end
@@ -1583,7 +1596,7 @@ map_op[".template__"] = function(params, template, nparams)
 	end
       else -- Match common operand size.
 	local szp = sz
-	if szm == "" then szm = "dwb" end -- Default size match.
+	if szm == "" then szm = x64 and "qdwb" or "dwb" end -- Default sizes.
 	if prefix == "1" then szp = args[1].opsize; szmix = nil
 	elseif prefix == "2" then szp = args[2].opsize; szmix = nil end
 	if not szmix and (prefix == "." or match(szm, szp or "#")) then
-- 
cgit v1.2.3-55-g6feb