aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mike Pall <mike> 2009-12-10 04:01:57 +0100
committer Mike Pall <mike> 2009-12-10 04:01:57 +0100
commit 0acb23426a1d08aa23271b1acac4d05e10ed389d (patch)
tree 81a0f094ca2c40edaa6eed742a91cf4e105e5cb4
parent fe36e4ac59f3408202be21fd94c4c540f23b1a6f (diff)
download luajit-0acb23426a1d08aa23271b1acac4d05e10ed389d.tar.gz
luajit-0acb23426a1d08aa23271b1acac4d05e10ed389d.tar.bz2
luajit-0acb23426a1d08aa23271b1acac4d05e10ed389d.zip
Add REX encoding to DynASM x64.
-rw-r--r-- dynasm/dasm_x86.lua 77
1 files changed, 45 insertions, 32 deletions
diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua
index b4eeb759..f508540e 100644
--- a/dynasm/dasm_x86.lua
+++ b/dynasm/dasm_x86.lua
@@ -455,22 +455,29 @@ local function wputszarg(sz, n)
455end 455end
456 456
-- Put multi-byte opcode with operand-size dependent modifications.
--   sz:  operand size character. "w" emits an o16 prefix (0x66) first,
--        "b" decrements the final opcode byte by one. Other values
--        (including nil) leave the opcode untouched.
--   op:  opcode bytes packed into a single number, most significant byte
--        first. May need more than 32 bits.
--   rex: low four bits of the REX prefix (0..15). 0 or nil means that no
--        REX prefix is emitted.
local function wputop(sz, op, rex)
  local r
  rex = rex or 0 -- Tolerate callers still using the old (sz, op) form.
  if sz == "w" then wputb(102) end
  -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
  if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
  if op >= 16777216 then r = op % 16777216 wputb((op-r) / 16777216) op = r end
  if op >= 65536 then r = op % 65536 wputb((op-r) / 65536) op = r end
  if op >= 256 then
    r = op % 256
    local b = (op-r) / 256
    -- The REX prefix has to go in front of a 0x0F escape byte.
    -- Mask with 16, not 15: a payload of 15 (all four bits set) must give
    -- 0x4F and not 0x40.
    -- NOTE(review): only a 0x0F in the second-to-last position is checked;
    -- opcodes with a longer escape sequence (0F 38 xx) would get the REX
    -- byte after the escape bytes -- confirm none of them reach this path
    -- with rex ~= 0.
    if b == 15 and rex ~= 0 then wputb(64 + rex % 16); rex = 0 end
    wputb(b)
    op = r
  end
  -- No escape byte seen: the REX prefix directly precedes the last byte.
  if rex ~= 0 then wputb(64 + rex % 16) end
  if sz == "b" then op = op - 1 end
  wputb(op)
end
469 476
-- Put ModRM or SIB formatted byte.
-- m fills the top two bits, s the middle three and rm the low three.
-- s and rm are accepted in the range 0..15, but only their low three bits
-- end up in the byte. vs and vrm are accepted but not used here.
local function wputmodrm(m, s, rm, vs, vrm)
  local valid = m < 4 and s < 16 and rm < 16
  assert(valid, "bad modrm operands")
  local spare = s % 8
  local base = rm % 8
  wputb(m*64 + spare*8 + base)
end
475 482
476-- Put ModRM/SIB plus optional displacement. 483-- Put ModRM/SIB plus optional displacement.
@@ -513,7 +520,7 @@ local function wputmrmsib(t, imark, s, vsreg)
513 520
514 local m 521 local m
515 if tdisp == "number" then -- Check displacement size at assembly time. 522 if tdisp == "number" then -- Check displacement size at assembly time.
516 if disp == 0 and reg ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too) 523 if disp == 0 and (reg%8) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
517 if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0] 524 if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0]
518 elseif disp >= -128 and disp <= 127 then m = 1 525 elseif disp >= -128 and disp <= 127 then m = 1
519 else m = 2 end 526 else m = 2 end
@@ -522,7 +529,7 @@ local function wputmrmsib(t, imark, s, vsreg)
522 end 529 end
523 530
524 -- Index register present or esp as base register: need SIB encoding. 531 -- Index register present or esp as base register: need SIB encoding.
525 if xreg or reg == 4 then 532 if xreg or (reg%8) == 4 then
526 wputmodrm(m or 2, s, 4) -- ModRM. 533 wputmodrm(m or 2, s, 4) -- ModRM.
527 if m == nil or imark then waction("MARK") end 534 if m == nil or imark then waction("MARK") end
528 if vsreg then waction("VREG", vsreg); wputxb(2) end 535 if vsreg then waction("VREG", vsreg); wputxb(2) end
@@ -814,7 +821,7 @@ end
814-- (e.g. for FP memory access operations). 821-- (e.g. for FP memory access operations).
815-- 822--
816-- The operand size match string starts right after the mode match 823-- The operand size match string starts right after the mode match
817-- characters and ends before the ":". "dwb" is assumed, if empty. 824-- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty.
818-- The effective data size of the operation is matched against this list. 825-- The effective data size of the operation is matched against this list.
819-- 826--
820-- If only the regular "b", "w", "d", "q", "t" operand sizes are 827-- If only the regular "b", "w", "d", "q", "t" operand sizes are
@@ -836,7 +843,7 @@ end
836-- Every character after the ":" is part of the pattern string: 843-- Every character after the ":" is part of the pattern string:
837-- Hex chars are accumulated to form the opcode (left to right). 844-- Hex chars are accumulated to form the opcode (left to right).
838-- "n" disables the standard opcode mods 845-- "n" disables the standard opcode mods
839-- (otherwise: -1 for "b", o16 prefix for "w") 846-- (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q")
840-- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode. 847-- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode.
841-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. 848-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand.
842-- The spare 3 bits are either filled with the last hex digit or 849-- The spare 3 bits are either filled with the last hex digit or
@@ -1040,15 +1047,15 @@ local map_op = {
1040 cpuid_0 = "0FA2", -- P1+ 1047 cpuid_0 = "0FA2", -- P1+
1041 1048
1042 -- floating point ops 1049 -- floating point ops
1043 fst_1 = "ff:DDD0r|xd:D92m|xq:DD2m", 1050 fst_1 = "ff:DDD0r|xd:D92m|xq:nDD2m",
1044 fstp_1 = "ff:DDD8r|xd:D93m|xq:DD3m|xt:DB7m", 1051 fstp_1 = "ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m",
1045 fld_1 = "ff:D9C0r|xd:D90m|xq:DD0m|xt:DB5m", 1052 fld_1 = "ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m",
1046 1053
1047 fpop_0 = "DDD8", -- Alias for fstp st0. 1054 fpop_0 = "DDD8", -- Alias for fstp st0.
1048 1055
1049 fist_1 = "xw:nDF2m|xd:DB2m", 1056 fist_1 = "xw:nDF2m|xd:DB2m",
1050 fistp_1 = "xw:nDF3m|xd:DB3m|xq:DF7m", 1057 fistp_1 = "xw:nDF3m|xd:DB3m|xq:nDF7m",
1051 fild_1 = "xw:nDF0m|xd:DB0m|xq:DF5m", 1058 fild_1 = "xw:nDF0m|xd:DB0m|xq:nDF5m",
1052 1059
1053 fxch_0 = "D9C9", 1060 fxch_0 = "D9C9",
1054 fxch_1 = "ff:D9C8r", 1061 fxch_1 = "ff:D9C8r",
@@ -1154,19 +1161,19 @@ local map_op = {
1154 movdqa_2 = "rmo:660F6FrM|mro:660F7FRm", 1161 movdqa_2 = "rmo:660F6FrM|mro:660F7FRm",
1155 movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm", 1162 movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm",
1156 movhlps_2 = "rro:0F12rM", 1163 movhlps_2 = "rro:0F12rM",
1157 movhpd_2 = "rx/oq:660F16rM|xr/qo:660F17Rm", 1164 movhpd_2 = "rx/oq:660F16rM|xr/qo:n660F17Rm",
1158 movhps_2 = "rx/oq:0F16rM|xr/qo:0F17Rm", 1165 movhps_2 = "rx/oq:0F16rM|xr/qo:n0F17Rm",
1159 movlhps_2 = "rro:0F16rM", 1166 movlhps_2 = "rro:0F16rM",
1160 movlpd_2 = "rx/oq:660F12rM|xr/qo:660F13Rm", 1167 movlpd_2 = "rx/oq:660F12rM|xr/qo:n660F13Rm",
1161 movlps_2 = "rx/oq:0F12rM|xr/qo:0F13Rm", 1168 movlps_2 = "rx/oq:0F12rM|xr/qo:n0F13Rm",
1162 movmskpd_2 = "rr/do:660F50rM", 1169 movmskpd_2 = "rr/do:660F50rM",
1163 movmskps_2 = "rr/do:0F50rM", 1170 movmskps_2 = "rr/do:0F50rM",
1164 movntdq_2 = "xro:660FE7Rm", 1171 movntdq_2 = "xro:660FE7Rm",
1165 movnti_2 = "xrd:0FC3Rm", 1172 movnti_2 = "xrd:0FC3Rm",
1166 movntpd_2 = "xro:660F2BRm", 1173 movntpd_2 = "xro:660F2BRm",
1167 movntps_2 = "xro:0F2BRm", 1174 movntps_2 = "xro:0F2BRm",
1168 movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:660FD6Rm", 1175 movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm",
1169 movsd_2 = "rro:F20F10rM|rx/oq:|xr/qo:F20F11Rm", 1176 movsd_2 = "rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm",
1170 movss_2 = "rro:F30F10rM|rx/od:|xr/do:F30F11Rm", 1177 movss_2 = "rro:F30F10rM|rx/od:|xr/do:F30F11Rm",
1171 movupd_2 = "rmo:660F10rM|mro:660F11Rm", 1178 movupd_2 = "rmo:660F10rM|mro:660F11Rm",
1172 movups_2 = "rmo:0F10rM|mro:0F11Rm", 1179 movups_2 = "rmo:0F10rM|mro:0F11Rm",
@@ -1260,7 +1267,7 @@ local map_op = {
1260 xorps_2 = "rmo:0F57rM", 1267 xorps_2 = "rmo:0F57rM",
1261 1268
1262 -- SSE3 ops 1269 -- SSE3 ops
1263 fisttp_1 = "xw:nDF1m|xd:DB1m|xq:DD1m", 1270 fisttp_1 = "xw:nDF1m|xd:DB1m|xq:nDD1m",
1264 addsubpd_2 = "rmo:660FD0rM", 1271 addsubpd_2 = "rmo:660FD0rM",
1265 addsubps_2 = "rmo:F20FD0rM", 1272 addsubps_2 = "rmo:F20FD0rM",
1266 haddpd_2 = "rmo:660F7CrM", 1273 haddpd_2 = "rmo:660F7CrM",
@@ -1356,7 +1363,7 @@ local map_op = {
1356 insertq_2 = "rro:F20F79rM", 1363 insertq_2 = "rro:F20F79rM",
1357 insertq_4 = "rriio:F20F78rMUU", 1364 insertq_4 = "rriio:F20F78rMUU",
1358 lzcnt_2 = "rmdw:F30FBDrM", 1365 lzcnt_2 = "rmdw:F30FBDrM",
1359 movntsd_2 = "xr/qo:F20F2BRm", 1366 movntsd_2 = "xr/qo:nF20F2BRm",
1360 movntss_2 = "xr/do:F30F2BRm", 1367 movntss_2 = "xr/do:F30F2BRm",
1361 -- popcnt is also in SSE4.2 1368 -- popcnt is also in SSE4.2
1362} 1369}
@@ -1391,7 +1398,7 @@ for name,n in pairs{ add = 0, mul = 1, com = 2, comp = 3,
1391 local nc = 192 + n * 8 1398 local nc = 192 + n * 8
1392 local nr = nc + (n < 4 and 0 or (n % 2 == 0 and 8 or -8)) 1399 local nr = nc + (n < 4 and 0 or (n % 2 == 0 and 8 or -8))
1393 local fn = "f"..name 1400 local fn = "f"..name
1394 map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:DC%Xm", nc, n, n) 1401 map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", nc, n, n)
1395 if n == 2 or n == 3 then 1402 if n == 2 or n == 3 then
1396 map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:DC%XM", nc, n, n) 1403 map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:DC%XM", nc, n, n)
1397 else 1404 else
@@ -1427,6 +1434,7 @@ local function dopattern(pat, args, sz, op)
1427 local opcode = 0 1434 local opcode = 0
1428 local szov = sz 1435 local szov = sz
1429 local narg = 1 1436 local narg = 1
1437 local rex = 0
1430 1438
1431 -- Limit number of section buffer positions used by a single dasm_put(). 1439 -- Limit number of section buffer positions used by a single dasm_put().
1432 -- A single opcode needs a maximum of 2 positions. !x64 1440 -- A single opcode needs a maximum of 2 positions. !x64
@@ -1443,34 +1451,39 @@ local function dopattern(pat, args, sz, op)
1443 elseif c == "n" then -- Disable operand size mods for opcode. 1451 elseif c == "n" then -- Disable operand size mods for opcode.
1444 szov = nil 1452 szov = nil
1445 elseif c == "r" then -- Merge 1st operand regno. into opcode. 1453 elseif c == "r" then -- Merge 1st operand regno. into opcode.
1446 addin = args[1]; opcode = opcode + addin.reg 1454 addin = args[1]; opcode = opcode + (addin.reg % 8)
1447 if narg < 2 then narg = 2 end 1455 if narg < 2 then narg = 2 end
1448 elseif c == "R" then -- Merge 2nd operand regno. into opcode. 1456 elseif c == "R" then -- Merge 2nd operand regno. into opcode.
1449 addin = args[2]; opcode = opcode + addin.reg 1457 addin = args[2]; opcode = opcode + (addin.reg % 8)
1450 narg = 3 1458 narg = 3
1451 elseif c == "m" or c == "M" then -- Encode ModRM/SIB. 1459 elseif c == "m" or c == "M" then -- Encode ModRM/SIB.
1452 local s 1460 local s
1453 if addin then 1461 if addin then
1454 s = addin.reg 1462 s = addin.reg
1455 opcode = opcode - s -- Undo regno opcode merge. 1463 opcode = opcode - (s%8) -- Undo regno opcode merge.
1456 else 1464 else
1457 s = opcode % 16 -- Undo last digit. 1465 s = opcode % 16 -- Undo last digit.
1458 opcode = (opcode - s) / 16 1466 opcode = (opcode - s) / 16
1459 end 1467 end
1460 wputop(szov, opcode); opcode = nil
1461 local imark = (sub(pat, -1) == "I") -- Force a mark (ugly).
1462 -- Put ModRM/SIB with regno/last digit as spare.
1463 local nn = c == "m" and 1 or 2 1468 local nn = c == "m" and 1 or 2
1464 wputmrmsib(args[nn], imark, s, addin and addin.vreg) 1469 local t = args[nn]
1465 if narg <= nn then narg = nn + 1 end 1470 if narg <= nn then narg = nn + 1 end
1471 local rex = szov == "q" and 8 or 0
1472 if t.reg and t.reg > 7 then rex = rex + 1 end
1473 if t.xreg and t.xreg > 7 then rex = rex + 2 end
1474 if s > 7 then rex = rex + 4 end
1475 wputop(szov, opcode, rex); opcode = nil
1476 local imark = (sub(pat, -1) == "I") -- Force a mark (ugly).
1477 -- Put ModRM/SIB with regno/last digit as spare.
1478 wputmrmsib(t, imark, s, addin and addin.vreg)
1466 addin = nil 1479 addin = nil
1467 else 1480 else
1468 if opcode then -- Flush opcode. 1481 if opcode then -- Flush opcode.
1469 if addin and addin.reg == -1 then 1482 if addin and addin.reg == -1 then
1470 wputop(szov, opcode + 1) 1483 wputop(szov, opcode + 1, 0)
1471 waction("VREG", addin.vreg); wputxb(0) 1484 waction("VREG", addin.vreg); wputxb(0)
1472 else 1485 else
1473 wputop(szov, opcode) 1486 wputop(szov, opcode, (addin and addin.reg > 7) and 4 or 0)
1474 end 1487 end
1475 opcode = nil 1488 opcode = nil
1476 end 1489 end
@@ -1583,7 +1596,7 @@ map_op[".template__"] = function(params, template, nparams)
1583 end 1596 end
1584 else -- Match common operand size. 1597 else -- Match common operand size.
1585 local szp = sz 1598 local szp = sz
1586 if szm == "" then szm = "dwb" end -- Default size match. 1599 if szm == "" then szm = x64 and "qdwb" or "dwb" end -- Default sizes.
1587 if prefix == "1" then szp = args[1].opsize; szmix = nil 1600 if prefix == "1" then szp = args[1].opsize; szmix = nil
1588 elseif prefix == "2" then szp = args[2].opsize; szmix = nil end 1601 elseif prefix == "2" then szp = args[2].opsize; szmix = nil end
1589 if not szmix and (prefix == "." or match(szm, szp or "#")) then 1602 if not szmix and (prefix == "." or match(szm, szp or "#")) then