diff options
author | Mike Pall <mike> | 2009-12-10 04:01:57 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2009-12-10 04:01:57 +0100 |
commit | 0acb23426a1d08aa23271b1acac4d05e10ed389d (patch) | |
tree | 81a0f094ca2c40edaa6eed742a91cf4e105e5cb4 | |
parent | fe36e4ac59f3408202be21fd94c4c540f23b1a6f (diff) | |
download | luajit-0acb23426a1d08aa23271b1acac4d05e10ed389d.tar.gz luajit-0acb23426a1d08aa23271b1acac4d05e10ed389d.tar.bz2 luajit-0acb23426a1d08aa23271b1acac4d05e10ed389d.zip |
Add REX encoding to DynASM x64.
-rw-r--r-- | dynasm/dasm_x86.lua | 77 |
1 file changed, 45 insertions, 32 deletions
diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua index b4eeb759..f508540e 100644 --- a/dynasm/dasm_x86.lua +++ b/dynasm/dasm_x86.lua | |||
@@ -455,22 +455,29 @@ local function wputszarg(sz, n) | |||
455 | end | 455 | end |
456 | 456 | ||
457 | -- Put multi-byte opcode with operand-size dependent modifications. | 457 | -- Put multi-byte opcode with operand-size dependent modifications. |
458 | local function wputop(sz, op) | 458 | local function wputop(sz, op, rex) |
459 | local r | 459 | local r |
460 | if sz == "w" then wputb(102) end | 460 | if sz == "w" then wputb(102) end |
461 | -- Needs >32 bit numbers, but only for crc32 eax, word [ebx] | 461 | -- Needs >32 bit numbers, but only for crc32 eax, word [ebx] |
462 | if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end | 462 | if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end |
463 | if op >= 16777216 then r = op % 16777216 wputb((op-r) / 16777216) op = r end | 463 | if op >= 16777216 then r = op % 16777216 wputb((op-r) / 16777216) op = r end |
464 | if op >= 65536 then r = op % 65536 wputb((op-r) / 65536) op = r end | 464 | if op >= 65536 then r = op % 65536 wputb((op-r) / 65536) op = r end |
465 | if op >= 256 then r = op % 256 wputb((op-r) / 256) op = r end | 465 | if op >= 256 then |
466 | r = op % 256 | ||
467 | local b = (op-r) / 256 | ||
468 | if b == 15 and rex ~= 0 then wputb(64 + rex % 15); rex = 0 end | ||
469 | wputb(b) | ||
470 | op = r | ||
471 | end | ||
472 | if rex ~= 0 then wputb(64 + rex % 15) end | ||
466 | if sz == "b" then op = op - 1 end | 473 | if sz == "b" then op = op - 1 end |
467 | wputb(op) | 474 | wputb(op) |
468 | end | 475 | end |
469 | 476 | ||
470 | -- Put ModRM or SIB formatted byte. | 477 | -- Put ModRM or SIB formatted byte. |
471 | local function wputmodrm(m, s, rm, vs, vrm) | 478 | local function wputmodrm(m, s, rm, vs, vrm) |
472 | assert(m < 4 and s < 8 and rm < 8, "bad modrm operands") | 479 | assert(m < 4 and s < 16 and rm < 16, "bad modrm operands") |
473 | wputb(64*m + 8*s + rm) | 480 | wputb(64*m + 8*(s%8) + (rm%8)) |
474 | end | 481 | end |
475 | 482 | ||
476 | -- Put ModRM/SIB plus optional displacement. | 483 | -- Put ModRM/SIB plus optional displacement. |
@@ -513,7 +520,7 @@ local function wputmrmsib(t, imark, s, vsreg) | |||
513 | 520 | ||
514 | local m | 521 | local m |
515 | if tdisp == "number" then -- Check displacement size at assembly time. | 522 | if tdisp == "number" then -- Check displacement size at assembly time. |
516 | if disp == 0 and reg ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too) | 523 | if disp == 0 and (reg%8) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too) |
517 | if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0] | 524 | if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0] |
518 | elseif disp >= -128 and disp <= 127 then m = 1 | 525 | elseif disp >= -128 and disp <= 127 then m = 1 |
519 | else m = 2 end | 526 | else m = 2 end |
@@ -522,7 +529,7 @@ local function wputmrmsib(t, imark, s, vsreg) | |||
522 | end | 529 | end |
523 | 530 | ||
524 | -- Index register present or esp as base register: need SIB encoding. | 531 | -- Index register present or esp as base register: need SIB encoding. |
525 | if xreg or reg == 4 then | 532 | if xreg or (reg%8) == 4 then |
526 | wputmodrm(m or 2, s, 4) -- ModRM. | 533 | wputmodrm(m or 2, s, 4) -- ModRM. |
527 | if m == nil or imark then waction("MARK") end | 534 | if m == nil or imark then waction("MARK") end |
528 | if vsreg then waction("VREG", vsreg); wputxb(2) end | 535 | if vsreg then waction("VREG", vsreg); wputxb(2) end |
@@ -814,7 +821,7 @@ end | |||
814 | -- (e.g. for FP memory access operations). | 821 | -- (e.g. for FP memory access operations). |
815 | -- | 822 | -- |
816 | -- The operand size match string starts right after the mode match | 823 | -- The operand size match string starts right after the mode match |
817 | -- characters and ends before the ":". "dwb" is assumed, if empty. | 824 | -- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty. |
818 | -- The effective data size of the operation is matched against this list. | 825 | -- The effective data size of the operation is matched against this list. |
819 | -- | 826 | -- |
820 | -- If only the regular "b", "w", "d", "q", "t" operand sizes are | 827 | -- If only the regular "b", "w", "d", "q", "t" operand sizes are |
@@ -836,7 +843,7 @@ end | |||
836 | -- Every character after the ":" is part of the pattern string: | 843 | -- Every character after the ":" is part of the pattern string: |
837 | -- Hex chars are accumulated to form the opcode (left to right). | 844 | -- Hex chars are accumulated to form the opcode (left to right). |
838 | -- "n" disables the standard opcode mods | 845 | -- "n" disables the standard opcode mods |
839 | -- (otherwise: -1 for "b", o16 prefix for "w") | 846 | -- (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q") |
840 | -- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode. | 847 | -- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode. |
841 | -- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. | 848 | -- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. |
842 | -- The spare 3 bits are either filled with the last hex digit or | 849 | -- The spare 3 bits are either filled with the last hex digit or |
@@ -1040,15 +1047,15 @@ local map_op = { | |||
1040 | cpuid_0 = "0FA2", -- P1+ | 1047 | cpuid_0 = "0FA2", -- P1+ |
1041 | 1048 | ||
1042 | -- floating point ops | 1049 | -- floating point ops |
1043 | fst_1 = "ff:DDD0r|xd:D92m|xq:DD2m", | 1050 | fst_1 = "ff:DDD0r|xd:D92m|xq:nDD2m", |
1044 | fstp_1 = "ff:DDD8r|xd:D93m|xq:DD3m|xt:DB7m", | 1051 | fstp_1 = "ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m", |
1045 | fld_1 = "ff:D9C0r|xd:D90m|xq:DD0m|xt:DB5m", | 1052 | fld_1 = "ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m", |
1046 | 1053 | ||
1047 | fpop_0 = "DDD8", -- Alias for fstp st0. | 1054 | fpop_0 = "DDD8", -- Alias for fstp st0. |
1048 | 1055 | ||
1049 | fist_1 = "xw:nDF2m|xd:DB2m", | 1056 | fist_1 = "xw:nDF2m|xd:DB2m", |
1050 | fistp_1 = "xw:nDF3m|xd:DB3m|xq:DF7m", | 1057 | fistp_1 = "xw:nDF3m|xd:DB3m|xq:nDF7m", |
1051 | fild_1 = "xw:nDF0m|xd:DB0m|xq:DF5m", | 1058 | fild_1 = "xw:nDF0m|xd:DB0m|xq:nDF5m", |
1052 | 1059 | ||
1053 | fxch_0 = "D9C9", | 1060 | fxch_0 = "D9C9", |
1054 | fxch_1 = "ff:D9C8r", | 1061 | fxch_1 = "ff:D9C8r", |
@@ -1154,19 +1161,19 @@ local map_op = { | |||
1154 | movdqa_2 = "rmo:660F6FrM|mro:660F7FRm", | 1161 | movdqa_2 = "rmo:660F6FrM|mro:660F7FRm", |
1155 | movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm", | 1162 | movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm", |
1156 | movhlps_2 = "rro:0F12rM", | 1163 | movhlps_2 = "rro:0F12rM", |
1157 | movhpd_2 = "rx/oq:660F16rM|xr/qo:660F17Rm", | 1164 | movhpd_2 = "rx/oq:660F16rM|xr/qo:n660F17Rm", |
1158 | movhps_2 = "rx/oq:0F16rM|xr/qo:0F17Rm", | 1165 | movhps_2 = "rx/oq:0F16rM|xr/qo:n0F17Rm", |
1159 | movlhps_2 = "rro:0F16rM", | 1166 | movlhps_2 = "rro:0F16rM", |
1160 | movlpd_2 = "rx/oq:660F12rM|xr/qo:660F13Rm", | 1167 | movlpd_2 = "rx/oq:660F12rM|xr/qo:n660F13Rm", |
1161 | movlps_2 = "rx/oq:0F12rM|xr/qo:0F13Rm", | 1168 | movlps_2 = "rx/oq:0F12rM|xr/qo:n0F13Rm", |
1162 | movmskpd_2 = "rr/do:660F50rM", | 1169 | movmskpd_2 = "rr/do:660F50rM", |
1163 | movmskps_2 = "rr/do:0F50rM", | 1170 | movmskps_2 = "rr/do:0F50rM", |
1164 | movntdq_2 = "xro:660FE7Rm", | 1171 | movntdq_2 = "xro:660FE7Rm", |
1165 | movnti_2 = "xrd:0FC3Rm", | 1172 | movnti_2 = "xrd:0FC3Rm", |
1166 | movntpd_2 = "xro:660F2BRm", | 1173 | movntpd_2 = "xro:660F2BRm", |
1167 | movntps_2 = "xro:0F2BRm", | 1174 | movntps_2 = "xro:0F2BRm", |
1168 | movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:660FD6Rm", | 1175 | movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm", |
1169 | movsd_2 = "rro:F20F10rM|rx/oq:|xr/qo:F20F11Rm", | 1176 | movsd_2 = "rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm", |
1170 | movss_2 = "rro:F30F10rM|rx/od:|xr/do:F30F11Rm", | 1177 | movss_2 = "rro:F30F10rM|rx/od:|xr/do:F30F11Rm", |
1171 | movupd_2 = "rmo:660F10rM|mro:660F11Rm", | 1178 | movupd_2 = "rmo:660F10rM|mro:660F11Rm", |
1172 | movups_2 = "rmo:0F10rM|mro:0F11Rm", | 1179 | movups_2 = "rmo:0F10rM|mro:0F11Rm", |
@@ -1260,7 +1267,7 @@ local map_op = { | |||
1260 | xorps_2 = "rmo:0F57rM", | 1267 | xorps_2 = "rmo:0F57rM", |
1261 | 1268 | ||
1262 | -- SSE3 ops | 1269 | -- SSE3 ops |
1263 | fisttp_1 = "xw:nDF1m|xd:DB1m|xq:DD1m", | 1270 | fisttp_1 = "xw:nDF1m|xd:DB1m|xq:nDD1m", |
1264 | addsubpd_2 = "rmo:660FD0rM", | 1271 | addsubpd_2 = "rmo:660FD0rM", |
1265 | addsubps_2 = "rmo:F20FD0rM", | 1272 | addsubps_2 = "rmo:F20FD0rM", |
1266 | haddpd_2 = "rmo:660F7CrM", | 1273 | haddpd_2 = "rmo:660F7CrM", |
@@ -1356,7 +1363,7 @@ local map_op = { | |||
1356 | insertq_2 = "rro:F20F79rM", | 1363 | insertq_2 = "rro:F20F79rM", |
1357 | insertq_4 = "rriio:F20F78rMUU", | 1364 | insertq_4 = "rriio:F20F78rMUU", |
1358 | lzcnt_2 = "rmdw:F30FBDrM", | 1365 | lzcnt_2 = "rmdw:F30FBDrM", |
1359 | movntsd_2 = "xr/qo:F20F2BRm", | 1366 | movntsd_2 = "xr/qo:nF20F2BRm", |
1360 | movntss_2 = "xr/do:F30F2BRm", | 1367 | movntss_2 = "xr/do:F30F2BRm", |
1361 | -- popcnt is also in SSE4.2 | 1368 | -- popcnt is also in SSE4.2 |
1362 | } | 1369 | } |
@@ -1391,7 +1398,7 @@ for name,n in pairs{ add = 0, mul = 1, com = 2, comp = 3, | |||
1391 | local nc = 192 + n * 8 | 1398 | local nc = 192 + n * 8 |
1392 | local nr = nc + (n < 4 and 0 or (n % 2 == 0 and 8 or -8)) | 1399 | local nr = nc + (n < 4 and 0 or (n % 2 == 0 and 8 or -8)) |
1393 | local fn = "f"..name | 1400 | local fn = "f"..name |
1394 | map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:DC%Xm", nc, n, n) | 1401 | map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", nc, n, n) |
1395 | if n == 2 or n == 3 then | 1402 | if n == 2 or n == 3 then |
1396 | map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:DC%XM", nc, n, n) | 1403 | map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:DC%XM", nc, n, n) |
1397 | else | 1404 | else |
@@ -1427,6 +1434,7 @@ local function dopattern(pat, args, sz, op) | |||
1427 | local opcode = 0 | 1434 | local opcode = 0 |
1428 | local szov = sz | 1435 | local szov = sz |
1429 | local narg = 1 | 1436 | local narg = 1 |
1437 | local rex = 0 | ||
1430 | 1438 | ||
1431 | -- Limit number of section buffer positions used by a single dasm_put(). | 1439 | -- Limit number of section buffer positions used by a single dasm_put(). |
1432 | -- A single opcode needs a maximum of 2 positions. !x64 | 1440 | -- A single opcode needs a maximum of 2 positions. !x64 |
@@ -1443,34 +1451,39 @@ local function dopattern(pat, args, sz, op) | |||
1443 | elseif c == "n" then -- Disable operand size mods for opcode. | 1451 | elseif c == "n" then -- Disable operand size mods for opcode. |
1444 | szov = nil | 1452 | szov = nil |
1445 | elseif c == "r" then -- Merge 1st operand regno. into opcode. | 1453 | elseif c == "r" then -- Merge 1st operand regno. into opcode. |
1446 | addin = args[1]; opcode = opcode + addin.reg | 1454 | addin = args[1]; opcode = opcode + (addin.reg % 8) |
1447 | if narg < 2 then narg = 2 end | 1455 | if narg < 2 then narg = 2 end |
1448 | elseif c == "R" then -- Merge 2nd operand regno. into opcode. | 1456 | elseif c == "R" then -- Merge 2nd operand regno. into opcode. |
1449 | addin = args[2]; opcode = opcode + addin.reg | 1457 | addin = args[2]; opcode = opcode + (addin.reg % 8) |
1450 | narg = 3 | 1458 | narg = 3 |
1451 | elseif c == "m" or c == "M" then -- Encode ModRM/SIB. | 1459 | elseif c == "m" or c == "M" then -- Encode ModRM/SIB. |
1452 | local s | 1460 | local s |
1453 | if addin then | 1461 | if addin then |
1454 | s = addin.reg | 1462 | s = addin.reg |
1455 | opcode = opcode - s -- Undo regno opcode merge. | 1463 | opcode = opcode - (s%8) -- Undo regno opcode merge. |
1456 | else | 1464 | else |
1457 | s = opcode % 16 -- Undo last digit. | 1465 | s = opcode % 16 -- Undo last digit. |
1458 | opcode = (opcode - s) / 16 | 1466 | opcode = (opcode - s) / 16 |
1459 | end | 1467 | end |
1460 | wputop(szov, opcode); opcode = nil | ||
1461 | local imark = (sub(pat, -1) == "I") -- Force a mark (ugly). | ||
1462 | -- Put ModRM/SIB with regno/last digit as spare. | ||
1463 | local nn = c == "m" and 1 or 2 | 1468 | local nn = c == "m" and 1 or 2 |
1464 | wputmrmsib(args[nn], imark, s, addin and addin.vreg) | 1469 | local t = args[nn] |
1465 | if narg <= nn then narg = nn + 1 end | 1470 | if narg <= nn then narg = nn + 1 end |
1471 | local rex = szov == "q" and 8 or 0 | ||
1472 | if t.reg and t.reg > 7 then rex = rex + 1 end | ||
1473 | if t.xreg and t.xreg > 7 then rex = rex + 2 end | ||
1474 | if s > 7 then rex = rex + 4 end | ||
1475 | wputop(szov, opcode, rex); opcode = nil | ||
1476 | local imark = (sub(pat, -1) == "I") -- Force a mark (ugly). | ||
1477 | -- Put ModRM/SIB with regno/last digit as spare. | ||
1478 | wputmrmsib(t, imark, s, addin and addin.vreg) | ||
1466 | addin = nil | 1479 | addin = nil |
1467 | else | 1480 | else |
1468 | if opcode then -- Flush opcode. | 1481 | if opcode then -- Flush opcode. |
1469 | if addin and addin.reg == -1 then | 1482 | if addin and addin.reg == -1 then |
1470 | wputop(szov, opcode + 1) | 1483 | wputop(szov, opcode + 1, 0) |
1471 | waction("VREG", addin.vreg); wputxb(0) | 1484 | waction("VREG", addin.vreg); wputxb(0) |
1472 | else | 1485 | else |
1473 | wputop(szov, opcode) | 1486 | wputop(szov, opcode, (addin and addin.reg > 7) and 4 or 0) |
1474 | end | 1487 | end |
1475 | opcode = nil | 1488 | opcode = nil |
1476 | end | 1489 | end |
@@ -1583,7 +1596,7 @@ map_op[".template__"] = function(params, template, nparams) | |||
1583 | end | 1596 | end |
1584 | else -- Match common operand size. | 1597 | else -- Match common operand size. |
1585 | local szp = sz | 1598 | local szp = sz |
1586 | if szm == "" then szm = "dwb" end -- Default size match. | 1599 | if szm == "" then szm = x64 and "qdwb" or "dwb" end -- Default sizes. |
1587 | if prefix == "1" then szp = args[1].opsize; szmix = nil | 1600 | if prefix == "1" then szp = args[1].opsize; szmix = nil |
1588 | elseif prefix == "2" then szp = args[2].opsize; szmix = nil end | 1601 | elseif prefix == "2" then szp = args[2].opsize; szmix = nil end |
1589 | if not szmix and (prefix == "." or match(szm, szp or "#")) then | 1602 | if not szmix and (prefix == "." or match(szm, szp or "#")) then |