aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mike Pall <mike> 2009-12-10 05:59:42 +0100
committer Mike Pall <mike> 2009-12-10 05:59:42 +0100
commit 33171e818077838ed673e927ea593f3dc72efde8 (patch)
tree 87fba712756b8411a1ced602a1c61050d0931d7f
parent 8d564c70ad524466ff3128febaa3a1f668ed5d42 (diff)
download luajit-33171e818077838ed673e927ea593f3dc72efde8.tar.gz
luajit-33171e818077838ed673e927ea593f3dc72efde8.tar.bz2
luajit-33171e818077838ed673e927ea593f3dc72efde8.zip
Add 64 bit instruction templates to DynASM x64 and fix REX encoding.
-rw-r--r-- dynasm/dasm_x86.lua 113
1 files changed, 66 insertions, 47 deletions
diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua
index f508540e..e43e4675 100644
--- a/dynasm/dasm_x86.lua
+++ b/dynasm/dasm_x86.lua
@@ -447,7 +447,7 @@ end
447 447
448-- Put operand-size dependent number or arg (defaults to dword). 448-- Put operand-size dependent number or arg (defaults to dword).
449local function wputszarg(sz, n) 449local function wputszarg(sz, n)
450 if not sz or sz == "d" then wputdarg(n) 450 if not sz or sz == "d" or sz == "q" then wputdarg(n)
451 elseif sz == "w" then wputwarg(n) 451 elseif sz == "w" then wputwarg(n)
452 elseif sz == "b" then wputbarg(n) 452 elseif sz == "b" then wputbarg(n)
453 elseif sz == "s" then wputsbarg(n) 453 elseif sz == "s" then wputsbarg(n)
@@ -457,11 +457,20 @@ end
457-- Put multi-byte opcode with operand-size dependent modifications. 457-- Put multi-byte opcode with operand-size dependent modifications.
458local function wputop(sz, op, rex) 458local function wputop(sz, op, rex)
459 local r 459 local r
460 if rex ~= 0 and not x64 then werror("bad operand size") end
460 if sz == "w" then wputb(102) end 461 if sz == "w" then wputb(102) end
461 -- Needs >32 bit numbers, but only for crc32 eax, word [ebx] 462 -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
462 if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end 463 if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
463 if op >= 16777216 then r = op % 16777216 wputb((op-r) / 16777216) op = r end 464 if op >= 16777216 then r = op % 16777216 wputb((op-r) / 16777216) op = r end
464 if op >= 65536 then r = op % 65536 wputb((op-r) / 65536) op = r end 465 if op >= 65536 then
466 if rex ~= 0 then
467 local opc3 = op - op % 256
468 if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
469 wputb(64 + rex % 15); rex = 0
470 end
471 end
472 r = op % 65536 wputb((op-r) / 65536) op = r
473 end
465 if op >= 256 then 474 if op >= 256 then
466 r = op % 256 475 r = op % 256
467 local b = (op-r) / 256 476 local b = (op-r) / 256
@@ -844,6 +853,7 @@ end
844-- Hex chars are accumulated to form the opcode (left to right). 853-- Hex chars are accumulated to form the opcode (left to right).
845-- "n" disables the standard opcode mods 854-- "n" disables the standard opcode mods
846-- (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q") 855-- (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q")
856-- "W" Force REX.W.
847-- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode. 857-- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode.
848-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. 858-- "m"/"M" generates ModRM/SIB from the 1st/2nd operand.
849-- The spare 3 bits are either filled with the last hex digit or 859-- The spare 3 bits are either filled with the last hex digit or
@@ -888,18 +898,20 @@ local map_op = {
888 -- 38-3D: cmp... 898 -- 38-3D: cmp...
889 ds_0 = "3E", 899 ds_0 = "3E",
890 -- 3F: *aas 900 -- 3F: *aas
891 inc_1 = "rdw:40r|m:FF0m", 901 inc_1 = x64 and "m:FF0m" or "rdw:40r|m:FF0m",
892 dec_1 = "rdw:48r|m:FF1m", 902 dec_1 = x64 and "m:FF1m" or "rdw:48r|m:FF1m",
893 push_1 = "rdw:50r|mdw:FF6m|S.:6AS|ib:n6Ai|i.:68i", 903 push_1 = (x64 and "rqw:50r|mqw:FF6m" or "rdw:50r|mdw:FF6m").."|S.:6AS|ib:n6Ai|i.:68i",
894 pop_1 = "rdw:58r|mdw:8F0m", 904 pop_1 = x64 and "rqw:58r|mqw:8F0m" or "rdw:58r|mdw:8F0m",
895 -- 60: *pusha, *pushad, *pushaw 905 -- 60: *pusha, *pushad, *pushaw
896 -- 61: *popa, *popad, *popaw 906 -- 61: *popa, *popad, *popaw
897 -- 62: *bound rdw,x 907 -- 62: *bound rdw,x
898 -- 63: *arpl mw,rw 908 -- 63: x86: *arpl mw,rw
909 movsxd_2 = x64 and "rm/qd:63rM",
899 fs_0 = "64", 910 fs_0 = "64",
900 gs_0 = "65", 911 gs_0 = "65",
901 o16_0 = "66", 912 o16_0 = "66",
902 a16_0 = "67", 913 a16_0 = not x64 and "67" or nil,
914 a32_0 = x64 and "67",
903 -- 68: push idw 915 -- 68: push idw
904 -- 69: imul rdw,mdw,idw 916 -- 69: imul rdw,mdw,idw
905 -- 6A: push ib 917 -- 6A: push ib
@@ -925,11 +937,13 @@ local map_op = {
925 -- 8E: *mov seg,mdw 937 -- 8E: *mov seg,mdw
926 -- 8F: pop mdw 938 -- 8F: pop mdw
927 nop_0 = "90", 939 nop_0 = "90",
928 xchg_2 = "Rrdw:90R|rRdw:90r|rm:87rM|mr:87Rm", 940 xchg_2 = "Rrqdw:90R|rRqdw:90r|rm:87rM|mr:87Rm",
929 cbw_0 = "6698", 941 cbw_0 = "6698",
930 cwde_0 = "98", 942 cwde_0 = "98",
943 cdqe_0 = "4898",
931 cwd_0 = "6699", 944 cwd_0 = "6699",
932 cdq_0 = "99", 945 cdq_0 = "99",
946 cqo_0 = "4899",
933 -- 9A: *call iw:idw 947 -- 9A: *call iw:idw
934 wait_0 = "9B", 948 wait_0 = "9B",
935 fwait_0 = "9B", 949 fwait_0 = "9B",
@@ -941,6 +955,7 @@ local map_op = {
941 popfd_0 = "9D", 955 popfd_0 = "9D",
942 sahf_0 = "9E", 956 sahf_0 = "9E",
943 lahf_0 = "9F", 957 lahf_0 = "9F",
958 -- !x64: mov with 64 bit immediate
944 mov_2 = "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi", 959 mov_2 = "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi",
945 movsb_0 = "A4", 960 movsb_0 = "A4",
946 movsw_0 = "66A5", 961 movsw_0 = "66A5",
@@ -994,8 +1009,8 @@ local map_op = {
994 -- E5: *in Rdw,ib 1009 -- E5: *in Rdw,ib
995 -- E6: *out ib,Rb 1010 -- E6: *out ib,Rb
996 -- E7: *out ib,Rdw 1011 -- E7: *out ib,Rdw
997 call_1 = "md:FF2m|J.:E8J", 1012 call_1 = x64 and "mq:FF2m|J.:E8J" or "md:FF2m|J.:E8J",
998 jmp_1 = "md:FF4m|J.:E9J", -- short: EB 1013 jmp_1 = x64 and "mq:FF4m|J.:E9J" or "md:FF4m|J.:E9J", -- short: EB
999 -- EA: *jmp iw:idw 1014 -- EA: *jmp iw:idw
1000 -- EB: jmp ib 1015 -- EB: jmp ib
1001 -- EC: *in Rb,dx 1016 -- EC: *in Rb,dx
@@ -1029,19 +1044,19 @@ local map_op = {
1029 div_1 = "m:F76m", 1044 div_1 = "m:F76m",
1030 idiv_1 = "m:F77m", 1045 idiv_1 = "m:F77m",
1031 1046
1032 imul_2 = "rmdw:0FAFrM|rIdw:69rmI|rSdw:6BrmS|ridw:69rmi", 1047 imul_2 = "rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi",
1033 imul_3 = "rmIdw:69rMI|rmSdw:6BrMS|rmidw:69rMi", 1048 imul_3 = "rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi",
1034 1049
1035 movzx_2 = "rm/db:0FB6rM|rm/wb:0FB6rM|rm/dw:0FB7rM", 1050 movzx_2 = "rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:",
1036 movsx_2 = "rm/db:0FBErM|rm/wb:0FBErM|rm/dw:0FBFrM", 1051 movsx_2 = "rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:",
1037 1052
1038 bswap_1 = "rd:0FC8r", 1053 bswap_1 = "rqd:0FC8r",
1039 bsf_2 = "rmdw:0FBCrM", 1054 bsf_2 = "rmqdw:0FBCrM",
1040 bsr_2 = "rmdw:0FBDrM", 1055 bsr_2 = "rmqdw:0FBDrM",
1041 bt_2 = "mrdw:0FA3Rm|midw:0FBA4mU", 1056 bt_2 = "mrqdw:0FA3Rm|miqdw:0FBA4mU",
1042 btc_2 = "mrdw:0FBBRm|midw:0FBA7mU", 1057 btc_2 = "mrqdw:0FBBRm|miqdw:0FBA7mU",
1043 btr_2 = "mrdw:0FB3Rm|midw:0FBA6mU", 1058 btr_2 = "mrqdw:0FB3Rm|miqdw:0FBA6mU",
1044 bts_2 = "mrdw:0FABRm|midw:0FBA5mU", 1059 bts_2 = "mrqdw:0FABRm|miqdw:0FBA5mU",
1045 1060
1046 rdtsc_0 = "0F31", -- P1+ 1061 rdtsc_0 = "0F31", -- P1+
1047 cpuid_0 = "0FA2", -- P1+ 1062 cpuid_0 = "0FA2", -- P1+
@@ -1141,23 +1156,23 @@ local map_op = {
1141 cvtpi2ps_2 = "rx/oq:0F2ArM", 1156 cvtpi2ps_2 = "rx/oq:0F2ArM",
1142 cvtps2dq_2 = "rmo:660F5BrM", 1157 cvtps2dq_2 = "rmo:660F5BrM",
1143 cvtps2pd_2 = "rro:0F5ArM|rx/oq:", 1158 cvtps2pd_2 = "rro:0F5ArM|rx/oq:",
1144 cvtsd2si_2 = "rr/do:F20F2DrM|rx/dq:", 1159 cvtsd2si_2 = "rr/do:F20F2DrM|rr/qo:|rx/dq:|rxq:",
1145 cvtsd2ss_2 = "rro:F20F5ArM|rx/oq:", 1160 cvtsd2ss_2 = "rro:F20F5ArM|rx/oq:",
1146 cvtsi2sd_2 = "rm/od:F20F2ArM", 1161 cvtsi2sd_2 = "rm/od:F20F2ArM|rm/oq:F20F2ArWM",
1147 cvtsi2ss_2 = "rm/od:F30F2ArM", 1162 cvtsi2ss_2 = "rm/od:F30F2ArM|rm/oq:F30F2ArWM",
1148 cvtss2sd_2 = "rro:F30F5ArM|rx/od:", 1163 cvtss2sd_2 = "rro:F30F5ArM|rx/od:",
1149 cvtss2si_2 = "rr/do:F20F2CrM|rx/dd:", 1164 cvtss2si_2 = "rr/do:F20F2CrM|rr/qo:|rxd:|rx/qd:",
1150 cvttpd2dq_2 = "rmo:660FE6rM", 1165 cvttpd2dq_2 = "rmo:660FE6rM",
1151 cvttps2dq_2 = "rmo:F30F5BrM", 1166 cvttps2dq_2 = "rmo:F30F5BrM",
1152 cvttsd2si_2 = "rr/do:F20F2CrM|rx/dq:", 1167 cvttsd2si_2 = "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
1153 cvttss2si_2 = "rr/do:F30F2CrM|rx/dd:", 1168 cvttss2si_2 = "rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:",
1154 ldmxcsr_1 = "xd:0FAE2m", 1169 ldmxcsr_1 = "xd:0FAE2m",
1155 lfence_0 = "0FAEE8", 1170 lfence_0 = "0FAEE8",
1156 maskmovdqu_2 = "rro:660FF7rM", 1171 maskmovdqu_2 = "rro:660FF7rM",
1157 mfence_0 = "0FAEF0", 1172 mfence_0 = "0FAEF0",
1158 movapd_2 = "rmo:660F28rM|mro:660F29Rm", 1173 movapd_2 = "rmo:660F28rM|mro:660F29Rm",
1159 movaps_2 = "rmo:0F28rM|mro:0F29Rm", 1174 movaps_2 = "rmo:0F28rM|mro:0F29Rm",
1160 movd_2 = "rm/od:660F6ErM|mr/do:660F7ERm", 1175 movd_2 = "rm/od:660F6ErM|rm/oq:660F6EWrM|mr/do:660F7ERm|mr/qo:",
1161 movdqa_2 = "rmo:660F6FrM|mro:660F7FRm", 1176 movdqa_2 = "rmo:660F6FrM|mro:660F7FRm",
1162 movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm", 1177 movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm",
1163 movhlps_2 = "rro:0F12rM", 1178 movhlps_2 = "rro:0F12rM",
@@ -1169,7 +1184,7 @@ local map_op = {
1169 movmskpd_2 = "rr/do:660F50rM", 1184 movmskpd_2 = "rr/do:660F50rM",
1170 movmskps_2 = "rr/do:0F50rM", 1185 movmskps_2 = "rr/do:0F50rM",
1171 movntdq_2 = "xro:660FE7Rm", 1186 movntdq_2 = "xro:660FE7Rm",
1172 movnti_2 = "xrd:0FC3Rm", 1187 movnti_2 = "xrqd:0FC3Rm",
1173 movntpd_2 = "xro:660F2BRm", 1188 movntpd_2 = "xro:660F2BRm",
1174 movntps_2 = "xro:0F2BRm", 1189 movntps_2 = "xro:0F2BRm",
1175 movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm", 1190 movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm",
@@ -1304,7 +1319,7 @@ local map_op = {
1304 blendvps_3 = "rmRo:660F3814rM", 1319 blendvps_3 = "rmRo:660F3814rM",
1305 dppd_3 = "rmio:660F3A41rMU", 1320 dppd_3 = "rmio:660F3A41rMU",
1306 dpps_3 = "rmio:660F3A40rMU", 1321 dpps_3 = "rmio:660F3A40rMU",
1307 extractps_3 = "mri/do:660F3A17RmU", 1322 extractps_3 = "mri/do:660F3A17RmU|rri/qo:660F3A17RWmU",
1308 insertps_3 = "rrio:660F3A41rMU|rxi/od:", 1323 insertps_3 = "rrio:660F3A41rMU|rxi/od:",
1309 movntdqa_2 = "rmo:660F382ArM", 1324 movntdqa_2 = "rmo:660F382ArM",
1310 mpsadbw_3 = "rmio:660F3A42rMU", 1325 mpsadbw_3 = "rmio:660F3A42rMU",
@@ -1312,14 +1327,14 @@ local map_op = {
1312 pblendvb_3 = "rmRo:660F3810rM", 1327 pblendvb_3 = "rmRo:660F3810rM",
1313 pblendw_3 = "rmio:660F3A0ErMU", 1328 pblendw_3 = "rmio:660F3A0ErMU",
1314 pcmpeqq_2 = "rmo:660F3829rM", 1329 pcmpeqq_2 = "rmo:660F3829rM",
1315 pextrb_3 = "rri/do:660F3A14nRmU|xri/bo:", 1330 pextrb_3 = "rri/do:660F3A14nRmU|rri/qo:|xri/bo:",
1316 pextrd_3 = "mri/do:660F3A16RmU", 1331 pextrd_3 = "mri/do:660F3A16RmU",
1317 -- x64: pextrq 1332 pextrq_3 = "mri/qo:660F3A16RmU",
1318 -- pextrw is SSE2, mem operand is SSE4.1 only 1333 -- pextrw is SSE2, mem operand is SSE4.1 only
1319 phminposuw_2 = "rmo:660F3841rM", 1334 phminposuw_2 = "rmo:660F3841rM",
1320 pinsrb_3 = "rri/od:660F3A20nrMU|rxi/ob:", 1335 pinsrb_3 = "rri/od:660F3A20nrMU|rxi/ob:",
1321 pinsrd_3 = "rmi/od:660F3A22rMU", 1336 pinsrd_3 = "rmi/od:660F3A22rMU",
1322 -- x64: pinsrq 1337 pinsrq_3 = "rmi/oq:660F3A22rWMU",
1323 pmaxsb_2 = "rmo:660F383CrM", 1338 pmaxsb_2 = "rmo:660F383CrM",
1324 pmaxsd_2 = "rmo:660F383DrM", 1339 pmaxsd_2 = "rmo:660F383DrM",
1325 pmaxud_2 = "rmo:660F383FrM", 1340 pmaxud_2 = "rmo:660F383FrM",
@@ -1349,20 +1364,20 @@ local map_op = {
1349 roundss_3 = "rrio:660F3A0ArMU|rxi/od:", 1364 roundss_3 = "rrio:660F3A0ArMU|rxi/od:",
1350 1365
1351 -- SSE4.2 ops 1366 -- SSE4.2 ops
1352 crc32_2 = "rmd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0nrM", 1367 crc32_2 = "rmqd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0rM|rm/qb:",
1353 pcmpestri_3 = "rmio:660F3A61rMU", 1368 pcmpestri_3 = "rmio:660F3A61rMU",
1354 pcmpestrm_3 = "rmio:660F3A60rMU", 1369 pcmpestrm_3 = "rmio:660F3A60rMU",
1355 pcmpgtq_2 = "rmo:660F3837rM", 1370 pcmpgtq_2 = "rmo:660F3837rM",
1356 pcmpistri_3 = "rmio:660F3A63rMU", 1371 pcmpistri_3 = "rmio:660F3A63rMU",
1357 pcmpistrm_3 = "rmio:660F3A62rMU", 1372 pcmpistrm_3 = "rmio:660F3A62rMU",
1358 popcnt_2 = "rmdw:F30FB8rM", 1373 popcnt_2 = "rmqdw:F30FB8rM",
1359 1374
1360 -- SSE4a 1375 -- SSE4a
1361 extrq_2 = "rro:660F79rM", 1376 extrq_2 = "rro:660F79rM",
1362 extrq_3 = "riio:660F780mUU", 1377 extrq_3 = "riio:660F780mUU",
1363 insertq_2 = "rro:F20F79rM", 1378 insertq_2 = "rro:F20F79rM",
1364 insertq_4 = "rriio:F20F78rMUU", 1379 insertq_4 = "rriio:F20F78rMUU",
1365 lzcnt_2 = "rmdw:F30FBDrM", 1380 lzcnt_2 = "rmqdw:F30FBDrM",
1366 movntsd_2 = "xr/qo:nF20F2BRm", 1381 movntsd_2 = "xr/qo:nF20F2BRm",
1367 movntss_2 = "xr/do:F30F2BRm", 1382 movntss_2 = "xr/do:F30F2BRm",
1368 -- popcnt is also in SSE4.2 1383 -- popcnt is also in SSE4.2
@@ -1375,21 +1390,21 @@ for name,n in pairs{ add = 0, ["or"] = 1, adc = 2, sbb = 3,
1375 ["and"] = 4, sub = 5, xor = 6, cmp = 7 } do 1390 ["and"] = 4, sub = 5, xor = 6, cmp = 7 } do
1376 local n8 = n * 8 1391 local n8 = n * 8
1377 map_op[name.."_2"] = format( 1392 map_op[name.."_2"] = format(
1378 "mr:%02XRm|rm:%02XrM|mI1dw:81%XmI|mS1dw:83%XmS|Ri1dwb:%02Xri|mi1dwb:81%Xmi", 1393 "mr:%02XRm|rm:%02XrM|mI1qdw:81%XmI|mS1qdw:83%XmS|Ri1qdwb:%02Xri|mi1qdwb:81%Xmi",
1379 1+n8, 3+n8, n, n, 5+n8, n) 1394 1+n8, 3+n8, n, n, 5+n8, n)
1380end 1395end
1381 1396
1382-- Shift ops. 1397-- Shift ops.
1383for name,n in pairs{ rol = 0, ror = 1, rcl = 2, rcr = 3, 1398for name,n in pairs{ rol = 0, ror = 1, rcl = 2, rcr = 3,
1384 shl = 4, shr = 5, sar = 7, sal = 4 } do 1399 shl = 4, shr = 5, sar = 7, sal = 4 } do
1385 map_op[name.."_2"] = format("m1:D1%Xm|mC1dwb:D3%Xm|mi:C1%XmU", n, n, n) 1400 map_op[name.."_2"] = format("m1:D1%Xm|mC1qdwb:D3%Xm|mi:C1%XmU", n, n, n)
1386end 1401end
1387 1402
1388-- Conditional ops. 1403-- Conditional ops.
1389for cc,n in pairs(map_cc) do 1404for cc,n in pairs(map_cc) do
1390 map_op["j"..cc.."_1"] = format("J.:0F8%XJ", n) -- short: 7%X 1405 map_op["j"..cc.."_1"] = format("J.:0F8%XJ", n) -- short: 7%X
1391 map_op["set"..cc.."_1"] = format("mb:n0F9%X2m", n) 1406 map_op["set"..cc.."_1"] = format("mb:n0F9%X2m", n)
1392 map_op["cmov"..cc.."_2"] = format("rmdw:0F4%XrM", n) -- P6+ 1407 map_op["cmov"..cc.."_2"] = format("rmqdw:0F4%XrM", n) -- P6+
1393end 1408end
1394 1409
1395-- FP arithmetic ops. 1410-- FP arithmetic ops.
@@ -1400,9 +1415,9 @@ for name,n in pairs{ add = 0, mul = 1, com = 2, comp = 3,
1400 local fn = "f"..name 1415 local fn = "f"..name
1401 map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", nc, n, n) 1416 map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", nc, n, n)
1402 if n == 2 or n == 3 then 1417 if n == 2 or n == 3 then
1403 map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:DC%XM", nc, n, n) 1418 map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:nDC%XM", nc, n, n)
1404 else 1419 else
1405 map_op[fn.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:DC%XM", nc, nr, n, n) 1420 map_op[fn.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:nDC%XM", nc, nr, n, n)
1406 map_op[fn.."p_1"] = format("ff:DE%02Xr", nr) 1421 map_op[fn.."p_1"] = format("ff:DE%02Xr", nr)
1407 map_op[fn.."p_2"] = format("fFf:DE%02Xr", nr) 1422 map_op[fn.."p_2"] = format("fFf:DE%02Xr", nr)
1408 end 1423 end
@@ -1450,6 +1465,8 @@ local function dopattern(pat, args, sz, op)
1450 addin = nil 1465 addin = nil
1451 elseif c == "n" then -- Disable operand size mods for opcode. 1466 elseif c == "n" then -- Disable operand size mods for opcode.
1452 szov = nil 1467 szov = nil
1468 elseif c == "W" then -- Force REX.W.
1469 rex = 8
1453 elseif c == "r" then -- Merge 1st operand regno. into opcode. 1470 elseif c == "r" then -- Merge 1st operand regno. into opcode.
1454 addin = args[1]; opcode = opcode + (addin.reg % 8) 1471 addin = args[1]; opcode = opcode + (addin.reg % 8)
1455 if narg < 2 then narg = 2 end 1472 if narg < 2 then narg = 2 end
@@ -1468,7 +1485,7 @@ local function dopattern(pat, args, sz, op)
1468 local nn = c == "m" and 1 or 2 1485 local nn = c == "m" and 1 or 2
1469 local t = args[nn] 1486 local t = args[nn]
1470 if narg <= nn then narg = nn + 1 end 1487 if narg <= nn then narg = nn + 1 end
1471 local rex = szov == "q" and 8 or 0 1488 if szov == "q" and rex == 0 then rex = rex + 8 end
1472 if t.reg and t.reg > 7 then rex = rex + 1 end 1489 if t.reg and t.reg > 7 then rex = rex + 1 end
1473 if t.xreg and t.xreg > 7 then rex = rex + 2 end 1490 if t.xreg and t.xreg > 7 then rex = rex + 2 end
1474 if s > 7 then rex = rex + 4 end 1491 if s > 7 then rex = rex + 4 end
@@ -1479,11 +1496,13 @@ local function dopattern(pat, args, sz, op)
1479 addin = nil 1496 addin = nil
1480 else 1497 else
1481 if opcode then -- Flush opcode. 1498 if opcode then -- Flush opcode.
1499 if szov == "q" and rex == 0 then rex = rex + 8 end
1482 if addin and addin.reg == -1 then 1500 if addin and addin.reg == -1 then
1483 wputop(szov, opcode + 1, 0) 1501 wputop(szov, opcode + 1, rex)
1484 waction("VREG", addin.vreg); wputxb(0) 1502 waction("VREG", addin.vreg); wputxb(0)
1485 else 1503 else
1486 wputop(szov, opcode, (addin and addin.reg > 7) and 4 or 0) 1504 if addin and addin.reg > 7 then rex = rex + 1 end
1505 wputop(szov, opcode, rex)
1487 end 1506 end
1488 opcode = nil 1507 opcode = nil
1489 end 1508 end