diff options
author | Mike Pall <mike> | 2009-12-10 05:59:42 +0100 |
---|---|---|
committer | Mike Pall <mike> | 2009-12-10 05:59:42 +0100 |
commit | 33171e818077838ed673e927ea593f3dc72efde8 (patch) | |
tree | 87fba712756b8411a1ced602a1c61050d0931d7f | |
parent | 8d564c70ad524466ff3128febaa3a1f668ed5d42 (diff) | |
download | luajit-33171e818077838ed673e927ea593f3dc72efde8.tar.gz luajit-33171e818077838ed673e927ea593f3dc72efde8.tar.bz2 luajit-33171e818077838ed673e927ea593f3dc72efde8.zip |
Add 64 bit instruction templates to DynASM x64 and fix REX encoding.
-rw-r--r-- | dynasm/dasm_x86.lua | 113 |
1 file changed, 66 insertions, 47 deletions
diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua index f508540e..e43e4675 100644 --- a/dynasm/dasm_x86.lua +++ b/dynasm/dasm_x86.lua | |||
@@ -447,7 +447,7 @@ end | |||
447 | 447 | ||
448 | -- Put operand-size dependent number or arg (defaults to dword). | 448 | -- Put operand-size dependent number or arg (defaults to dword). |
449 | local function wputszarg(sz, n) | 449 | local function wputszarg(sz, n) |
450 | if not sz or sz == "d" then wputdarg(n) | 450 | if not sz or sz == "d" or sz == "q" then wputdarg(n) |
451 | elseif sz == "w" then wputwarg(n) | 451 | elseif sz == "w" then wputwarg(n) |
452 | elseif sz == "b" then wputbarg(n) | 452 | elseif sz == "b" then wputbarg(n) |
453 | elseif sz == "s" then wputsbarg(n) | 453 | elseif sz == "s" then wputsbarg(n) |
@@ -457,11 +457,20 @@ end | |||
457 | -- Put multi-byte opcode with operand-size dependent modifications. | 457 | -- Put multi-byte opcode with operand-size dependent modifications. |
458 | local function wputop(sz, op, rex) | 458 | local function wputop(sz, op, rex) |
459 | local r | 459 | local r |
460 | if rex ~= 0 and not x64 then werror("bad operand size") end | ||
460 | if sz == "w" then wputb(102) end | 461 | if sz == "w" then wputb(102) end |
461 | -- Needs >32 bit numbers, but only for crc32 eax, word [ebx] | 462 | -- Needs >32 bit numbers, but only for crc32 eax, word [ebx] |
462 | if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end | 463 | if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end |
463 | if op >= 16777216 then r = op % 16777216 wputb((op-r) / 16777216) op = r end | 464 | if op >= 16777216 then r = op % 16777216 wputb((op-r) / 16777216) op = r end |
464 | if op >= 65536 then r = op % 65536 wputb((op-r) / 65536) op = r end | 465 | if op >= 65536 then |
466 | if rex ~= 0 then | ||
467 | local opc3 = op - op % 256 | ||
468 | if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then | ||
469 | wputb(64 + rex % 15); rex = 0 | ||
470 | end | ||
471 | end | ||
472 | r = op % 65536 wputb((op-r) / 65536) op = r | ||
473 | end | ||
465 | if op >= 256 then | 474 | if op >= 256 then |
466 | r = op % 256 | 475 | r = op % 256 |
467 | local b = (op-r) / 256 | 476 | local b = (op-r) / 256 |
@@ -844,6 +853,7 @@ end | |||
844 | -- Hex chars are accumulated to form the opcode (left to right). | 853 | -- Hex chars are accumulated to form the opcode (left to right). |
845 | -- "n" disables the standard opcode mods | 854 | -- "n" disables the standard opcode mods |
846 | -- (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q") | 855 | -- (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q") |
856 | -- "W" Force REX.W. | ||
847 | -- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode. | 857 | -- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode. |
848 | -- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. | 858 | -- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. |
849 | -- The spare 3 bits are either filled with the last hex digit or | 859 | -- The spare 3 bits are either filled with the last hex digit or |
@@ -888,18 +898,20 @@ local map_op = { | |||
888 | -- 38-3D: cmp... | 898 | -- 38-3D: cmp... |
889 | ds_0 = "3E", | 899 | ds_0 = "3E", |
890 | -- 3F: *aas | 900 | -- 3F: *aas |
891 | inc_1 = "rdw:40r|m:FF0m", | 901 | inc_1 = x64 and "m:FF0m" or "rdw:40r|m:FF0m", |
892 | dec_1 = "rdw:48r|m:FF1m", | 902 | dec_1 = x64 and "m:FF1m" or "rdw:48r|m:FF1m", |
893 | push_1 = "rdw:50r|mdw:FF6m|S.:6AS|ib:n6Ai|i.:68i", | 903 | push_1 = (x64 and "rqw:50r|mqw:FF6m" or "rdw:50r|mdw:FF6m").."|S.:6AS|ib:n6Ai|i.:68i", |
894 | pop_1 = "rdw:58r|mdw:8F0m", | 904 | pop_1 = x64 and "rqw:58r|mqw:8F0m" or "rdw:58r|mdw:8F0m", |
895 | -- 60: *pusha, *pushad, *pushaw | 905 | -- 60: *pusha, *pushad, *pushaw |
896 | -- 61: *popa, *popad, *popaw | 906 | -- 61: *popa, *popad, *popaw |
897 | -- 62: *bound rdw,x | 907 | -- 62: *bound rdw,x |
898 | -- 63: *arpl mw,rw | 908 | -- 63: x86: *arpl mw,rw |
909 | movsxd_2 = x64 and "rm/qd:63rM", | ||
899 | fs_0 = "64", | 910 | fs_0 = "64", |
900 | gs_0 = "65", | 911 | gs_0 = "65", |
901 | o16_0 = "66", | 912 | o16_0 = "66", |
902 | a16_0 = "67", | 913 | a16_0 = not x64 and "67" or nil, |
914 | a32_0 = x64 and "67", | ||
903 | -- 68: push idw | 915 | -- 68: push idw |
904 | -- 69: imul rdw,mdw,idw | 916 | -- 69: imul rdw,mdw,idw |
905 | -- 6A: push ib | 917 | -- 6A: push ib |
@@ -925,11 +937,13 @@ local map_op = { | |||
925 | -- 8E: *mov seg,mdw | 937 | -- 8E: *mov seg,mdw |
926 | -- 8F: pop mdw | 938 | -- 8F: pop mdw |
927 | nop_0 = "90", | 939 | nop_0 = "90", |
928 | xchg_2 = "Rrdw:90R|rRdw:90r|rm:87rM|mr:87Rm", | 940 | xchg_2 = "Rrqdw:90R|rRqdw:90r|rm:87rM|mr:87Rm", |
929 | cbw_0 = "6698", | 941 | cbw_0 = "6698", |
930 | cwde_0 = "98", | 942 | cwde_0 = "98", |
943 | cdqe_0 = "4898", | ||
931 | cwd_0 = "6699", | 944 | cwd_0 = "6699", |
932 | cdq_0 = "99", | 945 | cdq_0 = "99", |
946 | cqo_0 = "4899", | ||
933 | -- 9A: *call iw:idw | 947 | -- 9A: *call iw:idw |
934 | wait_0 = "9B", | 948 | wait_0 = "9B", |
935 | fwait_0 = "9B", | 949 | fwait_0 = "9B", |
@@ -941,6 +955,7 @@ local map_op = { | |||
941 | popfd_0 = "9D", | 955 | popfd_0 = "9D", |
942 | sahf_0 = "9E", | 956 | sahf_0 = "9E", |
943 | lahf_0 = "9F", | 957 | lahf_0 = "9F", |
958 | -- TODO (x64): mov with 64 bit immediate is not handled by the templates below. | |
944 | mov_2 = "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi", | 959 | mov_2 = "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi", |
945 | movsb_0 = "A4", | 960 | movsb_0 = "A4", |
946 | movsw_0 = "66A5", | 961 | movsw_0 = "66A5", |
@@ -994,8 +1009,8 @@ local map_op = { | |||
994 | -- E5: *in Rdw,ib | 1009 | -- E5: *in Rdw,ib |
995 | -- E6: *out ib,Rb | 1010 | -- E6: *out ib,Rb |
996 | -- E7: *out ib,Rdw | 1011 | -- E7: *out ib,Rdw |
997 | call_1 = "md:FF2m|J.:E8J", | 1012 | call_1 = x64 and "mq:FF2m|J.:E8J" or "md:FF2m|J.:E8J", |
998 | jmp_1 = "md:FF4m|J.:E9J", -- short: EB | 1013 | jmp_1 = x64 and "mq:FF4m|J.:E9J" or "md:FF4m|J.:E9J", -- short: EB |
999 | -- EA: *jmp iw:idw | 1014 | -- EA: *jmp iw:idw |
1000 | -- EB: jmp ib | 1015 | -- EB: jmp ib |
1001 | -- EC: *in Rb,dx | 1016 | -- EC: *in Rb,dx |
@@ -1029,19 +1044,19 @@ local map_op = { | |||
1029 | div_1 = "m:F76m", | 1044 | div_1 = "m:F76m", |
1030 | idiv_1 = "m:F77m", | 1045 | idiv_1 = "m:F77m", |
1031 | 1046 | ||
1032 | imul_2 = "rmdw:0FAFrM|rIdw:69rmI|rSdw:6BrmS|ridw:69rmi", | 1047 | imul_2 = "rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi", |
1033 | imul_3 = "rmIdw:69rMI|rmSdw:6BrMS|rmidw:69rMi", | 1048 | imul_3 = "rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi", |
1034 | 1049 | ||
1035 | movzx_2 = "rm/db:0FB6rM|rm/wb:0FB6rM|rm/dw:0FB7rM", | 1050 | movzx_2 = "rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:", |
1036 | movsx_2 = "rm/db:0FBErM|rm/wb:0FBErM|rm/dw:0FBFrM", | 1051 | movsx_2 = "rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:", |
1037 | 1052 | ||
1038 | bswap_1 = "rd:0FC8r", | 1053 | bswap_1 = "rqd:0FC8r", |
1039 | bsf_2 = "rmdw:0FBCrM", | 1054 | bsf_2 = "rmqdw:0FBCrM", |
1040 | bsr_2 = "rmdw:0FBDrM", | 1055 | bsr_2 = "rmqdw:0FBDrM", |
1041 | bt_2 = "mrdw:0FA3Rm|midw:0FBA4mU", | 1056 | bt_2 = "mrqdw:0FA3Rm|miqdw:0FBA4mU", |
1042 | btc_2 = "mrdw:0FBBRm|midw:0FBA7mU", | 1057 | btc_2 = "mrqdw:0FBBRm|miqdw:0FBA7mU", |
1043 | btr_2 = "mrdw:0FB3Rm|midw:0FBA6mU", | 1058 | btr_2 = "mrqdw:0FB3Rm|miqdw:0FBA6mU", |
1044 | bts_2 = "mrdw:0FABRm|midw:0FBA5mU", | 1059 | bts_2 = "mrqdw:0FABRm|miqdw:0FBA5mU", |
1045 | 1060 | ||
1046 | rdtsc_0 = "0F31", -- P1+ | 1061 | rdtsc_0 = "0F31", -- P1+ |
1047 | cpuid_0 = "0FA2", -- P1+ | 1062 | cpuid_0 = "0FA2", -- P1+ |
@@ -1141,23 +1156,23 @@ local map_op = { | |||
1141 | cvtpi2ps_2 = "rx/oq:0F2ArM", | 1156 | cvtpi2ps_2 = "rx/oq:0F2ArM", |
1142 | cvtps2dq_2 = "rmo:660F5BrM", | 1157 | cvtps2dq_2 = "rmo:660F5BrM", |
1143 | cvtps2pd_2 = "rro:0F5ArM|rx/oq:", | 1158 | cvtps2pd_2 = "rro:0F5ArM|rx/oq:", |
1144 | cvtsd2si_2 = "rr/do:F20F2DrM|rx/dq:", | 1159 | cvtsd2si_2 = "rr/do:F20F2DrM|rr/qo:|rx/dq:|rxq:", |
1145 | cvtsd2ss_2 = "rro:F20F5ArM|rx/oq:", | 1160 | cvtsd2ss_2 = "rro:F20F5ArM|rx/oq:", |
1146 | cvtsi2sd_2 = "rm/od:F20F2ArM", | 1161 | cvtsi2sd_2 = "rm/od:F20F2ArM|rm/oq:F20F2ArWM", |
1147 | cvtsi2ss_2 = "rm/od:F30F2ArM", | 1162 | cvtsi2ss_2 = "rm/od:F30F2ArM|rm/oq:F30F2ArWM", |
1148 | cvtss2sd_2 = "rro:F30F5ArM|rx/od:", | 1163 | cvtss2sd_2 = "rro:F30F5ArM|rx/od:", |
1149 | cvtss2si_2 = "rr/do:F20F2CrM|rx/dd:", | 1164 | cvtss2si_2 = "rr/do:F20F2CrM|rr/qo:|rxd:|rx/qd:", |
1150 | cvttpd2dq_2 = "rmo:660FE6rM", | 1165 | cvttpd2dq_2 = "rmo:660FE6rM", |
1151 | cvttps2dq_2 = "rmo:F30F5BrM", | 1166 | cvttps2dq_2 = "rmo:F30F5BrM", |
1152 | cvttsd2si_2 = "rr/do:F20F2CrM|rx/dq:", | 1167 | cvttsd2si_2 = "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:", |
1153 | cvttss2si_2 = "rr/do:F30F2CrM|rx/dd:", | 1168 | cvttss2si_2 = "rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:", |
1154 | ldmxcsr_1 = "xd:0FAE2m", | 1169 | ldmxcsr_1 = "xd:0FAE2m", |
1155 | lfence_0 = "0FAEE8", | 1170 | lfence_0 = "0FAEE8", |
1156 | maskmovdqu_2 = "rro:660FF7rM", | 1171 | maskmovdqu_2 = "rro:660FF7rM", |
1157 | mfence_0 = "0FAEF0", | 1172 | mfence_0 = "0FAEF0", |
1158 | movapd_2 = "rmo:660F28rM|mro:660F29Rm", | 1173 | movapd_2 = "rmo:660F28rM|mro:660F29Rm", |
1159 | movaps_2 = "rmo:0F28rM|mro:0F29Rm", | 1174 | movaps_2 = "rmo:0F28rM|mro:0F29Rm", |
1160 | movd_2 = "rm/od:660F6ErM|mr/do:660F7ERm", | 1175 | movd_2 = "rm/od:660F6ErM|rm/oq:660F6EWrM|mr/do:660F7ERm|mr/qo:", |
1161 | movdqa_2 = "rmo:660F6FrM|mro:660F7FRm", | 1176 | movdqa_2 = "rmo:660F6FrM|mro:660F7FRm", |
1162 | movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm", | 1177 | movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm", |
1163 | movhlps_2 = "rro:0F12rM", | 1178 | movhlps_2 = "rro:0F12rM", |
@@ -1169,7 +1184,7 @@ local map_op = { | |||
1169 | movmskpd_2 = "rr/do:660F50rM", | 1184 | movmskpd_2 = "rr/do:660F50rM", |
1170 | movmskps_2 = "rr/do:0F50rM", | 1185 | movmskps_2 = "rr/do:0F50rM", |
1171 | movntdq_2 = "xro:660FE7Rm", | 1186 | movntdq_2 = "xro:660FE7Rm", |
1172 | movnti_2 = "xrd:0FC3Rm", | 1187 | movnti_2 = "xrqd:0FC3Rm", |
1173 | movntpd_2 = "xro:660F2BRm", | 1188 | movntpd_2 = "xro:660F2BRm", |
1174 | movntps_2 = "xro:0F2BRm", | 1189 | movntps_2 = "xro:0F2BRm", |
1175 | movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm", | 1190 | movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm", |
@@ -1304,7 +1319,7 @@ local map_op = { | |||
1304 | blendvps_3 = "rmRo:660F3814rM", | 1319 | blendvps_3 = "rmRo:660F3814rM", |
1305 | dppd_3 = "rmio:660F3A41rMU", | 1320 | dppd_3 = "rmio:660F3A41rMU", |
1306 | dpps_3 = "rmio:660F3A40rMU", | 1321 | dpps_3 = "rmio:660F3A40rMU", |
1307 | extractps_3 = "mri/do:660F3A17RmU", | 1322 | extractps_3 = "mri/do:660F3A17RmU|rri/qo:660F3A17RWmU", |
1308 | insertps_3 = "rrio:660F3A41rMU|rxi/od:", | 1323 | insertps_3 = "rrio:660F3A41rMU|rxi/od:", |
1309 | movntdqa_2 = "rmo:660F382ArM", | 1324 | movntdqa_2 = "rmo:660F382ArM", |
1310 | mpsadbw_3 = "rmio:660F3A42rMU", | 1325 | mpsadbw_3 = "rmio:660F3A42rMU", |
@@ -1312,14 +1327,14 @@ local map_op = { | |||
1312 | pblendvb_3 = "rmRo:660F3810rM", | 1327 | pblendvb_3 = "rmRo:660F3810rM", |
1313 | pblendw_3 = "rmio:660F3A0ErMU", | 1328 | pblendw_3 = "rmio:660F3A0ErMU", |
1314 | pcmpeqq_2 = "rmo:660F3829rM", | 1329 | pcmpeqq_2 = "rmo:660F3829rM", |
1315 | pextrb_3 = "rri/do:660F3A14nRmU|xri/bo:", | 1330 | pextrb_3 = "rri/do:660F3A14nRmU|rri/qo:|xri/bo:", |
1316 | pextrd_3 = "mri/do:660F3A16RmU", | 1331 | pextrd_3 = "mri/do:660F3A16RmU", |
1317 | -- x64: pextrq | 1332 | pextrq_3 = "mri/qo:660F3A16RmU", |
1318 | -- pextrw is SSE2, mem operand is SSE4.1 only | 1333 | -- pextrw is SSE2, mem operand is SSE4.1 only |
1319 | phminposuw_2 = "rmo:660F3841rM", | 1334 | phminposuw_2 = "rmo:660F3841rM", |
1320 | pinsrb_3 = "rri/od:660F3A20nrMU|rxi/ob:", | 1335 | pinsrb_3 = "rri/od:660F3A20nrMU|rxi/ob:", |
1321 | pinsrd_3 = "rmi/od:660F3A22rMU", | 1336 | pinsrd_3 = "rmi/od:660F3A22rMU", |
1322 | -- x64: pinsrq | 1337 | pinsrq_3 = "rmi/oq:660F3A22rWMU", |
1323 | pmaxsb_2 = "rmo:660F383CrM", | 1338 | pmaxsb_2 = "rmo:660F383CrM", |
1324 | pmaxsd_2 = "rmo:660F383DrM", | 1339 | pmaxsd_2 = "rmo:660F383DrM", |
1325 | pmaxud_2 = "rmo:660F383FrM", | 1340 | pmaxud_2 = "rmo:660F383FrM", |
@@ -1349,20 +1364,20 @@ local map_op = { | |||
1349 | roundss_3 = "rrio:660F3A0ArMU|rxi/od:", | 1364 | roundss_3 = "rrio:660F3A0ArMU|rxi/od:", |
1350 | 1365 | ||
1351 | -- SSE4.2 ops | 1366 | -- SSE4.2 ops |
1352 | crc32_2 = "rmd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0nrM", | 1367 | crc32_2 = "rmqd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0rM|rm/qb:", |
1353 | pcmpestri_3 = "rmio:660F3A61rMU", | 1368 | pcmpestri_3 = "rmio:660F3A61rMU", |
1354 | pcmpestrm_3 = "rmio:660F3A60rMU", | 1369 | pcmpestrm_3 = "rmio:660F3A60rMU", |
1355 | pcmpgtq_2 = "rmo:660F3837rM", | 1370 | pcmpgtq_2 = "rmo:660F3837rM", |
1356 | pcmpistri_3 = "rmio:660F3A63rMU", | 1371 | pcmpistri_3 = "rmio:660F3A63rMU", |
1357 | pcmpistrm_3 = "rmio:660F3A62rMU", | 1372 | pcmpistrm_3 = "rmio:660F3A62rMU", |
1358 | popcnt_2 = "rmdw:F30FB8rM", | 1373 | popcnt_2 = "rmqdw:F30FB8rM", |
1359 | 1374 | ||
1360 | -- SSE4a | 1375 | -- SSE4a |
1361 | extrq_2 = "rro:660F79rM", | 1376 | extrq_2 = "rro:660F79rM", |
1362 | extrq_3 = "riio:660F780mUU", | 1377 | extrq_3 = "riio:660F780mUU", |
1363 | insertq_2 = "rro:F20F79rM", | 1378 | insertq_2 = "rro:F20F79rM", |
1364 | insertq_4 = "rriio:F20F78rMUU", | 1379 | insertq_4 = "rriio:F20F78rMUU", |
1365 | lzcnt_2 = "rmdw:F30FBDrM", | 1380 | lzcnt_2 = "rmqdw:F30FBDrM", |
1366 | movntsd_2 = "xr/qo:nF20F2BRm", | 1381 | movntsd_2 = "xr/qo:nF20F2BRm", |
1367 | movntss_2 = "xr/do:F30F2BRm", | 1382 | movntss_2 = "xr/do:F30F2BRm", |
1368 | -- popcnt is also in SSE4.2 | 1383 | -- popcnt is also in SSE4.2 |
@@ -1375,21 +1390,21 @@ for name,n in pairs{ add = 0, ["or"] = 1, adc = 2, sbb = 3, | |||
1375 | ["and"] = 4, sub = 5, xor = 6, cmp = 7 } do | 1390 | ["and"] = 4, sub = 5, xor = 6, cmp = 7 } do |
1376 | local n8 = n * 8 | 1391 | local n8 = n * 8 |
1377 | map_op[name.."_2"] = format( | 1392 | map_op[name.."_2"] = format( |
1378 | "mr:%02XRm|rm:%02XrM|mI1dw:81%XmI|mS1dw:83%XmS|Ri1dwb:%02Xri|mi1dwb:81%Xmi", | 1393 | "mr:%02XRm|rm:%02XrM|mI1qdw:81%XmI|mS1qdw:83%XmS|Ri1qdwb:%02Xri|mi1qdwb:81%Xmi", |
1379 | 1+n8, 3+n8, n, n, 5+n8, n) | 1394 | 1+n8, 3+n8, n, n, 5+n8, n) |
1380 | end | 1395 | end |
1381 | 1396 | ||
1382 | -- Shift ops. | 1397 | -- Shift ops. |
1383 | for name,n in pairs{ rol = 0, ror = 1, rcl = 2, rcr = 3, | 1398 | for name,n in pairs{ rol = 0, ror = 1, rcl = 2, rcr = 3, |
1384 | shl = 4, shr = 5, sar = 7, sal = 4 } do | 1399 | shl = 4, shr = 5, sar = 7, sal = 4 } do |
1385 | map_op[name.."_2"] = format("m1:D1%Xm|mC1dwb:D3%Xm|mi:C1%XmU", n, n, n) | 1400 | map_op[name.."_2"] = format("m1:D1%Xm|mC1qdwb:D3%Xm|mi:C1%XmU", n, n, n) |
1386 | end | 1401 | end |
1387 | 1402 | ||
1388 | -- Conditional ops. | 1403 | -- Conditional ops. |
1389 | for cc,n in pairs(map_cc) do | 1404 | for cc,n in pairs(map_cc) do |
1390 | map_op["j"..cc.."_1"] = format("J.:0F8%XJ", n) -- short: 7%X | 1405 | map_op["j"..cc.."_1"] = format("J.:0F8%XJ", n) -- short: 7%X |
1391 | map_op["set"..cc.."_1"] = format("mb:n0F9%X2m", n) | 1406 | map_op["set"..cc.."_1"] = format("mb:n0F9%X2m", n) |
1392 | map_op["cmov"..cc.."_2"] = format("rmdw:0F4%XrM", n) -- P6+ | 1407 | map_op["cmov"..cc.."_2"] = format("rmqdw:0F4%XrM", n) -- P6+ |
1393 | end | 1408 | end |
1394 | 1409 | ||
1395 | -- FP arithmetic ops. | 1410 | -- FP arithmetic ops. |
@@ -1400,9 +1415,9 @@ for name,n in pairs{ add = 0, mul = 1, com = 2, comp = 3, | |||
1400 | local fn = "f"..name | 1415 | local fn = "f"..name |
1401 | map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", nc, n, n) | 1416 | map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", nc, n, n) |
1402 | if n == 2 or n == 3 then | 1417 | if n == 2 or n == 3 then |
1403 | map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:DC%XM", nc, n, n) | 1418 | map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:nDC%XM", nc, n, n) |
1404 | else | 1419 | else |
1405 | map_op[fn.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:DC%XM", nc, nr, n, n) | 1420 | map_op[fn.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:nDC%XM", nc, nr, n, n) |
1406 | map_op[fn.."p_1"] = format("ff:DE%02Xr", nr) | 1421 | map_op[fn.."p_1"] = format("ff:DE%02Xr", nr) |
1407 | map_op[fn.."p_2"] = format("fFf:DE%02Xr", nr) | 1422 | map_op[fn.."p_2"] = format("fFf:DE%02Xr", nr) |
1408 | end | 1423 | end |
@@ -1450,6 +1465,8 @@ local function dopattern(pat, args, sz, op) | |||
1450 | addin = nil | 1465 | addin = nil |
1451 | elseif c == "n" then -- Disable operand size mods for opcode. | 1466 | elseif c == "n" then -- Disable operand size mods for opcode. |
1452 | szov = nil | 1467 | szov = nil |
1468 | elseif c == "W" then -- Force REX.W. | ||
1469 | rex = 8 | ||
1453 | elseif c == "r" then -- Merge 1st operand regno. into opcode. | 1470 | elseif c == "r" then -- Merge 1st operand regno. into opcode. |
1454 | addin = args[1]; opcode = opcode + (addin.reg % 8) | 1471 | addin = args[1]; opcode = opcode + (addin.reg % 8) |
1455 | if narg < 2 then narg = 2 end | 1472 | if narg < 2 then narg = 2 end |
@@ -1468,7 +1485,7 @@ local function dopattern(pat, args, sz, op) | |||
1468 | local nn = c == "m" and 1 or 2 | 1485 | local nn = c == "m" and 1 or 2 |
1469 | local t = args[nn] | 1486 | local t = args[nn] |
1470 | if narg <= nn then narg = nn + 1 end | 1487 | if narg <= nn then narg = nn + 1 end |
1471 | local rex = szov == "q" and 8 or 0 | 1488 | if szov == "q" and rex == 0 then rex = rex + 8 end |
1472 | if t.reg and t.reg > 7 then rex = rex + 1 end | 1489 | if t.reg and t.reg > 7 then rex = rex + 1 end |
1473 | if t.xreg and t.xreg > 7 then rex = rex + 2 end | 1490 | if t.xreg and t.xreg > 7 then rex = rex + 2 end |
1474 | if s > 7 then rex = rex + 4 end | 1491 | if s > 7 then rex = rex + 4 end |
@@ -1479,11 +1496,13 @@ local function dopattern(pat, args, sz, op) | |||
1479 | addin = nil | 1496 | addin = nil |
1480 | else | 1497 | else |
1481 | if opcode then -- Flush opcode. | 1498 | if opcode then -- Flush opcode. |
1499 | if szov == "q" and rex == 0 then rex = rex + 8 end | ||
1482 | if addin and addin.reg == -1 then | 1500 | if addin and addin.reg == -1 then |
1483 | wputop(szov, opcode + 1, 0) | 1501 | wputop(szov, opcode + 1, rex) |
1484 | waction("VREG", addin.vreg); wputxb(0) | 1502 | waction("VREG", addin.vreg); wputxb(0) |
1485 | else | 1503 | else |
1486 | wputop(szov, opcode, (addin and addin.reg > 7) and 4 or 0) | 1504 | if addin and addin.reg > 7 then rex = rex + 1 end |
1505 | wputop(szov, opcode, rex) | ||
1487 | end | 1506 | end |
1488 | opcode = nil | 1507 | opcode = nil |
1489 | end | 1508 | end |