about | summary | refs | log | tree | commit | diff
path: root/dynasm
diff options
context:
space:
mode:
author Mike Pall <mike> 2015-12-28 13:28:24 +0100
committer Mike Pall <mike> 2015-12-28 13:44:13 +0100
commit a687a60eaac9bd700f821415eaa50393c2fea18a (patch)
tree 940d3e9e45fe113ff7ee44e2d7123760852bb855 /dynasm
parent 20f4141b1a4abbb42b61b87cc3b52906acb12f0e (diff)
download luajit-a687a60eaac9bd700f821415eaa50393c2fea18a.tar.gz
luajit-a687a60eaac9bd700f821415eaa50393c2fea18a.tar.bz2
luajit-a687a60eaac9bd700f821415eaa50393c2fea18a.zip
DynASM/x64: Add full VREG support.
Contributed by Peter Cawley.
Diffstat (limited to 'dynasm')
-rw-r--r-- dynasm/dasm_x86.h 33
-rw-r--r-- dynasm/dasm_x86.lua 107
2 files changed, 104 insertions, 36 deletions
diff --git a/dynasm/dasm_x86.h b/dynasm/dasm_x86.h
index 175febe0..4d2b773b 100644
--- a/dynasm/dasm_x86.h
+++ b/dynasm/dasm_x86.h
@@ -170,7 +170,7 @@ void dasm_put(Dst_DECL, int start, ...)
170 dasm_State *D = Dst_REF; 170 dasm_State *D = Dst_REF;
171 dasm_ActList p = D->actionlist + start; 171 dasm_ActList p = D->actionlist + start;
172 dasm_Section *sec = D->section; 172 dasm_Section *sec = D->section;
173 int pos = sec->pos, ofs = sec->ofs, mrm = 4; 173 int pos = sec->pos, ofs = sec->ofs, mrm = -1;
174 int *b; 174 int *b;
175 175
176 if (pos >= sec->epos) { 176 if (pos >= sec->epos) {
@@ -193,7 +193,7 @@ void dasm_put(Dst_DECL, int start, ...)
193 b[pos++] = n; 193 b[pos++] = n;
194 switch (action) { 194 switch (action) {
195 case DASM_DISP: 195 case DASM_DISP:
196 if (n == 0) { if ((mrm&7) == 4) mrm = p[-2]; if ((mrm&7) != 5) break; } 196 if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; }
197 case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; 197 case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob;
198 case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */ 198 case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */
199 case DASM_IMM_D: ofs += 4; break; 199 case DASM_IMM_D: ofs += 4; break;
@@ -203,10 +203,17 @@ void dasm_put(Dst_DECL, int start, ...)
203 case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; 203 case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break;
204 case DASM_SPACE: p++; ofs += n; break; 204 case DASM_SPACE: p++; ofs += n; break;
205 case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ 205 case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */
206 case DASM_VREG: CK((n&-8) == 0 && (n != 4 || (*p&1) == 0), RANGE_VREG); 206 case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG);
207 if (*p++ == 1 && *p == DASM_DISP) mrm = n; continue; 207 if (*p < 0x40 && p[1] == DASM_DISP) mrm = n;
208 if (*p < 0x20 && (n&7) == 4) ofs++;
209 switch ((*p++ >> 3) & 3) {
210 case 3: n |= b[pos-3];
211 case 2: n |= b[pos-2];
212 case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; }
213 }
214 continue;
208 } 215 }
209 mrm = 4; 216 mrm = -1;
210 } else { 217 } else {
211 int *pl, n; 218 int *pl, n;
212 switch (action) { 219 switch (action) {
@@ -393,7 +400,21 @@ int dasm_encode(Dst_DECL, void *buffer)
393 case DASM_IMM_W: dasmw(n); break; 400 case DASM_IMM_W: dasmw(n); break;
394 case DASM_VREG: { 401 case DASM_VREG: {
395 int t = *p++; 402 int t = *p++;
396 if (t >= 5) n <<= 4; else if (t >= 2) n <<= 3; 403 unsigned char *ex = cp - (t&7);
404 if ((n & 8) && t < 0xa0) {
405 if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6);
406 } else if (n & 0x10) {
407 if (*ex & 0x80) {
408 *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2;
409 }
410 while (++ex < cp) ex[-1] = *ex;
411 if (mark) mark--;
412 cp--;
413 }
414 n &= 7;
415 if (t >= 0xc0) n <<= 4;
416 else if (t >= 0x40) n <<= 3;
417 else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; }
397 cp[-1] ^= n; 418 cp[-1] ^= n;
398 break; 419 break;
399 } 420 }
diff --git a/dynasm/dasm_x86.lua b/dynasm/dasm_x86.lua
index 1fa80b55..8c4bfc61 100644
--- a/dynasm/dasm_x86.lua
+++ b/dynasm/dasm_x86.lua
@@ -41,7 +41,7 @@ local action_names = {
41 -- int arg, 1 buffer pos: 41 -- int arg, 1 buffer pos:
42 "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB", 42 "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
43 -- action arg (1 byte), int arg, 1 buffer pos (reg/num): 43 -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
44 "VREG", "SPACE", -- !x64: VREG support NYI. 44 "VREG", "SPACE",
45 -- ptrdiff_t arg, 1 buffer pos (address): !x64 45 -- ptrdiff_t arg, 1 buffer pos (address): !x64
46 "SETLABEL", "REL_A", 46 "SETLABEL", "REL_A",
47 -- action arg (1 byte) or int arg, 2 buffer pos (link, offset): 47 -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
@@ -83,6 +83,21 @@ local actargs = { 0 }
83-- Current number of section buffer positions for dasm_put(). 83-- Current number of section buffer positions for dasm_put().
84local secpos = 1 84local secpos = 1
85 85
-- VREG kind encodings. Each kind is a small code stored pre-shifted by
-- 5 bits (i.e. multiplied by 32). Several kinds share the same code.
local map_vreg = {
  ["modrm.rm.m"] = 0 * 32,  -- 0x00
  ["modrm.rm.r"] = 1 * 32,  -- 0x20
  ["opcode"]     = 1 * 32,  -- 0x20
  ["sib.base"]   = 1 * 32,  -- 0x20
  ["sib.index"]  = 2 * 32,  -- 0x40
  ["modrm.reg"]  = 4 * 32,  -- 0x80
  ["vex.v"]      = 5 * 32,  -- 0xa0
  ["imm.hi"]     = 6 * 32,  -- 0xc0
}

-- Number of VREG actions seen so far that contribute to REX/VEX shrinkage
-- and have not yet been written out.
local vreg_shrink_count = 0
100
86------------------------------------------------------------------------------ 101------------------------------------------------------------------------------
87 102
88-- Compute action numbers for action names. 103-- Compute action numbers for action names.
@@ -134,6 +149,21 @@ local function waction(action, a, num)
134 if a or num then secpos = secpos + (num or 1) end 149 if a or num then secpos = secpos + (num or 1) end
135end 150end
136 151
-- Optionally add a VREG action.
-- Does nothing when vreg is nil/false. Otherwise emits a VREG action for
-- vreg, followed by its action argument, which is the sum of:
--   * the kind code map_vreg[kind],
--   * the pending shrink count scaled by 8 (unless defer is set),
--   * psz (defaults to 0).
-- sk (defaults to 0) is a threshold on the kind code: a kind code below it
-- increments the pending shrink count. With defer set, the count is kept
-- pending for a later call instead of being written and reset here.
-- Raises an error if kind is not a key of map_vreg.
local function wvreg(kind, vreg, psz, sk, defer)
  if not vreg then return end
  waction("VREG", vreg)
  -- Report the unknown kind itself, not the register expression.
  local b = assert(map_vreg[kind], "bad vreg kind `"..tostring(kind).."'")
  if b < (sk or 0) then
    vreg_shrink_count = vreg_shrink_count + 1
  end
  if not defer then
    -- Flush the accumulated count into the argument and start over.
    b = b + vreg_shrink_count * 8
    vreg_shrink_count = 0
  end
  wputxb(b + (psz or 0))
end
166
137-- Add call to embedded DynASM C code. 167-- Add call to embedded DynASM C code.
138local function wcall(func, args) 168local function wcall(func, args)
139 wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true) 169 wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
@@ -326,6 +356,7 @@ mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
326mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"}) 356mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
327map_reg_valid_index[map_archdef.esp] = false 357map_reg_valid_index[map_archdef.esp] = false
328if x64 then map_reg_valid_index[map_archdef.rsp] = false end 358if x64 then map_reg_valid_index[map_archdef.rsp] = false end
359if x64 then map_reg_needrex[map_archdef.Rb] = true end
329map_archdef["Ra"] = "@"..addrsize 360map_archdef["Ra"] = "@"..addrsize
330 361
331-- FP registers (internally tword sized, but use "f" as operand size). 362-- FP registers (internally tword sized, but use "f" as operand size).
@@ -463,16 +494,24 @@ local function wputszarg(sz, n)
463end 494end
464 495
465-- Put multi-byte opcode with operand-size dependent modifications. 496-- Put multi-byte opcode with operand-size dependent modifications.
466local function wputop(sz, op, rex, vex) 497local function wputop(sz, op, rex, vex, vregr, vregxb)
498 local psz, sk = 0, nil
467 if vex then 499 if vex then
468 local tail 500 local tail
469 if vex.m == 1 and band(rex, 11) == 0 then 501 if vex.m == 1 and band(rex, 11) == 0 then
470 wputb(0xc5) 502 if x64 and vregxb then
503 sk = map_vreg["modrm.reg"]
504 else
505 wputb(0xc5)
471 tail = shl(bxor(band(rex, 4), 4), 5) 506 tail = shl(bxor(band(rex, 4), 4), 5)
472 else 507 psz = 3
508 end
509 end
510 if not tail then
473 wputb(0xc4) 511 wputb(0xc4)
474 wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m) 512 wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
475 tail = shl(band(rex, 8), 4) 513 tail = shl(band(rex, 8), 4)
514 psz = 4
476 end 515 end
477 local reg, vreg = 0, nil 516 local reg, vreg = 0, nil
478 if vex.v then 517 if vex.v then
@@ -482,12 +521,18 @@ local function wputop(sz, op, rex, vex)
482 end 521 end
483 if sz == "y" or vex.l then tail = tail + 4 end 522 if sz == "y" or vex.l then tail = tail + 4 end
484 wputb(tail + shl(bxor(reg, 15), 3) + vex.p) 523 wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
485 if vreg then waction("VREG", vreg); wputxb(4) end 524 wvreg("vex.v", vreg)
486 rex = 0 525 rex = 0
487 if op >= 256 then werror("bad vex opcode") end 526 if op >= 256 then werror("bad vex opcode") end
527 else
528 if rex ~= 0 then
529 if not x64 then werror("bad operand size") end
530 elseif (vregr or vregxb) and x64 then
531 rex = 0x10
532 sk = map_vreg["vex.v"]
533 end
488 end 534 end
489 local r 535 local r
490 if rex ~= 0 and not x64 then werror("bad operand size") end
491 if sz == "w" then wputb(102) end 536 if sz == "w" then wputb(102) end
492 -- Needs >32 bit numbers, but only for crc32 eax, word [ebx] 537 -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
493 if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end 538 if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
@@ -496,20 +541,20 @@ local function wputop(sz, op, rex, vex)
496 if rex ~= 0 then 541 if rex ~= 0 then
497 local opc3 = band(op, 0xffff00) 542 local opc3 = band(op, 0xffff00)
498 if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then 543 if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
499 wputb(64 + band(rex, 15)); rex = 0 544 wputb(64 + band(rex, 15)); rex = 0; psz = 2
500 end 545 end
501 end 546 end
502 wputb(shr(op, 16)); op = band(op, 0xffff) 547 wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
503 end 548 end
504 if op >= 256 then 549 if op >= 256 then
505 local b = shr(op, 8) 550 local b = shr(op, 8)
506 if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0 end 551 if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
507 wputb(b) 552 wputb(b); op = band(op, 255); psz = psz + 1
508 op = band(op, 255)
509 end 553 end
510 if rex ~= 0 then wputb(64 + band(rex, 15)) end 554 if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
511 if sz == "b" then op = op - 1 end 555 if sz == "b" then op = op - 1 end
512 wputb(op) 556 wputb(op)
557 return psz, sk
513end 558end
514 559
515-- Put ModRM or SIB formatted byte. 560-- Put ModRM or SIB formatted byte.
@@ -519,7 +564,7 @@ local function wputmodrm(m, s, rm, vs, vrm)
519end 564end
520 565
521-- Put ModRM/SIB plus optional displacement. 566-- Put ModRM/SIB plus optional displacement.
522local function wputmrmsib(t, imark, s, vsreg) 567local function wputmrmsib(t, imark, s, vsreg, psz, sk)
523 local vreg, vxreg 568 local vreg, vxreg
524 local reg, xreg = t.reg, t.xreg 569 local reg, xreg = t.reg, t.xreg
525 if reg and reg < 0 then reg = 0; vreg = t.vreg end 570 if reg and reg < 0 then reg = 0; vreg = t.vreg end
@@ -529,8 +574,8 @@ local function wputmrmsib(t, imark, s, vsreg)
529 -- Register mode. 574 -- Register mode.
530 if sub(t.mode, 1, 1) == "r" then 575 if sub(t.mode, 1, 1) == "r" then
531 wputmodrm(3, s, reg) 576 wputmodrm(3, s, reg)
532 if vsreg then waction("VREG", vsreg); wputxb(2) end 577 wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
533 if vreg then waction("VREG", vreg); wputxb(0) end 578 wvreg("modrm.rm.r", vreg, psz+1, sk)
534 return 579 return
535 end 580 end
536 581
@@ -544,21 +589,22 @@ local function wputmrmsib(t, imark, s, vsreg)
544 -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp) 589 -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
545 wputmodrm(0, s, 4) 590 wputmodrm(0, s, 4)
546 if imark == "I" then waction("MARK") end 591 if imark == "I" then waction("MARK") end
547 if vsreg then waction("VREG", vsreg); wputxb(2) end 592 wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
548 wputmodrm(t.xsc, xreg, 5) 593 wputmodrm(t.xsc, xreg, 5)
549 if vxreg then waction("VREG", vxreg); wputxb(3) end 594 wvreg("sib.index", vxreg, psz+2, sk)
550 else 595 else
551 -- Pure 32 bit displacement. 596 -- Pure 32 bit displacement.
552 if x64 and tdisp ~= "table" then 597 if x64 and tdisp ~= "table" then
553 wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp) 598 wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
599 wvreg("modrm.reg", vsreg, psz+1, sk)
554 if imark == "I" then waction("MARK") end 600 if imark == "I" then waction("MARK") end
555 wputmodrm(0, 4, 5) 601 wputmodrm(0, 4, 5)
556 else 602 else
557 riprel = x64 603 riprel = x64
558 wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp) 604 wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
605 wvreg("modrm.reg", vsreg, psz+1, sk)
559 if imark == "I" then waction("MARK") end 606 if imark == "I" then waction("MARK") end
560 end 607 end
561 if vsreg then waction("VREG", vsreg); wputxb(2) end
562 end 608 end
563 if riprel then -- Emit rip-relative displacement. 609 if riprel then -- Emit rip-relative displacement.
564 if match("UWSiI", imark) then 610 if match("UWSiI", imark) then
@@ -586,16 +632,16 @@ local function wputmrmsib(t, imark, s, vsreg)
586 if xreg or band(reg, 7) == 4 then 632 if xreg or band(reg, 7) == 4 then
587 wputmodrm(m or 2, s, 4) -- ModRM. 633 wputmodrm(m or 2, s, 4) -- ModRM.
588 if m == nil or imark == "I" then waction("MARK") end 634 if m == nil or imark == "I" then waction("MARK") end
589 if vsreg then waction("VREG", vsreg); wputxb(2) end 635 wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
590 wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB. 636 wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
591 if vxreg then waction("VREG", vxreg); wputxb(3) end 637 wvreg("sib.index", vxreg, psz+2, sk, vreg)
592 if vreg then waction("VREG", vreg); wputxb(1) end 638 wvreg("sib.base", vreg, psz+2, sk)
593 else 639 else
594 wputmodrm(m or 2, s, reg) -- ModRM. 640 wputmodrm(m or 2, s, reg) -- ModRM.
595 if (imark == "I" and (m == 1 or m == 2)) or 641 if (imark == "I" and (m == 1 or m == 2)) or
596 (m == nil and (vsreg or vreg)) then waction("MARK") end 642 (m == nil and (vsreg or vreg)) then waction("MARK") end
597 if vsreg then waction("VREG", vsreg); wputxb(2) end 643 wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
598 if vreg then waction("VREG", vreg); wputxb(1) end 644 wvreg("modrm.rm.m", vreg, psz+1, sk)
599 end 645 end
600 646
601 -- Put displacement. 647 -- Put displacement.
@@ -1761,10 +1807,11 @@ local function dopattern(pat, args, sz, op, needrex)
1761 if t.xreg and t.xreg > 7 then rex = rex + 2 end 1807 if t.xreg and t.xreg > 7 then rex = rex + 2 end
1762 if s > 7 then rex = rex + 4 end 1808 if s > 7 then rex = rex + 4 end
1763 if needrex then rex = rex + 16 end 1809 if needrex then rex = rex + 16 end
1764 wputop(szov, opcode, rex, vex); opcode = nil 1810 local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
1811 opcode = nil
1765 local imark = sub(pat, -1) -- Force a mark (ugly). 1812 local imark = sub(pat, -1) -- Force a mark (ugly).
1766 -- Put ModRM/SIB with regno/last digit as spare. 1813 -- Put ModRM/SIB with regno/last digit as spare.
1767 wputmrmsib(t, imark, s, addin and addin.vreg) 1814 wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
1768 addin = nil 1815 addin = nil
1769 elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix 1816 elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
1770 local b = band(opcode, 255); opcode = shr(opcode, 8) 1817 local b = band(opcode, 255); opcode = shr(opcode, 8)
@@ -1791,8 +1838,8 @@ local function dopattern(pat, args, sz, op, needrex)
1791 if szov == "q" and rex == 0 then rex = rex + 8 end 1838 if szov == "q" and rex == 0 then rex = rex + 8 end
1792 if needrex then rex = rex + 16 end 1839 if needrex then rex = rex + 16 end
1793 if addin and addin.reg == -1 then 1840 if addin and addin.reg == -1 then
1794 wputop(szov, opcode - 7, rex, vex) 1841 local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
1795 waction("VREG", addin.vreg); wputxb(0) 1842 wvreg("opcode", addin.vreg, psz, sk)
1796 else 1843 else
1797 if addin and addin.reg > 7 then rex = rex + 1 end 1844 if addin and addin.reg > 7 then rex = rex + 1 end
1798 wputop(szov, opcode, rex, vex) 1845 wputop(szov, opcode, rex, vex)
@@ -1836,7 +1883,7 @@ local function dopattern(pat, args, sz, op, needrex)
1836 local reg = a.reg 1883 local reg = a.reg
1837 if reg < 0 then 1884 if reg < 0 then
1838 wputb(0) 1885 wputb(0)
1839 waction("VREG", a.vreg); wputxb(5) 1886 wvreg("imm.hi", a.vreg)
1840 else 1887 else
1841 wputb(shl(reg, 4)) 1888 wputb(shl(reg, 4))
1842 end 1889 end
@@ -1988,8 +2035,8 @@ if x64 then
1988 rex = a.reg > 7 and 9 or 8 2035 rex = a.reg > 7 and 9 or 8
1989 end 2036 end
1990 end 2037 end
1991 wputop(sz, opcode, rex) 2038 local psz, sk = wputop(sz, opcode, rex, nil, vreg)
1992 if vreg then waction("VREG", vreg); wputxb(0) end 2039 wvreg("opcode", vreg, psz, sk)
1993 waction("IMM_D", format("(unsigned int)(%s)", op64)) 2040 waction("IMM_D", format("(unsigned int)(%s)", op64))
1994 waction("IMM_D", format("(unsigned int)((%s)>>32)", op64)) 2041 waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
1995 end 2042 end