diff options
Diffstat (limited to 'src/jit')
-rw-r--r-- | src/jit/bc.lua | 19 | ||||
-rw-r--r-- | src/jit/bcsave.lua | 26 | ||||
-rw-r--r-- | src/jit/dis_arm.lua | 18 | ||||
-rw-r--r-- | src/jit/dis_mips.lua | 30 | ||||
-rw-r--r-- | src/jit/dis_mipsel.lua | 15 | ||||
-rw-r--r-- | src/jit/dis_ppc.lua | 18 | ||||
-rw-r--r-- | src/jit/dis_x64.lua | 15 | ||||
-rw-r--r-- | src/jit/dis_x86.lua | 275 | ||||
-rw-r--r-- | src/jit/dump.lua | 29 | ||||
-rw-r--r-- | src/jit/p.lua | 310 | ||||
-rw-r--r-- | src/jit/v.lua | 15 | ||||
-rw-r--r-- | src/jit/zone.lua | 45 |
12 files changed, 635 insertions, 180 deletions
diff --git a/src/jit/bc.lua b/src/jit/bc.lua index 393186a7..a8cb8496 100644 --- a/src/jit/bc.lua +++ b/src/jit/bc.lua | |||
@@ -41,7 +41,7 @@ | |||
41 | 41 | ||
42 | -- Cache some library functions and objects. | 42 | -- Cache some library functions and objects. |
43 | local jit = require("jit") | 43 | local jit = require("jit") |
44 | assert(jit.version_num == 20004, "LuaJIT core/library version mismatch") | 44 | assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") |
45 | local jutil = require("jit.util") | 45 | local jutil = require("jit.util") |
46 | local vmdef = require("jit.vmdef") | 46 | local vmdef = require("jit.vmdef") |
47 | local bit = require("bit") | 47 | local bit = require("bit") |
@@ -179,13 +179,12 @@ local function bcliston(outfile) | |||
179 | end | 179 | end |
180 | 180 | ||
181 | -- Public module functions. | 181 | -- Public module functions. |
182 | module(...) | 182 | return { |
183 | 183 | line = bcline, | |
184 | line = bcline | 184 | dump = bcdump, |
185 | dump = bcdump | 185 | targets = bctargets, |
186 | targets = bctargets | 186 | on = bcliston, |
187 | 187 | off = bclistoff, | |
188 | on = bcliston | 188 | start = bcliston -- For -j command line option. |
189 | off = bclistoff | 189 | } |
190 | start = bcliston -- For -j command line option. | ||
191 | 190 | ||
diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua index 5c417c06..d0968b18 100644 --- a/src/jit/bcsave.lua +++ b/src/jit/bcsave.lua | |||
@@ -11,7 +11,7 @@ | |||
11 | ------------------------------------------------------------------------------ | 11 | ------------------------------------------------------------------------------ |
12 | 12 | ||
13 | local jit = require("jit") | 13 | local jit = require("jit") |
14 | assert(jit.version_num == 20004, "LuaJIT core/library version mismatch") | 14 | assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") |
15 | local bit = require("bit") | 15 | local bit = require("bit") |
16 | 16 | ||
17 | -- Symbol name prefix for LuaJIT bytecode. | 17 | -- Symbol name prefix for LuaJIT bytecode. |
@@ -63,7 +63,7 @@ local map_type = { | |||
63 | } | 63 | } |
64 | 64 | ||
65 | local map_arch = { | 65 | local map_arch = { |
66 | x86 = true, x64 = true, arm = true, ppc = true, ppcspe = true, | 66 | x86 = true, x64 = true, arm = true, arm64 = true, ppc = true, |
67 | mips = true, mipsel = true, | 67 | mips = true, mipsel = true, |
68 | } | 68 | } |
69 | 69 | ||
@@ -200,9 +200,9 @@ typedef struct { | |||
200 | ]] | 200 | ]] |
201 | local symname = LJBC_PREFIX..ctx.modname | 201 | local symname = LJBC_PREFIX..ctx.modname |
202 | local is64, isbe = false, false | 202 | local is64, isbe = false, false |
203 | if ctx.arch == "x64" then | 203 | if ctx.arch == "x64" or ctx.arch == "arm64" then |
204 | is64 = true | 204 | is64 = true |
205 | elseif ctx.arch == "ppc" or ctx.arch == "ppcspe" or ctx.arch == "mips" then | 205 | elseif ctx.arch == "ppc" or ctx.arch == "mips" then |
206 | isbe = true | 206 | isbe = true |
207 | end | 207 | end |
208 | 208 | ||
@@ -237,7 +237,7 @@ typedef struct { | |||
237 | hdr.eendian = isbe and 2 or 1 | 237 | hdr.eendian = isbe and 2 or 1 |
238 | hdr.eversion = 1 | 238 | hdr.eversion = 1 |
239 | hdr.type = f16(1) | 239 | hdr.type = f16(1) |
240 | hdr.machine = f16(({ x86=3, x64=62, arm=40, ppc=20, ppcspe=20, mips=8, mipsel=8 })[ctx.arch]) | 240 | hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, ppc=20, mips=8, mipsel=8 })[ctx.arch]) |
241 | if ctx.arch == "mips" or ctx.arch == "mipsel" then | 241 | if ctx.arch == "mips" or ctx.arch == "mipsel" then |
242 | hdr.flags = 0x50001006 | 242 | hdr.flags = 0x50001006 |
243 | end | 243 | end |
@@ -477,13 +477,13 @@ typedef struct { | |||
477 | } mach_obj_64; | 477 | } mach_obj_64; |
478 | typedef struct { | 478 | typedef struct { |
479 | mach_fat_header fat; | 479 | mach_fat_header fat; |
480 | mach_fat_arch fat_arch[4]; | 480 | mach_fat_arch fat_arch[2]; |
481 | struct { | 481 | struct { |
482 | mach_header hdr; | 482 | mach_header hdr; |
483 | mach_segment_command seg; | 483 | mach_segment_command seg; |
484 | mach_section sec; | 484 | mach_section sec; |
485 | mach_symtab_command sym; | 485 | mach_symtab_command sym; |
486 | } arch[4]; | 486 | } arch[2]; |
487 | mach_nlist sym_entry; | 487 | mach_nlist sym_entry; |
488 | uint8_t space[4096]; | 488 | uint8_t space[4096]; |
489 | } mach_fat_obj; | 489 | } mach_fat_obj; |
@@ -494,6 +494,8 @@ typedef struct { | |||
494 | is64, align, mobj = true, 8, "mach_obj_64" | 494 | is64, align, mobj = true, 8, "mach_obj_64" |
495 | elseif ctx.arch == "arm" then | 495 | elseif ctx.arch == "arm" then |
496 | isfat, mobj = true, "mach_fat_obj" | 496 | isfat, mobj = true, "mach_fat_obj" |
497 | elseif ctx.arch == "arm64" then | ||
498 | is64, align, isfat, mobj = true, 8, true, "mach_fat_obj" | ||
497 | else | 499 | else |
498 | check(ctx.arch == "x86", "unsupported architecture for OSX") | 500 | check(ctx.arch == "x86", "unsupported architecture for OSX") |
499 | end | 501 | end |
@@ -503,8 +505,8 @@ typedef struct { | |||
503 | -- Create Mach-O object and fill in header. | 505 | -- Create Mach-O object and fill in header. |
504 | local o = ffi.new(mobj) | 506 | local o = ffi.new(mobj) |
505 | local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align) | 507 | local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align) |
506 | local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12,12,12} })[ctx.arch] | 508 | local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12}, arm64={0x01000007,0x0100000c} })[ctx.arch] |
507 | local cpusubtype = ({ x86={3}, x64={3}, arm={3,6,9,11} })[ctx.arch] | 509 | local cpusubtype = ({ x86={3}, x64={3}, arm={3,9}, arm64={3,0} })[ctx.arch] |
508 | if isfat then | 510 | if isfat then |
509 | o.fat.magic = be32(0xcafebabe) | 511 | o.fat.magic = be32(0xcafebabe) |
510 | o.fat.nfat_arch = be32(#cpusubtype) | 512 | o.fat.nfat_arch = be32(#cpusubtype) |
@@ -653,7 +655,7 @@ end | |||
653 | ------------------------------------------------------------------------------ | 655 | ------------------------------------------------------------------------------ |
654 | 656 | ||
655 | -- Public module functions. | 657 | -- Public module functions. |
656 | module(...) | 658 | return { |
657 | 659 | start = docmd -- Process -b command line option. | |
658 | start = docmd -- Process -b command line option. | 660 | } |
659 | 661 | ||
diff --git a/src/jit/dis_arm.lua b/src/jit/dis_arm.lua index 661f661a..1296d816 100644 --- a/src/jit/dis_arm.lua +++ b/src/jit/dis_arm.lua | |||
@@ -658,7 +658,7 @@ local function disass_block(ctx, ofs, len) | |||
658 | end | 658 | end |
659 | 659 | ||
660 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). | 660 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). |
661 | local function create_(code, addr, out) | 661 | local function create(code, addr, out) |
662 | local ctx = {} | 662 | local ctx = {} |
663 | ctx.code = code | 663 | ctx.code = code |
664 | ctx.addr = addr or 0 | 664 | ctx.addr = addr or 0 |
@@ -670,20 +670,20 @@ local function create_(code, addr, out) | |||
670 | end | 670 | end |
671 | 671 | ||
672 | -- Simple API: disassemble code (a string) at address and output via out. | 672 | -- Simple API: disassemble code (a string) at address and output via out. |
673 | local function disass_(code, addr, out) | 673 | local function disass(code, addr, out) |
674 | create_(code, addr, out):disass() | 674 | create(code, addr, out):disass() |
675 | end | 675 | end |
676 | 676 | ||
677 | -- Return register name for RID. | 677 | -- Return register name for RID. |
678 | local function regname_(r) | 678 | local function regname(r) |
679 | if r < 16 then return map_gpr[r] end | 679 | if r < 16 then return map_gpr[r] end |
680 | return "d"..(r-16) | 680 | return "d"..(r-16) |
681 | end | 681 | end |
682 | 682 | ||
683 | -- Public module functions. | 683 | -- Public module functions. |
684 | module(...) | 684 | return { |
685 | 685 | create = create, | |
686 | create = create_ | 686 | disass = disass, |
687 | disass = disass_ | 687 | regname = regname |
688 | regname = regname_ | 688 | } |
689 | 689 | ||
diff --git a/src/jit/dis_mips.lua b/src/jit/dis_mips.lua index 5b68b069..2bf8b389 100644 --- a/src/jit/dis_mips.lua +++ b/src/jit/dis_mips.lua | |||
@@ -384,7 +384,7 @@ local function disass_block(ctx, ofs, len) | |||
384 | end | 384 | end |
385 | 385 | ||
386 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). | 386 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). |
387 | local function create_(code, addr, out) | 387 | local function create(code, addr, out) |
388 | local ctx = {} | 388 | local ctx = {} |
389 | ctx.code = code | 389 | ctx.code = code |
390 | ctx.addr = addr or 0 | 390 | ctx.addr = addr or 0 |
@@ -396,33 +396,33 @@ local function create_(code, addr, out) | |||
396 | return ctx | 396 | return ctx |
397 | end | 397 | end |
398 | 398 | ||
399 | local function create_el_(code, addr, out) | 399 | local function create_el(code, addr, out) |
400 | local ctx = create_(code, addr, out) | 400 | local ctx = create(code, addr, out) |
401 | ctx.get = get_le | 401 | ctx.get = get_le |
402 | return ctx | 402 | return ctx |
403 | end | 403 | end |
404 | 404 | ||
405 | -- Simple API: disassemble code (a string) at address and output via out. | 405 | -- Simple API: disassemble code (a string) at address and output via out. |
406 | local function disass_(code, addr, out) | 406 | local function disass(code, addr, out) |
407 | create_(code, addr, out):disass() | 407 | create(code, addr, out):disass() |
408 | end | 408 | end |
409 | 409 | ||
410 | local function disass_el_(code, addr, out) | 410 | local function disass_el(code, addr, out) |
411 | create_el_(code, addr, out):disass() | 411 | create_el(code, addr, out):disass() |
412 | end | 412 | end |
413 | 413 | ||
414 | -- Return register name for RID. | 414 | -- Return register name for RID. |
415 | local function regname_(r) | 415 | local function regname(r) |
416 | if r < 32 then return map_gpr[r] end | 416 | if r < 32 then return map_gpr[r] end |
417 | return "f"..(r-32) | 417 | return "f"..(r-32) |
418 | end | 418 | end |
419 | 419 | ||
420 | -- Public module functions. | 420 | -- Public module functions. |
421 | module(...) | 421 | return { |
422 | 422 | create = create, | |
423 | create = create_ | 423 | create_el = create_el, |
424 | create_el = create_el_ | 424 | disass = disass, |
425 | disass = disass_ | 425 | disass_el = disass_el, |
426 | disass_el = disass_el_ | 426 | regname = regname |
427 | regname = regname_ | 427 | } |
428 | 428 | ||
diff --git a/src/jit/dis_mipsel.lua b/src/jit/dis_mipsel.lua index d242f8dd..3f2f6efc 100644 --- a/src/jit/dis_mipsel.lua +++ b/src/jit/dis_mipsel.lua | |||
@@ -8,13 +8,10 @@ | |||
8 | -- MIPS disassembler module. All the interesting stuff is there. | 8 | -- MIPS disassembler module. All the interesting stuff is there. |
9 | ------------------------------------------------------------------------------ | 9 | ------------------------------------------------------------------------------ |
10 | 10 | ||
11 | local require = require | 11 | local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips") |
12 | 12 | return { | |
13 | module(...) | 13 | create = dis_mips.create_el, |
14 | 14 | disass = dis_mips.disass_el, | |
15 | local dis_mips = require(_PACKAGE.."dis_mips") | 15 | regname = dis_mips.regname |
16 | 16 | } | |
17 | create = dis_mips.create_el | ||
18 | disass = dis_mips.disass_el | ||
19 | regname = dis_mips.regname | ||
20 | 17 | ||
diff --git a/src/jit/dis_ppc.lua b/src/jit/dis_ppc.lua index 8afecbe6..30f51ecd 100644 --- a/src/jit/dis_ppc.lua +++ b/src/jit/dis_ppc.lua | |||
@@ -560,7 +560,7 @@ local function disass_block(ctx, ofs, len) | |||
560 | end | 560 | end |
561 | 561 | ||
562 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). | 562 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). |
563 | local function create_(code, addr, out) | 563 | local function create(code, addr, out) |
564 | local ctx = {} | 564 | local ctx = {} |
565 | ctx.code = code | 565 | ctx.code = code |
566 | ctx.addr = addr or 0 | 566 | ctx.addr = addr or 0 |
@@ -572,20 +572,20 @@ local function create_(code, addr, out) | |||
572 | end | 572 | end |
573 | 573 | ||
574 | -- Simple API: disassemble code (a string) at address and output via out. | 574 | -- Simple API: disassemble code (a string) at address and output via out. |
575 | local function disass_(code, addr, out) | 575 | local function disass(code, addr, out) |
576 | create_(code, addr, out):disass() | 576 | create(code, addr, out):disass() |
577 | end | 577 | end |
578 | 578 | ||
579 | -- Return register name for RID. | 579 | -- Return register name for RID. |
580 | local function regname_(r) | 580 | local function regname(r) |
581 | if r < 32 then return map_gpr[r] end | 581 | if r < 32 then return map_gpr[r] end |
582 | return "f"..(r-32) | 582 | return "f"..(r-32) |
583 | end | 583 | end |
584 | 584 | ||
585 | -- Public module functions. | 585 | -- Public module functions. |
586 | module(...) | 586 | return { |
587 | 587 | create = create, | |
588 | create = create_ | 588 | disass = disass, |
589 | disass = disass_ | 589 | regname = regname |
590 | regname = regname_ | 590 | } |
591 | 591 | ||
diff --git a/src/jit/dis_x64.lua b/src/jit/dis_x64.lua index a427e347..077b1b12 100644 --- a/src/jit/dis_x64.lua +++ b/src/jit/dis_x64.lua | |||
@@ -8,13 +8,10 @@ | |||
8 | -- x86/x64 disassembler module. All the interesting stuff is there. | 8 | -- x86/x64 disassembler module. All the interesting stuff is there. |
9 | ------------------------------------------------------------------------------ | 9 | ------------------------------------------------------------------------------ |
10 | 10 | ||
11 | local require = require | 11 | local dis_x86 = require((string.match(..., ".*%.") or "").."dis_x86") |
12 | 12 | return { | |
13 | module(...) | 13 | create = dis_x86.create64, |
14 | 14 | disass = dis_x86.disass64, | |
15 | local dis_x86 = require(_PACKAGE.."dis_x86") | 15 | regname = dis_x86.regname64 |
16 | 16 | } | |
17 | create = dis_x86.create64 | ||
18 | disass = dis_x86.disass64 | ||
19 | regname = dis_x86.regname64 | ||
20 | 17 | ||
diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua index deb2f304..0bbd198f 100644 --- a/src/jit/dis_x86.lua +++ b/src/jit/dis_x86.lua | |||
@@ -15,19 +15,20 @@ | |||
15 | -- Intel and AMD manuals. The supported instruction set is quite extensive | 15 | -- Intel and AMD manuals. The supported instruction set is quite extensive |
16 | -- and reflects what a current generation Intel or AMD CPU implements in | 16 | -- and reflects what a current generation Intel or AMD CPU implements in |
17 | -- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3, | 17 | -- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3, |
18 | -- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM) | 18 | -- SSE4.1, SSE4.2, SSE4a, AVX, AVX2 and even privileged and hypervisor |
19 | -- instructions. | 19 | -- (VMX/SVM) instructions. |
20 | -- | 20 | -- |
21 | -- Notes: | 21 | -- Notes: |
22 | -- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported. | 22 | -- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported. |
23 | -- * No attempt at optimization has been made -- it's fast enough for my needs. | 23 | -- * No attempt at optimization has been made -- it's fast enough for my needs. |
24 | -- * The public API may change when more architectures are added. | ||
25 | ------------------------------------------------------------------------------ | 24 | ------------------------------------------------------------------------------ |
26 | 25 | ||
27 | local type = type | 26 | local type = type |
28 | local sub, byte, format = string.sub, string.byte, string.format | 27 | local sub, byte, format = string.sub, string.byte, string.format |
29 | local match, gmatch, gsub = string.match, string.gmatch, string.gsub | 28 | local match, gmatch, gsub = string.match, string.gmatch, string.gsub |
30 | local lower, rep = string.lower, string.rep | 29 | local lower, rep = string.lower, string.rep |
30 | local bit = require("bit") | ||
31 | local tohex = bit.tohex | ||
31 | 32 | ||
32 | -- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on. | 33 | -- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on. |
33 | local map_opc1_32 = { | 34 | local map_opc1_32 = { |
@@ -76,7 +77,7 @@ local map_opc1_32 = { | |||
76 | "movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi", | 77 | "movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi", |
77 | "movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI", | 78 | "movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI", |
78 | --Cx | 79 | --Cx |
79 | "shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi", | 80 | "shift!Bmu","shift!Vmu","retBw","ret","vex*3$lesVrm","vex*2$ldsVrm","movBmi","movVmi", |
80 | "enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS", | 81 | "enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS", |
81 | --Dx | 82 | --Dx |
82 | "shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb", | 83 | "shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb", |
@@ -101,7 +102,7 @@ local map_opc1_64 = setmetatable({ | |||
101 | [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb", | 102 | [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb", |
102 | [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb", | 103 | [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb", |
103 | [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb", | 104 | [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb", |
104 | [0x82]=false, [0x9a]=false, [0xc4]=false, [0xc5]=false, [0xce]=false, | 105 | [0x82]=false, [0x9a]=false, [0xc4]="vex*3", [0xc5]="vex*2", [0xce]=false, |
105 | [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false, | 106 | [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false, |
106 | }, { __index = map_opc1_32 }) | 107 | }, { __index = map_opc1_32 }) |
107 | 108 | ||
@@ -112,12 +113,12 @@ local map_opc2 = { | |||
112 | [0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret", | 113 | [0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret", |
113 | "invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu", | 114 | "invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu", |
114 | --1x | 115 | --1x |
115 | "movupsXrm|movssXrm|movupdXrm|movsdXrm", | 116 | "movupsXrm|movssXrvm|movupdXrm|movsdXrvm", |
116 | "movupsXmr|movssXmr|movupdXmr|movsdXmr", | 117 | "movupsXmr|movssXmvr|movupdXmr|movsdXmvr", |
117 | "movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm", | 118 | "movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm", |
118 | "movlpsXmr||movlpdXmr", | 119 | "movlpsXmr||movlpdXmr", |
119 | "unpcklpsXrm||unpcklpdXrm", | 120 | "unpcklpsXrvm||unpcklpdXrvm", |
120 | "unpckhpsXrm||unpckhpdXrm", | 121 | "unpckhpsXrvm||unpckhpdXrvm", |
121 | "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm", | 122 | "movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm", |
122 | "movhpsXmr||movhpdXmr", | 123 | "movhpsXmr||movhpdXmr", |
123 | "$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm", | 124 | "$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm", |
@@ -126,7 +127,7 @@ local map_opc2 = { | |||
126 | "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil, | 127 | "movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil, |
127 | "movapsXrm||movapdXrm", | 128 | "movapsXrm||movapdXrm", |
128 | "movapsXmr||movapdXmr", | 129 | "movapsXmr||movapdXmr", |
129 | "cvtpi2psXrMm|cvtsi2ssXrVmt|cvtpi2pdXrMm|cvtsi2sdXrVmt", | 130 | "cvtpi2psXrMm|cvtsi2ssXrvVmt|cvtpi2pdXrMm|cvtsi2sdXrvVmt", |
130 | "movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr", | 131 | "movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr", |
131 | "cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm", | 132 | "cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm", |
132 | "cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm", | 133 | "cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm", |
@@ -142,27 +143,27 @@ local map_opc2 = { | |||
142 | "cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm", | 143 | "cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm", |
143 | --5x | 144 | --5x |
144 | "movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm", | 145 | "movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm", |
145 | "rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm", | 146 | "rsqrtpsXrm|rsqrtssXrvm","rcppsXrm|rcpssXrvm", |
146 | "andpsXrm||andpdXrm","andnpsXrm||andnpdXrm", | 147 | "andpsXrvm||andpdXrvm","andnpsXrvm||andnpdXrvm", |
147 | "orpsXrm||orpdXrm","xorpsXrm||xorpdXrm", | 148 | "orpsXrvm||orpdXrvm","xorpsXrvm||xorpdXrvm", |
148 | "addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm", | 149 | "addpsXrvm|addssXrvm|addpdXrvm|addsdXrvm","mulpsXrvm|mulssXrvm|mulpdXrvm|mulsdXrvm", |
149 | "cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm", | 150 | "cvtps2pdXrm|cvtss2sdXrvm|cvtpd2psXrm|cvtsd2ssXrvm", |
150 | "cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm", | 151 | "cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm", |
151 | "subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm", | 152 | "subpsXrvm|subssXrvm|subpdXrvm|subsdXrvm","minpsXrvm|minssXrvm|minpdXrvm|minsdXrvm", |
152 | "divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm", | 153 | "divpsXrvm|divssXrvm|divpdXrvm|divsdXrvm","maxpsXrvm|maxssXrvm|maxpdXrvm|maxsdXrvm", |
153 | --6x | 154 | --6x |
154 | "punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm", | 155 | "punpcklbwPrvm","punpcklwdPrvm","punpckldqPrvm","packsswbPrvm", |
155 | "pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm", | 156 | "pcmpgtbPrvm","pcmpgtwPrvm","pcmpgtdPrvm","packuswbPrvm", |
156 | "punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm", | 157 | "punpckhbwPrvm","punpckhwdPrvm","punpckhdqPrvm","packssdwPrvm", |
157 | "||punpcklqdqXrm","||punpckhqdqXrm", | 158 | "||punpcklqdqXrvm","||punpckhqdqXrvm", |
158 | "movPrVSm","movqMrm|movdquXrm|movdqaXrm", | 159 | "movPrVSm","movqMrm|movdquXrm|movdqaXrm", |
159 | --7x | 160 | --7x |
160 | "pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu", | 161 | "pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pvmu", |
161 | "pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu", | 162 | "pshiftd!Pvmu","pshiftq!Mvmu||pshiftdq!Xvmu", |
162 | "pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|", | 163 | "pcmpeqbPrvm","pcmpeqwPrvm","pcmpeqdPrvm","emms*|", |
163 | "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$", | 164 | "vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$", |
164 | nil,nil, | 165 | nil,nil, |
165 | "||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm", | 166 | "||haddpdXrvm|haddpsXrvm","||hsubpdXrvm|hsubpsXrvm", |
166 | "movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr", | 167 | "movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr", |
167 | --8x | 168 | --8x |
168 | "joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj", | 169 | "joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj", |
@@ -180,27 +181,27 @@ nil,nil, | |||
180 | "bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt", | 181 | "bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt", |
181 | --Cx | 182 | --Cx |
182 | "xaddBmr","xaddVmr", | 183 | "xaddBmr","xaddVmr", |
183 | "cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|", | 184 | "cmppsXrvmu|cmpssXrvmu|cmppdXrvmu|cmpsdXrvmu","$movntiVmr|", |
184 | "pinsrwPrWmu","pextrwDrPmu", | 185 | "pinsrwPrvWmu","pextrwDrPmu", |
185 | "shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp", | 186 | "shufpsXrvmu||shufpdXrvmu","$cmpxchg!Qmp", |
186 | "bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR", | 187 | "bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR", |
187 | --Dx | 188 | --Dx |
188 | "||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm", | 189 | "||addsubpdXrvm|addsubpsXrvm","psrlwPrvm","psrldPrvm","psrlqPrvm", |
189 | "paddqPrm","pmullwPrm", | 190 | "paddqPrvm","pmullwPrvm", |
190 | "|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm", | 191 | "|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm", |
191 | "psubusbPrm","psubuswPrm","pminubPrm","pandPrm", | 192 | "psubusbPrvm","psubuswPrvm","pminubPrvm","pandPrvm", |
192 | "paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm", | 193 | "paddusbPrvm","padduswPrvm","pmaxubPrvm","pandnPrvm", |
193 | --Ex | 194 | --Ex |
194 | "pavgbPrm","psrawPrm","psradPrm","pavgwPrm", | 195 | "pavgbPrvm","psrawPrvm","psradPrvm","pavgwPrvm", |
195 | "pmulhuwPrm","pmulhwPrm", | 196 | "pmulhuwPrvm","pmulhwPrvm", |
196 | "|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr", | 197 | "|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr", |
197 | "psubsbPrm","psubswPrm","pminswPrm","porPrm", | 198 | "psubsbPrvm","psubswPrvm","pminswPrvm","porPrvm", |
198 | "paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm", | 199 | "paddsbPrvm","paddswPrvm","pmaxswPrvm","pxorPrvm", |
199 | --Fx | 200 | --Fx |
200 | "|||lddquXrm","psllwPrm","pslldPrm","psllqPrm", | 201 | "|||lddquXrm","psllwPrvm","pslldPrvm","psllqPrvm", |
201 | "pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$", | 202 | "pmuludqPrvm","pmaddwdPrvm","psadbwPrvm","maskmovqMrm||maskmovdquXrm$", |
202 | "psubbPrm","psubwPrm","psubdPrm","psubqPrm", | 203 | "psubbPrvm","psubwPrvm","psubdPrvm","psubqPrvm", |
203 | "paddbPrm","paddwPrm","padddPrm","ud", | 204 | "paddbPrvm","paddwPrvm","padddPrvm","ud", |
204 | } | 205 | } |
205 | assert(map_opc2[255] == "ud") | 206 | assert(map_opc2[255] == "ud") |
206 | 207 | ||
@@ -208,49 +209,73 @@ assert(map_opc2[255] == "ud") | |||
208 | local map_opc3 = { | 209 | local map_opc3 = { |
209 | ["38"] = { -- [66] 0f 38 xx | 210 | ["38"] = { -- [66] 0f 38 xx |
210 | --0x | 211 | --0x |
211 | [0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm", | 212 | [0]="pshufbPrvm","phaddwPrvm","phadddPrvm","phaddswPrvm", |
212 | "pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm", | 213 | "pmaddubswPrvm","phsubwPrvm","phsubdPrvm","phsubswPrvm", |
213 | "psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm", | 214 | "psignbPrvm","psignwPrvm","psigndPrvm","pmulhrswPrvm", |
214 | nil,nil,nil,nil, | 215 | "||permilpsXrvm","||permilpdXrvm",nil,nil, |
215 | --1x | 216 | --1x |
216 | "||pblendvbXrma",nil,nil,nil, | 217 | "||pblendvbXrma",nil,nil,nil, |
217 | "||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm", | 218 | "||blendvpsXrma","||blendvpdXrma","||permpsXrvm","||ptestXrm", |
218 | nil,nil,nil,nil, | 219 | "||broadcastssXrm","||broadcastsdXrm","||broadcastf128XrlXm",nil, |
219 | "pabsbPrm","pabswPrm","pabsdPrm",nil, | 220 | "pabsbPrm","pabswPrm","pabsdPrm",nil, |
220 | --2x | 221 | --2x |
221 | "||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm", | 222 | "||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm", |
222 | "||pmovsxwqXrm","||pmovsxdqXrm",nil,nil, | 223 | "||pmovsxwqXrm","||pmovsxdqXrm",nil,nil, |
223 | "||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm", | 224 | "||pmuldqXrvm","||pcmpeqqXrvm","||$movntdqaXrm","||packusdwXrvm", |
224 | nil,nil,nil,nil, | 225 | "||maskmovpsXrvm","||maskmovpdXrvm","||maskmovpsXmvr","||maskmovpdXmvr", |
225 | --3x | 226 | --3x |
226 | "||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm", | 227 | "||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm", |
227 | "||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm", | 228 | "||pmovzxwqXrm","||pmovzxdqXrm","||permdXrvm","||pcmpgtqXrvm", |
228 | "||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm", | 229 | "||pminsbXrvm","||pminsdXrvm","||pminuwXrvm","||pminudXrvm", |
229 | "||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm", | 230 | "||pmaxsbXrvm","||pmaxsdXrvm","||pmaxuwXrvm","||pmaxudXrvm", |
230 | --4x | 231 | --4x |
231 | "||pmulddXrm","||phminposuwXrm", | 232 | "||pmulddXrvm","||phminposuwXrm",nil,nil, |
233 | nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm", | ||
234 | --5x | ||
235 | [0x58] = "||pbroadcastdXrlXm",[0x59] = "||pbroadcastqXrlXm", | ||
236 | [0x5a] = "||broadcasti128XrlXm", | ||
237 | --7x | ||
238 | [0x78] = "||pbroadcastbXrlXm",[0x79] = "||pbroadcastwXrlXm", | ||
239 | --8x | ||
240 | [0x8c] = "||pmaskmovXrvVSm", | ||
241 | [0x8e] = "||pmaskmovVSmXvr", | ||
242 | --Dx | ||
243 | [0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm", | ||
244 | [0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm", | ||
232 | --Fx | 245 | --Fx |
233 | [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt", | 246 | [0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt", |
247 | [0xf7] = "| sarxVrmv| shlxVrmv| shrxVrmv", | ||
234 | }, | 248 | }, |
235 | 249 | ||
236 | ["3a"] = { -- [66] 0f 3a xx | 250 | ["3a"] = { -- [66] 0f 3a xx |
237 | --0x | 251 | --0x |
238 | [0x00]=nil,nil,nil,nil,nil,nil,nil,nil, | 252 | [0x00]="||permqXrmu","||permpdXrmu","||pblenddXrvmu",nil, |
239 | "||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu", | 253 | "||permilpsXrmu","||permilpdXrmu","||perm2f128Xrvmu",nil, |
240 | "||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu", | 254 | "||roundpsXrmu","||roundpdXrmu","||roundssXrvmu","||roundsdXrvmu", |
255 | "||blendpsXrvmu","||blendpdXrvmu","||pblendwXrvmu","palignrPrvmu", | ||
241 | --1x | 256 | --1x |
242 | nil,nil,nil,nil, | 257 | nil,nil,nil,nil, |
243 | "||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru", | 258 | "||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru", |
244 | nil,nil,nil,nil,nil,nil,nil,nil, | 259 | "||insertf128XrvlXmu","||extractf128XlXmYru",nil,nil, |
260 | nil,nil,nil,nil, | ||
245 | --2x | 261 | --2x |
246 | "||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil, | 262 | "||pinsrbXrvVmu","||insertpsXrvmu","||pinsrXrvVmuS",nil, |
263 | --3x | ||
264 | [0x38] = "||inserti128Xrvmu",[0x39] = "||extracti128XlXmYru", | ||
247 | --4x | 265 | --4x |
248 | [0x40] = "||dppsXrmu", | 266 | [0x40] = "||dppsXrvmu", |
249 | [0x41] = "||dppdXrmu", | 267 | [0x41] = "||dppdXrvmu", |
250 | [0x42] = "||mpsadbwXrmu", | 268 | [0x42] = "||mpsadbwXrvmu", |
269 | [0x44] = "||pclmulqdqXrvmu", | ||
270 | [0x46] = "||perm2i128Xrvmu", | ||
271 | [0x4a] = "||blendvpsXrvmb",[0x4b] = "||blendvpdXrvmb", | ||
272 | [0x4c] = "||pblendvbXrvmb", | ||
251 | --6x | 273 | --6x |
252 | [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu", | 274 | [0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu", |
253 | [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu", | 275 | [0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu", |
276 | [0xdf] = "||aeskeygenassistXrmu", | ||
277 | --Fx | ||
278 | [0xf0] = "||| rorxVrmu", | ||
254 | }, | 279 | }, |
255 | } | 280 | } |
256 | 281 | ||
@@ -354,17 +379,19 @@ local map_regs = { | |||
354 | "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext! | 379 | "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext! |
355 | X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", | 380 | X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", |
356 | "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" }, | 381 | "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" }, |
382 | Y = { "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", | ||
383 | "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15" }, | ||
357 | } | 384 | } |
358 | local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" } | 385 | local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" } |
359 | 386 | ||
-- Maps for size names.
-- map_sz2n: size letter -> number (1, 2, 4, 8, 16 or 32).
local map_sz2n = {
  B = 1,
  W = 2,
  D = 4,
  Q = 8,
  M = 8,
  X = 16,
  Y = 32,
}
-- map_sz2prefix: size letter -> size keyword used in the disassembly text.
local map_sz2prefix = {
  B = "byte",
  W = "word",
  D = "dword",
  Q = "qword",
  M = "qword",
  X = "xword",
  Y = "yword",
  -- No need for sizes/register names for these two.
  F = "dword",
  G = "qword",
}
370 | 397 | ||
@@ -387,10 +414,13 @@ local function putop(ctx, text, operands) | |||
387 | if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end | 414 | if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end |
388 | if ctx.rex then | 415 | if ctx.rex then |
389 | local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "").. | 416 | local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "").. |
390 | (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "") | 417 | (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "").. |
391 | if t ~= "" then text = "rex."..t.." "..text end | 418 | (ctx.vexl and "l" or "") |
419 | if ctx.vexv and ctx.vexv ~= 0 then t = t.."v"..ctx.vexv end | ||
420 | if t ~= "" then text = ctx.rex.."."..t.." "..gsub(text, "^ ", "") | ||
421 | elseif ctx.rex == "vex" then text = gsub("v"..text, "^v ", "") end | ||
392 | ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false | 422 | ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false |
393 | ctx.rex = false | 423 | ctx.rex = false; ctx.vexl = false; ctx.vexv = false |
394 | end | 424 | end |
395 | if ctx.seg then | 425 | if ctx.seg then |
396 | local text2, n = gsub(text, "%[", "["..ctx.seg..":") | 426 | local text2, n = gsub(text, "%[", "["..ctx.seg..":") |
@@ -405,6 +435,7 @@ local function putop(ctx, text, operands) | |||
405 | end | 435 | end |
406 | ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text)) | 436 | ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text)) |
407 | ctx.mrm = false | 437 | ctx.mrm = false |
438 | ctx.vexv = false | ||
408 | ctx.start = pos | 439 | ctx.start = pos |
409 | ctx.imm = nil | 440 | ctx.imm = nil |
410 | end | 441 | end |
@@ -413,7 +444,7 @@ end | |||
-- Reset all pending prefix state on the context: operand/address size
-- overrides, segment, lock, rep, the REX flags and the VEX.L flag.
-- ctx.vexv is not touched here.
local function clearprefixes(ctx)
  ctx.o16, ctx.a32 = false, false
  ctx.seg, ctx.lock, ctx.rep = false, false, false
  ctx.rex = false
  ctx.rexw, ctx.rexr, ctx.rexx, ctx.rexb = false, false, false, false
  ctx.vexl = false
end
418 | 449 | ||
419 | -- Fallback for incomplete opcodes at the end. | 450 | -- Fallback for incomplete opcodes at the end. |
@@ -450,9 +481,9 @@ end | |||
450 | -- Process pattern string and generate the operands. | 481 | -- Process pattern string and generate the operands. |
451 | local function putpat(ctx, name, pat) | 482 | local function putpat(ctx, name, pat) |
452 | local operands, regs, sz, mode, sp, rm, sc, rx, sdisp | 483 | local operands, regs, sz, mode, sp, rm, sc, rx, sdisp |
453 | local code, pos, stop = ctx.code, ctx.pos, ctx.stop | 484 | local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl |
454 | 485 | ||
455 | -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz | 486 | -- Chars used: 1DFGIMPQRSTUVWXYabcdfgijlmoprstuvwxyz |
456 | for p in gmatch(pat, ".") do | 487 | for p in gmatch(pat, ".") do |
457 | local x = nil | 488 | local x = nil |
458 | if p == "V" or p == "U" then | 489 | if p == "V" or p == "U" then |
@@ -467,11 +498,13 @@ local function putpat(ctx, name, pat) | |||
467 | elseif p == "B" then | 498 | elseif p == "B" then |
468 | sz = "B" | 499 | sz = "B" |
469 | regs = ctx.rex and map_regs.B64 or map_regs.B | 500 | regs = ctx.rex and map_regs.B64 or map_regs.B |
470 | elseif match(p, "[WDQMXFG]") then | 501 | elseif match(p, "[WDQMXYFG]") then |
471 | sz = p | 502 | sz = p |
503 | if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end | ||
472 | regs = map_regs[sz] | 504 | regs = map_regs[sz] |
473 | elseif p == "P" then | 505 | elseif p == "P" then |
474 | sz = ctx.o16 and "X" or "M"; ctx.o16 = false | 506 | sz = ctx.o16 and "X" or "M"; ctx.o16 = false |
507 | if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end | ||
475 | regs = map_regs[sz] | 508 | regs = map_regs[sz] |
476 | elseif p == "S" then | 509 | elseif p == "S" then |
477 | name = name..lower(sz) | 510 | name = name..lower(sz) |
@@ -484,6 +517,10 @@ local function putpat(ctx, name, pat) | |||
484 | local imm = getimm(ctx, pos, 1); if not imm then return end | 517 | local imm = getimm(ctx, pos, 1); if not imm then return end |
485 | x = format("0x%02x", imm) | 518 | x = format("0x%02x", imm) |
486 | pos = pos+1 | 519 | pos = pos+1 |
520 | elseif p == "b" then | ||
521 | local imm = getimm(ctx, pos, 1); if not imm then return end | ||
522 | x = regs[imm/16+1] | ||
523 | pos = pos+1 | ||
487 | elseif p == "w" then | 524 | elseif p == "w" then |
488 | local imm = getimm(ctx, pos, 2); if not imm then return end | 525 | local imm = getimm(ctx, pos, 2); if not imm then return end |
489 | x = format("0x%x", imm) | 526 | x = format("0x%x", imm) |
@@ -532,7 +569,7 @@ local function putpat(ctx, name, pat) | |||
532 | local lo = imm % 0x1000000 | 569 | local lo = imm % 0x1000000 |
533 | x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo) | 570 | x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo) |
534 | else | 571 | else |
535 | x = format("0x%08x", imm) | 572 | x = "0x"..tohex(imm) |
536 | end | 573 | end |
537 | elseif p == "R" then | 574 | elseif p == "R" then |
538 | local r = byte(code, pos-1, pos-1)%8 | 575 | local r = byte(code, pos-1, pos-1)%8 |
@@ -616,8 +653,13 @@ local function putpat(ctx, name, pat) | |||
616 | else | 653 | else |
617 | x = "CR"..sp | 654 | x = "CR"..sp |
618 | end | 655 | end |
656 | elseif p == "v" then | ||
657 | if ctx.vexv then | ||
658 | x = regs[ctx.vexv+1]; ctx.vexv = false | ||
659 | end | ||
619 | elseif p == "y" then x = "DR"..sp | 660 | elseif p == "y" then x = "DR"..sp |
620 | elseif p == "z" then x = "TR"..sp | 661 | elseif p == "z" then x = "TR"..sp |
662 | elseif p == "l" then vexl = false | ||
621 | elseif p == "t" then | 663 | elseif p == "t" then |
622 | else | 664 | else |
623 | error("bad pattern `"..pat.."'") | 665 | error("bad pattern `"..pat.."'") |
@@ -692,7 +734,7 @@ map_act = { | |||
692 | B = putpat, W = putpat, D = putpat, Q = putpat, | 734 | B = putpat, W = putpat, D = putpat, Q = putpat, |
693 | V = putpat, U = putpat, T = putpat, | 735 | V = putpat, U = putpat, T = putpat, |
694 | M = putpat, X = putpat, P = putpat, | 736 | M = putpat, X = putpat, P = putpat, |
695 | F = putpat, G = putpat, | 737 | F = putpat, G = putpat, Y = putpat, |
696 | 738 | ||
697 | -- Collect prefixes. | 739 | -- Collect prefixes. |
698 | [":"] = function(ctx, name, pat) | 740 | [":"] = function(ctx, name, pat) |
@@ -753,15 +795,68 @@ map_act = { | |||
753 | 795 | ||
-- REX prefix: sets ctx["rex"..c] for every character c of pat and marks
-- the context as carrying a REX prefix.
rex = function(ctx, name, pat)
  -- Only 1 REX or VEX prefix allowed.
  if ctx.rex then
    return unknown(ctx)
  end
  for flag in gmatch(pat, ".") do
    ctx["rex"..flag] = true
  end
  ctx.rex = "rex"
end,
802 | |||
-- VEX prefix.
-- Decodes the VEX payload byte(s) into the REX-style flags plus ctx.vexl and
-- ctx.vexv, then dispatches into the opcode map chosen by the prefix.
-- pat == "3" selects the form with a second payload byte; otherwise a single
-- payload byte is read and the opcode map defaults to map_opc2.
vex = function(ctx, name, pat)
  -- Only 1 REX or VEX prefix allowed.
  if ctx.rex then return unknown(ctx) end
  ctx.rex = "vex"

  local code = ctx.code
  local pos = ctx.pos
  if ctx.mrm then
    -- NOTE(review): presumably the payload byte was already consumed as a
    -- ModRM byte by the caller, hence the step back -- confirm.
    ctx.mrm = nil
    pos = pos-1
  end

  local b = byte(code, pos, pos)
  if not b then return incomplete(ctx) end
  pos = pos+1
  -- The top bit is stored inverted: clear means rexr is set.
  if b < 128 then ctx.rexr = true end

  local mapsel = 1
  if pat == "3" then
    -- Low five bits select the opcode map; bits 5 and 6 are inverted B and X.
    mapsel = b%32
    local hi = (b-mapsel)/32
    local nb = hi%2
    if nb == 0 then ctx.rexb = true end
    hi = (hi-nb)/2
    if hi%2 == 0 then ctx.rexx = true end
    -- Second payload byte: top bit is W, the rest is decoded below.
    b = byte(code, pos, pos)
    if not b then return incomplete(ctx) end
    pos = pos+1
    if b >= 128 then ctx.rexw = true end
  end
  ctx.pos = pos

  local map
  if mapsel == 1 then
    map = map_opc2
  elseif mapsel == 2 then
    map = map_opc3["38"]
  elseif mapsel == 3 then
    map = map_opc3["3a"]
  else
    return unknown(ctx)
  end

  -- Low two bits: implied legacy prefix.
  local pp = b%4
  local rest = (b-pp)/4
  if pp == 1 then
    ctx.o16 = "o16"
  elseif pp == 2 then
    ctx.rep = "rep"
  elseif pp == 3 then
    ctx.rep = "repne"
  end

  -- Next bit: vector length flag; remaining bits: inverted register number.
  local len = rest%2
  if len ~= 0 then ctx.vexl = true end
  ctx.vexv = (-1-(rest-len)/2)%16

  return dispatchmap(ctx, map)
end,
760 | 843 | ||
-- Special case for nop with REX prefix: dispatch to pat when a REX/VEX
-- prefix is pending, otherwise to plain "nop".
nop = function(ctx, name, pat)
  local target = "nop"
  if ctx.rex and pat then target = pat end
  return dispatch(ctx, target)
end,
848 | |||
-- Special case for 0F 77: "emms" without a VEX prefix; with a VEX prefix
-- the mnemonic is "zeroall" (VEX.L set) or "zeroupper" (VEX.L clear).
emms = function(ctx, name, pat)
  local mnemonic = "emms"
  if ctx.rex == "vex" then
    if ctx.vexl then
      ctx.vexl = false
      mnemonic = "zeroall"
    else
      mnemonic = "zeroupper"
    end
  end
  return putop(ctx, mnemonic)
end,
765 | } | 860 | } |
766 | 861 | ||
767 | ------------------------------------------------------------------------------ | 862 | ------------------------------------------------------------------------------ |
@@ -782,7 +877,7 @@ local function disass_block(ctx, ofs, len) | |||
782 | end | 877 | end |
783 | 878 | ||
784 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). | 879 | -- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). |
785 | local function create_(code, addr, out) | 880 | local function create(code, addr, out) |
786 | local ctx = {} | 881 | local ctx = {} |
787 | ctx.code = code | 882 | ctx.code = code |
788 | ctx.addr = (addr or 0) - 1 | 883 | ctx.addr = (addr or 0) - 1 |
@@ -796,8 +891,8 @@ local function create_(code, addr, out) | |||
796 | return ctx | 891 | return ctx |
797 | end | 892 | end |
798 | 893 | ||
799 | local function create64_(code, addr, out) | 894 | local function create64(code, addr, out) |
800 | local ctx = create_(code, addr, out) | 895 | local ctx = create(code, addr, out) |
801 | ctx.x64 = true | 896 | ctx.x64 = true |
802 | ctx.map1 = map_opc1_64 | 897 | ctx.map1 = map_opc1_64 |
803 | ctx.aregs = map_regs.Q | 898 | ctx.aregs = map_regs.Q |
@@ -805,32 +900,32 @@ local function create64_(code, addr, out) | |||
805 | end | 900 | end |
806 | 901 | ||
-- Simple API: disassemble code (a string) at address and output via out.
-- Builds a fresh context with create() and runs its disass method.
local function disass(code, addr, out)
  local ctx = create(code, addr, out)
  ctx:disass()
end
811 | 906 | ||
-- Same as disass, but the context is built with create64().
local function disass64(code, addr, out)
  local ctx = create64(code, addr, out)
  ctx:disass()
end
815 | 910 | ||
-- Return register name for RID.
-- IDs below 8 index map_regs.D; higher IDs index map_regs.X starting at 8.
local function regname(r)
  local bank, idx = map_regs.X, r-7
  if r < 8 then
    bank, idx = map_regs.D, r+1
  end
  return bank[idx]
end
821 | 916 | ||
-- Return register name for RID, x64 variant.
-- IDs below 16 index map_regs.Q; higher IDs index map_regs.X starting at 16.
local function regname64(r)
  local bank, idx = map_regs.X, r-15
  if r < 16 then
    bank, idx = map_regs.Q, r+1
  end
  return bank[idx]
end
826 | 921 | ||
827 | -- Public module functions. | 922 | -- Public module functions. |
828 | module(...) | 923 | return { |
829 | 924 | create = create, | |
830 | create = create_ | 925 | create64 = create64, |
831 | create64 = create64_ | 926 | disass = disass, |
832 | disass = disass_ | 927 | disass64 = disass64, |
833 | disass64 = disass64_ | 928 | regname = regname, |
834 | regname = regname_ | 929 | regname64 = regname64 |
835 | regname64 = regname64_ | 930 | } |
836 | 931 | ||
diff --git a/src/jit/dump.lua b/src/jit/dump.lua index ec5f8276..9a722f73 100644 --- a/src/jit/dump.lua +++ b/src/jit/dump.lua | |||
@@ -55,7 +55,7 @@ | |||
55 | 55 | ||
56 | -- Cache some library functions and objects. | 56 | -- Cache some library functions and objects. |
57 | local jit = require("jit") | 57 | local jit = require("jit") |
58 | assert(jit.version_num == 20004, "LuaJIT core/library version mismatch") | 58 | assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") |
59 | local jutil = require("jit.util") | 59 | local jutil = require("jit.util") |
60 | local vmdef = require("jit.vmdef") | 60 | local vmdef = require("jit.vmdef") |
61 | local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc | 61 | local funcinfo, funcbc = jutil.funcinfo, jutil.funcbc |
@@ -63,7 +63,7 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek | |||
63 | local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap | 63 | local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap |
64 | local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr | 64 | local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr |
65 | local bit = require("bit") | 65 | local bit = require("bit") |
66 | local band, shl, shr = bit.band, bit.lshift, bit.rshift | 66 | local band, shl, shr, tohex = bit.band, bit.lshift, bit.rshift, bit.tohex |
67 | local sub, gsub, format = string.sub, string.gsub, string.format | 67 | local sub, gsub, format = string.sub, string.gsub, string.format |
68 | local byte, char, rep = string.byte, string.char, string.rep | 68 | local byte, char, rep = string.byte, string.char, string.rep |
69 | local type, tostring = type, tostring | 69 | local type, tostring = type, tostring |
@@ -91,6 +91,7 @@ local function fillsymtab_tr(tr, nexit) | |||
91 | end | 91 | end |
92 | for i=0,nexit-1 do | 92 | for i=0,nexit-1 do |
93 | local addr = traceexitstub(tr, i) | 93 | local addr = traceexitstub(tr, i) |
94 | if addr < 0 then addr = addr + 2^32 end | ||
94 | t[addr] = tostring(i) | 95 | t[addr] = tostring(i) |
95 | end | 96 | end |
96 | local addr = traceexitstub(tr, nexit) | 97 | local addr = traceexitstub(tr, nexit) |
@@ -104,7 +105,10 @@ local function fillsymtab(tr, nexit) | |||
104 | local ircall = vmdef.ircall | 105 | local ircall = vmdef.ircall |
105 | for i=0,#ircall do | 106 | for i=0,#ircall do |
106 | local addr = ircalladdr(i) | 107 | local addr = ircalladdr(i) |
107 | if addr ~= 0 then t[addr] = ircall[i] end | 108 | if addr ~= 0 then |
109 | if addr < 0 then addr = addr + 2^32 end | ||
110 | t[addr] = ircall[i] | ||
111 | end | ||
108 | end | 112 | end |
109 | end | 113 | end |
110 | if nexitsym == 1000000 then -- Per-trace exit stubs. | 114 | if nexitsym == 1000000 then -- Per-trace exit stubs. |
@@ -118,6 +122,7 @@ local function fillsymtab(tr, nexit) | |||
118 | nexit = 1000000 | 122 | nexit = 1000000 |
119 | break | 123 | break |
120 | end | 124 | end |
125 | if addr < 0 then addr = addr + 2^32 end | ||
121 | t[addr] = tostring(i) | 126 | t[addr] = tostring(i) |
122 | end | 127 | end |
123 | nexitsym = nexit | 128 | nexitsym = nexit |
@@ -136,6 +141,7 @@ local function dump_mcode(tr) | |||
136 | local mcode, addr, loop = tracemc(tr) | 141 | local mcode, addr, loop = tracemc(tr) |
137 | if not mcode then return end | 142 | if not mcode then return end |
138 | if not disass then disass = require("jit.dis_"..jit.arch) end | 143 | if not disass then disass = require("jit.dis_"..jit.arch) end |
144 | if addr < 0 then addr = addr + 2^32 end | ||
139 | out:write("---- TRACE ", tr, " mcode ", #mcode, "\n") | 145 | out:write("---- TRACE ", tr, " mcode ", #mcode, "\n") |
140 | local ctx = disass.create(mcode, addr, dumpwrite) | 146 | local ctx = disass.create(mcode, addr, dumpwrite) |
141 | ctx.hexdump = 0 | 147 | ctx.hexdump = 0 |
@@ -270,8 +276,7 @@ local litname = { | |||
270 | ["CONV "] = setmetatable({}, { __index = function(t, mode) | 276 | ["CONV "] = setmetatable({}, { __index = function(t, mode) |
271 | local s = irtype[band(mode, 31)] | 277 | local s = irtype[band(mode, 31)] |
272 | s = irtype[band(shr(mode, 5), 31)].."."..s | 278 | s = irtype[band(shr(mode, 5), 31)].."."..s |
273 | if band(mode, 0x400) ~= 0 then s = s.." trunc" | 279 | if band(mode, 0x800) ~= 0 then s = s.." sext" end |
274 | elseif band(mode, 0x800) ~= 0 then s = s.." sext" end | ||
275 | local c = shr(mode, 14) | 280 | local c = shr(mode, 14) |
276 | if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end | 281 | if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end |
277 | t[mode] = s | 282 | t[mode] = s |
@@ -280,6 +285,8 @@ local litname = { | |||
280 | ["FLOAD "] = vmdef.irfield, | 285 | ["FLOAD "] = vmdef.irfield, |
281 | ["FREF "] = vmdef.irfield, | 286 | ["FREF "] = vmdef.irfield, |
282 | ["FPMATH"] = vmdef.irfpm, | 287 | ["FPMATH"] = vmdef.irfpm, |
288 | ["BUFHDR"] = { [0] = "RESET", "APPEND" }, | ||
289 | ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" }, | ||
283 | } | 290 | } |
284 | 291 | ||
285 | local function ctlsub(c) | 292 | local function ctlsub(c) |
@@ -608,7 +615,7 @@ local function dump_texit(tr, ex, ngpr, nfpr, ...) | |||
608 | end | 615 | end |
609 | else | 616 | else |
610 | for i=1,ngpr do | 617 | for i=1,ngpr do |
611 | out:write(format(" %08x", regs[i])) | 618 | out:write(" ", tohex(regs[i])) |
612 | if i % 8 == 0 then out:write("\n") end | 619 | if i % 8 == 0 then out:write("\n") end |
613 | end | 620 | end |
614 | end | 621 | end |
@@ -692,9 +699,9 @@ local function dumpon(opt, outfile) | |||
692 | end | 699 | end |
693 | 700 | ||
694 | -- Public module functions. | 701 | -- Public module functions. |
695 | module(...) | 702 | return { |
696 | 703 | on = dumpon, | |
697 | on = dumpon | 704 | off = dumpoff, |
698 | off = dumpoff | 705 | start = dumpon -- For -j command line option. |
699 | start = dumpon -- For -j command line option. | 706 | } |
700 | 707 | ||
diff --git a/src/jit/p.lua b/src/jit/p.lua new file mode 100644 index 00000000..5323728b --- /dev/null +++ b/src/jit/p.lua | |||
@@ -0,0 +1,310 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT profiler. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2016 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- | ||
8 | -- This module is a simple command line interface to the built-in | ||
9 | -- low-overhead profiler of LuaJIT. | ||
10 | -- | ||
11 | -- The lower-level API of the profiler is accessible via the "jit.profile" | ||
12 | -- module or the luaJIT_profile_* C API. | ||
13 | -- | ||
14 | -- Example usage: | ||
15 | -- | ||
16 | -- luajit -jp myapp.lua | ||
17 | -- luajit -jp=s myapp.lua | ||
18 | -- luajit -jp=-s myapp.lua | ||
19 | -- luajit -jp=vl myapp.lua | ||
20 | -- luajit -jp=G,profile.txt myapp.lua | ||
21 | -- | ||
22 | -- The following dump features are available: | ||
23 | -- | ||
24 | -- f Stack dump: function name, otherwise module:line. Default mode. | ||
25 | -- F Stack dump: ditto, but always prepend module. | ||
26 | -- l Stack dump: module:line. | ||
27 | -- <number> stack dump depth (callee < caller). Default: 1. | ||
28 | -- -<number> Inverse stack dump depth (caller > callee). | ||
29 | -- s Split stack dump after first stack level. Implies abs(depth) >= 2. | ||
30 | -- p Show full path for module names. | ||
31 | -- v Show VM states. Can be combined with stack dumps, e.g. vf or fv. | ||
32 | -- z Show zones. Can be combined with stack dumps, e.g. zf or fz. | ||
33 | -- r Show raw sample counts. Default: show percentages. | ||
34 | -- a Annotate excerpts from source code files. | ||
35 | -- A Annotate complete source code files. | ||
36 | -- G Produce raw output suitable for graphical tools (e.g. flame graphs). | ||
37 | -- m<number> Minimum sample percentage to be shown. Default: 3. | ||
38 | -- i<number> Sampling interval in milliseconds. Default: 10. | ||
39 | -- | ||
40 | ---------------------------------------------------------------------------- | ||
41 | |||
42 | -- Cache some library functions and objects. | ||
43 | local jit = require("jit") | ||
44 | assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") | ||
45 | local profile = require("jit.profile") | ||
46 | local vmdef = require("jit.vmdef") | ||
47 | local math = math | ||
48 | local pairs, ipairs, tonumber, floor = pairs, ipairs, tonumber, math.floor | ||
49 | local sort, format = table.sort, string.format | ||
50 | local stdout = io.stdout | ||
51 | local zone -- Load jit.zone module on demand. | ||
52 | |||
53 | -- Output file handle. | ||
54 | local out | ||
55 | |||
56 | ------------------------------------------------------------------------------ | ||
57 | |||
58 | local prof_ud | ||
59 | local prof_states, prof_split, prof_min, prof_raw, prof_fmt, prof_depth | ||
60 | local prof_ann, prof_count1, prof_count2, prof_samples | ||
61 | |||
-- Display names for the single-letter VM mode passed to the profiler
-- callback. Letters not listed here are shown as-is by prof_cb.
local map_vmmode = {
  ["N"] = "Compiled",
  ["I"] = "Interpreted",
  ["C"] = "C code",
  ["G"] = "Garbage Collector",
  ["J"] = "JIT Compiler",
}
69 | |||
-- Profiler callback.
-- Adds `samples` to the running total and to the one- or two-level count
-- tables, keyed by the VM state / zone and/or the dumped stack, depending
-- on the options chosen in prof_start.
local function prof_cb(th, samples, vmmode)
  prof_samples = prof_samples + samples

  -- State key: VM mode name or current zone, if states are being shown.
  local state
  if prof_states == "v" then
    state = map_vmmode[vmmode] or vmmode
  elseif prof_states then
    state = zone:get() or "(none)"
  end

  -- Stack key(s), if a stack dump format is active.
  local stack, stack2
  if prof_fmt then
    local dump = profile.dumpstack(th, prof_fmt, prof_depth)
    -- Replace "[builtin#N]" with the name from vmdef.ffnames; the extra
    -- parentheses keep only the first result of gsub.
    stack = (dump:gsub("%[builtin#(%d+)%]", function(id)
      return vmdef.ffnames[tonumber(id)]
    end))
    if prof_split == 2 then
      -- Split after the first stack level.
      local head, tail = stack:match("(.-) [<>] (.*)")
      if tail then stack, stack2 = head, tail end
    elseif prof_split == 3 then
      stack2 = profile.dumpstack(th, "l", 1)
    end
  end

  -- Order keys: state first when prof_split == 1, otherwise stack first.
  local primary, secondary
  if prof_split == 1 then
    if state then primary, secondary = state, stack end
  elseif stack then
    primary, secondary = stack, stack2 or state
  end
  if not primary then return end

  -- Coalesce samples in one or two levels.
  local level1 = prof_count1
  level1[primary] = (level1[primary] or 0) + samples
  if secondary then
    local level2 = prof_count2[primary]
    if not level2 then
      level2 = {}
      prof_count2[primary] = level2
    end
    level2[secondary] = (level2[secondary] or 0) + samples
  end
end
117 | |||
118 | ------------------------------------------------------------------------------ | ||
119 | |||
-- Show top N list.
-- Writes the keys of count1 in descending count order, stopping at the
-- first entry whose rounded percentage of `samples` is below prof_min.
-- If count2 holds a sub-table for a key, that table is printed right below
-- it (one level only) relative to the key's own count.
local function prof_top(count1, count2, samples, indent)
  local keys, nkeys = {}, 0
  for key in pairs(count1) do
    nkeys = nkeys + 1
    keys[nkeys] = key
  end
  sort(keys, function(a, b) return count1[a] > count1[b] end)

  for i = 1, nkeys do
    local key = keys[i]
    local cnt = count1[key]
    local pct = floor(cnt*100/samples + 0.5)
    if pct < prof_min then break end

    local text
    if prof_raw == "r" then
      text = format("%s%5d %s\n", indent, cnt, key)
    elseif prof_raw then
      text = format("%s %d\n", key, cnt)
    else
      text = format("%s%2d%% %s\n", indent, pct, key)
    end
    out:write(text)

    local nested = count2 and count2[key]
    if nested then
      local subindent
      if prof_split == 3 or prof_split == 1 then
        subindent = " -- "
      elseif prof_depth < 0 then
        subindent = " -> "
      else
        subindent = " <- "
      end
      prof_top(nested, nil, cnt, subindent)
    end
  end
end
149 | |||
-- Annotate a single source file.
-- fl maps line numbers to the value to print (percentage or raw count);
-- fmtv/fmtn are the formats for lines with and without a value.
-- Problems with one file are reported and do not affect other files.
local function prof_annotate_file(file, fl, fmtv, fmtn)
  local f0 = file:byte()
  if f0 == 40 or f0 == 91 then  -- Name starts with "(" or "[".
    out:write(format("\n====== %s ======\n[Cannot annotate non-file]\n", file))
    return
  end
  local fp, err = io.open(file)
  if not fp then
    out:write(format("====== ERROR: %s: %s\n", file, err))
    return
  end
  out:write(format("\n====== %s ======\n", file))
  local ann = prof_ann  -- 0: show the whole file, else context line count.
  -- `show` is false while lines are suppressed, otherwise the line number
  -- of the hit that keeps the current excerpt open (until show+ann).
  local n, show = 1, false
  if ann ~= 0 then
    for i=1,ann do
      -- Must be a number, not `true`: it is used in arithmetic below.
      if fl[i] then show = i; out:write("@@ 1 @@\n"); break end
    end
  end
  for line in fp:lines() do
    if line:byte() == 27 then  -- ESC: file starts like a bytecode dump.
      out:write("[Cannot annotate bytecode file]\n")
      break
    end
    local v = fl[n]
    if ann ~= 0 then
      local v2 = fl[n+ann]
      if show then
        if v2 then show = n+ann
        elseif v then show = n
        elseif show+ann < n then show = false end
      elseif v2 then
        show = n+ann
        out:write(format("@@ %d @@\n", n))
      end
    end
    if ann == 0 or show then
      if v then
        out:write(format(fmtv, v, line))
      else
        out:write(format(fmtn, line))
      end
    end
    n = n + 1
  end
  fp:close()
end

-- Annotate source code
-- count1 maps "file:line" keys to sample counts. Lines whose rounded
-- percentage of `samples` reaches prof_min are collected per file; the
-- files are then printed in sorted order with the per-line values.
local function prof_annotate(count1, samples)
  -- `files` is both an array of file names and a map name -> line table.
  local files = {}
  local ms = 0
  for k, v in pairs(count1) do
    local pct = floor(v*100/samples + 0.5)
    ms = math.max(ms, v)
    if pct >= prof_min then
      local file, line = k:match("^(.*):(%d+)$")
      -- Keys that are not of the form "file:line" cannot be annotated;
      -- skip them instead of indexing the table with nil.
      if file then
        local fl = files[file]
        if not fl then fl = {}; files[file] = fl; files[#files+1] = file end
        fl[tonumber(line)] = prof_raw and v or pct
      end
    end
  end
  sort(files)
  -- Both formats must put the "|" in the same column.
  local fmtv, fmtn = " %3d%% | %s\n", "      | %s\n"
  if prof_raw then
    -- Column width: number of digits of the largest count, at least 5.
    local n = math.max(5, #format("%d", ms))
    fmtv = "%"..n.."d | %s\n"
    fmtn = (" "):rep(n).." | %s\n"
  end
  for _, file in ipairs(files) do
    prof_annotate_file(file, files[file], fmtv, fmtn)
  end
end
220 | |||
221 | ------------------------------------------------------------------------------ | ||
222 | |||
-- Finish profiling and dump result.
-- Does nothing unless a profile is active (prof_ud set). Stops the
-- profiler, writes the report (or a note if no samples were collected)
-- and always resets the profiling state, so a second call -- e.g. from
-- the __gc finalizer installed by prof_start -- is a no-op.
local function prof_finish()
  if not prof_ud then return end
  profile.stop()
  local samples = prof_samples
  if samples == 0 then
    if prof_raw ~= true then out:write("[No samples collected]\n") end
  elseif prof_ann then
    prof_annotate(prof_count1, samples)
  else
    prof_top(prof_count1, prof_count2, samples, "")
  end
  prof_count1 = nil
  prof_count2 = nil
  prof_ud = nil
  -- Close an output file opened by start(); never close stdout.
  if out ~= stdout then out:close() end
end
242 | |||
-- Start profiling.
-- Parses the mode string (see the option list at the top of the file),
-- sets up the module-level profiling state and starts jit.profile with
-- prof_cb as the callback. A proxy userdata with a __gc finalizer makes
-- sure prof_finish runs when the proxy is collected.
local function prof_start(mode)
  -- Strip the numeric options from the mode string. The order matters:
  -- i<number> and m<number> must be removed before the bare depth number.
  local interval = ""
  prof_min = 3
  prof_depth = 1
  mode = mode:gsub("i%d*", function(opt) interval = opt; return "" end)
  mode = mode:gsub("m(%d+)", function(num)
    prof_min = tonumber(num)
    return ""
  end)
  mode = mode:gsub("%-?%d+", function(num)
    prof_depth = tonumber(num)
    return ""
  end)

  -- Remaining characters are single-letter flags.
  local has = {}
  for ch in mode:gmatch(".") do has[ch] = ch end

  prof_states = has.z or has.v
  if prof_states == "z" then zone = require("jit.zone") end

  local scope = has.l or has.f or has.F
  if not scope then scope = prof_states and "" or "f" end
  local flags = has.p or ""
  prof_raw = has.r

  -- Decide how samples are split into one or two levels.
  if has.s then
    prof_split = 2
    if prof_depth == -1 or has["-"] then
      prof_depth = -2
    elseif prof_depth == 1 then
      prof_depth = 2
    end
  elseif mode:find("[fF].*l") then
    scope = "l"
    prof_split = 3
  elseif scope == "" or mode:find("[zv].*[lfF]") then
    prof_split = 1
  else
    prof_split = 0
  end

  -- Annotation: 0 for complete files (A), 3 context lines for excerpts (a).
  if has.A then
    prof_ann = 0
  elseif has.a then
    prof_ann = 3
  else
    prof_ann = nil
  end

  -- Pick the stack dump format.
  if prof_ann then
    scope = "l"
    prof_fmt = "pl"
    prof_split = 0
    prof_depth = 1
  elseif has.G and scope ~= "" then
    prof_fmt = flags..scope.."Z;"
    prof_depth = -100
    prof_raw = true
    prof_min = 0
  elseif scope == "" then
    prof_fmt = false
  else
    local sc = prof_split == 3 and has.f or has.F or scope
    local sep = prof_depth >= 0 and "Z < " or "Z > "
    prof_fmt = flags..sc..sep
  end

  -- Fresh counters, then start sampling.
  prof_count1 = {}
  prof_count2 = {}
  prof_samples = 0
  profile.start(scope:lower()..interval, prof_cb)
  prof_ud = newproxy(true)
  getmetatable(prof_ud).__gc = prof_finish
end
292 | |||
293 | ------------------------------------------------------------------------------ | ||
294 | |||
-- Entry point for the -jp command line option.
-- The output goes to `outfile`, or to the file named by the
-- LUAJIT_PROFILEFILE environment variable if none is given; "-" or no
-- file at all selects stdout. `mode` defaults to "f".
local function start(mode, outfile)
  outfile = outfile or os.getenv("LUAJIT_PROFILEFILE")
  if not outfile or outfile == "-" then
    out = stdout
  else
    out = assert(io.open(outfile, "w"))
  end
  prof_start(mode or "f")
end
304 | |||
305 | -- Public module functions. | ||
306 | return { | ||
307 | start = start, -- For -j command line option. | ||
308 | stop = prof_finish | ||
309 | } | ||
310 | |||
diff --git a/src/jit/v.lua b/src/jit/v.lua index d615921c..60c8b05a 100644 --- a/src/jit/v.lua +++ b/src/jit/v.lua | |||
@@ -59,7 +59,7 @@ | |||
59 | 59 | ||
60 | -- Cache some library functions and objects. | 60 | -- Cache some library functions and objects. |
61 | local jit = require("jit") | 61 | local jit = require("jit") |
62 | assert(jit.version_num == 20004, "LuaJIT core/library version mismatch") | 62 | assert(jit.version_num == 20100, "LuaJIT core/library version mismatch") |
63 | local jutil = require("jit.util") | 63 | local jutil = require("jit.util") |
64 | local vmdef = require("jit.vmdef") | 64 | local vmdef = require("jit.vmdef") |
65 | local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo | 65 | local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo |
@@ -116,6 +116,9 @@ local function dump_trace(what, tr, func, pc, otr, oex) | |||
116 | if ltype == "interpreter" then | 116 | if ltype == "interpreter" then |
117 | out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n", | 117 | out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n", |
118 | tr, startex, startloc)) | 118 | tr, startex, startloc)) |
119 | elseif ltype == "stitch" then | ||
120 | out:write(format("[TRACE %3s %s%s %s %s]\n", | ||
121 | tr, startex, startloc, ltype, fmtfunc(func, pc))) | ||
119 | elseif link == tr or link == 0 then | 122 | elseif link == tr or link == 0 then |
120 | out:write(format("[TRACE %3s %s%s %s]\n", | 123 | out:write(format("[TRACE %3s %s%s %s]\n", |
121 | tr, startex, startloc, ltype)) | 124 | tr, startex, startloc, ltype)) |
@@ -159,9 +162,9 @@ local function dumpon(outfile) | |||
159 | end | 162 | end |
160 | 163 | ||
161 | -- Public module functions. | 164 | -- Public module functions. |
162 | module(...) | 165 | return { |
163 | 166 | on = dumpon, | |
164 | on = dumpon | 167 | off = dumpoff, |
165 | off = dumpoff | 168 | start = dumpon -- For -j command line option. |
166 | start = dumpon -- For -j command line option. | 169 | } |
167 | 170 | ||
diff --git a/src/jit/zone.lua b/src/jit/zone.lua new file mode 100644 index 00000000..f5f9656a --- /dev/null +++ b/src/jit/zone.lua | |||
@@ -0,0 +1,45 @@ | |||
1 | ---------------------------------------------------------------------------- | ||
2 | -- LuaJIT profiler zones. | ||
3 | -- | ||
4 | -- Copyright (C) 2005-2016 Mike Pall. All rights reserved. | ||
5 | -- Released under the MIT license. See Copyright Notice in luajit.h | ||
6 | ---------------------------------------------------------------------------- | ||
7 | -- | ||
8 | -- This module implements a simple hierarchical zone model. | ||
9 | -- | ||
10 | -- Example usage: | ||
11 | -- | ||
12 | -- local zone = require("jit.zone") | ||
13 | -- zone("AI") | ||
14 | -- ... | ||
15 | -- zone("A*") | ||
16 | -- ... | ||
17 | -- print(zone:get()) --> "A*" | ||
18 | -- ... | ||
19 | -- zone() | ||
20 | -- ... | ||
21 | -- print(zone:get()) --> "AI" | ||
22 | -- ... | ||
23 | -- zone() | ||
24 | -- | ||
25 | ---------------------------------------------------------------------------- | ||
26 | |||
local remove = table.remove

-- The module table doubles as the zone stack: zones live in its array
-- part, the methods below live under string keys.
local zone_stack = {}

-- Discard every zone, clearing slots from the top of the stack downwards.
function zone_stack.flush(t)
  local i = #t
  while i >= 1 do
    t[i] = nil
    i = i - 1
  end
end

-- Return the innermost (most recently pushed) zone, or nil if there is none.
function zone_stack.get(t)
  return t[#t]
end

-- Calling the module with a zone pushes it and returns nothing.
-- Calling it without one pops and returns the innermost zone, raising
-- "empty zone stack" when nothing is left to pop.
local function zone_call(t, zone)
  if not zone then
    return (assert(remove(t), "empty zone stack"))
  end
  t[#t+1] = zone
end

return setmetatable(zone_stack, { __call = zone_call })
45 | |||