aboutsummaryrefslogtreecommitdiff
path: root/src/jit
diff options
context:
space:
mode:
Diffstat (limited to 'src/jit')
-rw-r--r--src/jit/bc.lua17
-rw-r--r--src/jit/bcsave.lua326
-rw-r--r--src/jit/dis_arm.lua18
-rw-r--r--src/jit/dis_arm64.lua1233
-rw-r--r--src/jit/dis_arm64be.lua12
-rw-r--r--src/jit/dis_mips.lua372
-rw-r--r--src/jit/dis_mips64.lua17
-rw-r--r--src/jit/dis_mips64el.lua17
-rw-r--r--src/jit/dis_mips64r6.lua17
-rw-r--r--src/jit/dis_mips64r6el.lua17
-rw-r--r--src/jit/dis_mipsel.lua15
-rw-r--r--src/jit/dis_ppc.lua18
-rw-r--r--src/jit/dis_x64.lua15
-rw-r--r--src/jit/dis_x86.lua317
-rw-r--r--src/jit/dump.lua83
-rw-r--r--src/jit/p.lua309
-rw-r--r--src/jit/v.lua24
-rw-r--r--src/jit/zone.lua45
18 files changed, 2475 insertions, 397 deletions
diff --git a/src/jit/bc.lua b/src/jit/bc.lua
index f162155d..8014d602 100644
--- a/src/jit/bc.lua
+++ b/src/jit/bc.lua
@@ -178,13 +178,12 @@ local function bcliston(outfile)
178end 178end
179 179
180-- Public module functions. 180-- Public module functions.
181module(...) 181return {
182 182 line = bcline,
183line = bcline 183 dump = bcdump,
184dump = bcdump 184 targets = bctargets,
185targets = bctargets 185 on = bcliston,
186 186 off = bclistoff,
187on = bcliston 187 start = bcliston -- For -j command line option.
188off = bclistoff 188}
189start = bcliston -- For -j command line option.
190 189
diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua
index 11b0b853..7d19cb06 100644
--- a/src/jit/bcsave.lua
+++ b/src/jit/bcsave.lua
@@ -11,12 +11,16 @@
11------------------------------------------------------------------------------ 11------------------------------------------------------------------------------
12 12
13local jit = require("jit") 13local jit = require("jit")
14assert(jit.version_num == 20099, "LuaJIT core/library version mismatch") 14assert(jit.version_num == 20199, "LuaJIT core/library version mismatch")
15local bit = require("bit") 15local bit = require("bit")
16 16
17-- Symbol name prefix for LuaJIT bytecode. 17-- Symbol name prefix for LuaJIT bytecode.
18local LJBC_PREFIX = "luaJIT_BC_" 18local LJBC_PREFIX = "luaJIT_BC_"
19 19
20local type, assert = type, assert
21local format = string.format
22local tremove, tconcat = table.remove, table.concat
23
20------------------------------------------------------------------------------ 24------------------------------------------------------------------------------
21 25
22local function usage() 26local function usage()
@@ -25,15 +29,19 @@ Save LuaJIT bytecode: luajit -b[options] input output
25 -l Only list bytecode. 29 -l Only list bytecode.
26 -s Strip debug info (default). 30 -s Strip debug info (default).
27 -g Keep debug info. 31 -g Keep debug info.
32 -W Generate 32 bit (non-GC64) bytecode.
33 -X Generate 64 bit (GC64) bytecode.
34 -d Generate bytecode in deterministic manner.
28 -n name Set module name (default: auto-detect from input name). 35 -n name Set module name (default: auto-detect from input name).
29 -t type Set output file type (default: auto-detect from output name). 36 -t type Set output file type (default: auto-detect from output name).
30 -a arch Override architecture for object files (default: native). 37 -a arch Override architecture for object files (default: native).
31 -o os Override OS for object files (default: native). 38 -o os Override OS for object files (default: native).
39 -F name Override filename (default: input filename).
32 -e chunk Use chunk string as input. 40 -e chunk Use chunk string as input.
33 -- Stop handling options. 41 -- Stop handling options.
34 - Use stdin as input and/or stdout as output. 42 - Use stdin as input and/or stdout as output.
35 43
36File types: c h obj o raw (default) 44File types: c cc h obj o raw (default)
37]] 45]]
38 os.exit(1) 46 os.exit(1)
39end 47end
@@ -45,10 +53,23 @@ local function check(ok, ...)
45 os.exit(1) 53 os.exit(1)
46end 54end
47 55
48local function readfile(input) 56local function readfile(ctx, input)
49 if type(input) == "function" then return input end 57 if ctx.string then
50 if input == "-" then input = nil end 58 return check(loadstring(input, nil, ctx.mode))
51 return check(loadfile(input)) 59 elseif ctx.filename then
60 local data
61 if input == "-" then
62 data = io.stdin:read("*a")
63 else
64 local fp = assert(io.open(input, "rb"))
65 data = assert(fp:read("*a"))
66 assert(fp:close())
67 end
68 return check(load(data, ctx.filename, ctx.mode))
69 else
70 if input == "-" then input = nil end
71 return check(loadfile(input, ctx.mode))
72 end
52end 73end
53 74
54local function savefile(name, mode) 75local function savefile(name, mode)
@@ -56,15 +77,30 @@ local function savefile(name, mode)
56 return check(io.open(name, mode)) 77 return check(io.open(name, mode))
57end 78end
58 79
80local function set_stdout_binary(ffi)
81 ffi.cdef[[int _setmode(int fd, int mode);]]
82 ffi.C._setmode(1, 0x8000)
83end
84
59------------------------------------------------------------------------------ 85------------------------------------------------------------------------------
60 86
61local map_type = { 87local map_type = {
62 raw = "raw", c = "c", h = "h", o = "obj", obj = "obj", 88 raw = "raw", c = "c", cc = "c", h = "h", o = "obj", obj = "obj",
63} 89}
64 90
65local map_arch = { 91local map_arch = {
66 x86 = true, x64 = true, arm = true, ppc = true, ppcspe = true, 92 x86 = { e = "le", b = 32, m = 3, p = 0x14c, },
67 mips = true, mipsel = true, 93 x64 = { e = "le", b = 64, m = 62, p = 0x8664, },
94 arm = { e = "le", b = 32, m = 40, p = 0x1c0, },
95 arm64 = { e = "le", b = 64, m = 183, p = 0xaa64, },
96 arm64be = { e = "be", b = 64, m = 183, },
97 ppc = { e = "be", b = 32, m = 20, },
98 mips = { e = "be", b = 32, m = 8, f = 0x50001006, },
99 mipsel = { e = "le", b = 32, m = 8, f = 0x50001006, },
100 mips64 = { e = "be", b = 64, m = 8, f = 0x80000007, },
101 mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, },
102 mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, },
103 mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, },
68} 104}
69 105
70local map_os = { 106local map_os = {
@@ -73,33 +109,33 @@ local map_os = {
73} 109}
74 110
75local function checkarg(str, map, err) 111local function checkarg(str, map, err)
76 str = string.lower(str) 112 str = str:lower()
77 local s = check(map[str], "unknown ", err) 113 local s = check(map[str], "unknown ", err)
78 return s == true and str or s 114 return type(s) == "string" and s or str
79end 115end
80 116
81local function detecttype(str) 117local function detecttype(str)
82 local ext = string.match(string.lower(str), "%.(%a+)$") 118 local ext = str:lower():match("%.(%a+)$")
83 return map_type[ext] or "raw" 119 return map_type[ext] or "raw"
84end 120end
85 121
86local function checkmodname(str) 122local function checkmodname(str)
87 check(string.match(str, "^[%w_.%-]+$"), "bad module name") 123 check(str:match("^[%w_.%-]+$"), "bad module name")
88 return string.gsub(str, "[%.%-]", "_") 124 return str:gsub("[%.%-]", "_")
89end 125end
90 126
91local function detectmodname(str) 127local function detectmodname(str)
92 if type(str) == "string" then 128 if type(str) == "string" then
93 local tail = string.match(str, "[^/\\]+$") 129 local tail = str:match("[^/\\]+$")
94 if tail then str = tail end 130 if tail then str = tail end
95 local head = string.match(str, "^(.*)%.[^.]*$") 131 local head = str:match("^(.*)%.[^.]*$")
96 if head then str = head end 132 if head then str = head end
97 str = string.match(str, "^[%w_.%-]+") 133 str = str:match("^[%w_.%-]+")
98 else 134 else
99 str = nil 135 str = nil
100 end 136 end
101 check(str, "cannot derive module name, use -n name") 137 check(str, "cannot derive module name, use -n name")
102 return string.gsub(str, "[%.%-]", "_") 138 return str:gsub("[%.%-]", "_")
103end 139end
104 140
105------------------------------------------------------------------------------ 141------------------------------------------------------------------------------
@@ -111,6 +147,11 @@ local function bcsave_tail(fp, output, s)
111end 147end
112 148
113local function bcsave_raw(output, s) 149local function bcsave_raw(output, s)
150 if output == "-" and jit.os == "Windows" then
151 local ok, ffi = pcall(require, "ffi")
152 check(ok, "FFI library required to write binary file to stdout")
153 set_stdout_binary(ffi)
154 end
114 local fp = savefile(output, "wb") 155 local fp = savefile(output, "wb")
115 bcsave_tail(fp, output, s) 156 bcsave_tail(fp, output, s)
116end 157end
@@ -118,19 +159,21 @@ end
118local function bcsave_c(ctx, output, s) 159local function bcsave_c(ctx, output, s)
119 local fp = savefile(output, "w") 160 local fp = savefile(output, "w")
120 if ctx.type == "c" then 161 if ctx.type == "c" then
121 fp:write(string.format([[ 162 fp:write(format([[
122#ifdef __cplusplus 163#ifdef __cplusplus
123extern "C" 164extern "C"
124#endif 165#endif
125#ifdef _WIN32 166#ifdef _WIN32
126__declspec(dllexport) 167__declspec(dllexport)
168#elif (defined(__ELF__) || defined(__MACH__) || defined(__psp2__)) && !((defined(__sun__) && defined(__svr4__)) || defined(__CELLOS_LV2__))
169__attribute__((visibility("default")))
127#endif 170#endif
128const char %s%s[] = { 171const unsigned char %s%s[] = {
129]], LJBC_PREFIX, ctx.modname)) 172]], LJBC_PREFIX, ctx.modname))
130 else 173 else
131 fp:write(string.format([[ 174 fp:write(format([[
132#define %s%s_SIZE %d 175#define %s%s_SIZE %d
133static const char %s%s[] = { 176static const unsigned char %s%s[] = {
134]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname)) 177]], LJBC_PREFIX, ctx.modname, #s, LJBC_PREFIX, ctx.modname))
135 end 178 end
136 local t, n, m = {}, 0, 0 179 local t, n, m = {}, 0, 0
@@ -138,13 +181,13 @@ static const char %s%s[] = {
138 local b = tostring(string.byte(s, i)) 181 local b = tostring(string.byte(s, i))
139 m = m + #b + 1 182 m = m + #b + 1
140 if m > 78 then 183 if m > 78 then
141 fp:write(table.concat(t, ",", 1, n), ",\n") 184 fp:write(tconcat(t, ",", 1, n), ",\n")
142 n, m = 0, #b + 1 185 n, m = 0, #b + 1
143 end 186 end
144 n = n + 1 187 n = n + 1
145 t[n] = b 188 t[n] = b
146 end 189 end
147 bcsave_tail(fp, output, table.concat(t, ",", 1, n).."\n};\n") 190 bcsave_tail(fp, output, tconcat(t, ",", 1, n).."\n};\n")
148end 191end
149 192
150local function bcsave_elfobj(ctx, output, s, ffi) 193local function bcsave_elfobj(ctx, output, s, ffi)
@@ -199,12 +242,8 @@ typedef struct {
199} ELF64obj; 242} ELF64obj;
200]] 243]]
201 local symname = LJBC_PREFIX..ctx.modname 244 local symname = LJBC_PREFIX..ctx.modname
202 local is64, isbe = false, false 245 local ai = assert(map_arch[ctx.arch])
203 if ctx.arch == "x64" then 246 local is64, isbe = ai.b == 64, ai.e == "be"
204 is64 = true
205 elseif ctx.arch == "ppc" or ctx.arch == "ppcspe" or ctx.arch == "mips" then
206 isbe = true
207 end
208 247
209 -- Handle different host/target endianess. 248 -- Handle different host/target endianess.
210 local function f32(x) return x end 249 local function f32(x) return x end
@@ -237,10 +276,8 @@ typedef struct {
237 hdr.eendian = isbe and 2 or 1 276 hdr.eendian = isbe and 2 or 1
238 hdr.eversion = 1 277 hdr.eversion = 1
239 hdr.type = f16(1) 278 hdr.type = f16(1)
240 hdr.machine = f16(({ x86=3, x64=62, arm=40, ppc=20, ppcspe=20, mips=8, mipsel=8 })[ctx.arch]) 279 hdr.machine = f16(ai.m)
241 if ctx.arch == "mips" or ctx.arch == "mipsel" then 280 hdr.flags = f32(ai.f or 0)
242 hdr.flags = f32(0x50001006)
243 end
244 hdr.version = f32(1) 281 hdr.version = f32(1)
245 hdr.shofs = fofs(ffi.offsetof(o, "sect")) 282 hdr.shofs = fofs(ffi.offsetof(o, "sect"))
246 hdr.ehsize = f16(ffi.sizeof(hdr)) 283 hdr.ehsize = f16(ffi.sizeof(hdr))
@@ -336,12 +373,8 @@ typedef struct {
336} PEobj; 373} PEobj;
337]] 374]]
338 local symname = LJBC_PREFIX..ctx.modname 375 local symname = LJBC_PREFIX..ctx.modname
339 local is64 = false 376 local ai = assert(map_arch[ctx.arch])
340 if ctx.arch == "x86" then 377 local is64 = ai.b == 64
341 symname = "_"..symname
342 elseif ctx.arch == "x64" then
343 is64 = true
344 end
345 local symexport = " /EXPORT:"..symname..",DATA " 378 local symexport = " /EXPORT:"..symname..",DATA "
346 379
347 -- The file format is always little-endian. Swap if the host is big-endian. 380 -- The file format is always little-endian. Swap if the host is big-endian.
@@ -355,7 +388,7 @@ typedef struct {
355 -- Create PE object and fill in header. 388 -- Create PE object and fill in header.
356 local o = ffi.new("PEobj") 389 local o = ffi.new("PEobj")
357 local hdr = o.hdr 390 local hdr = o.hdr
358 hdr.arch = f16(({ x86=0x14c, x64=0x8664, arm=0x1c0, ppc=0x1f2, mips=0x366, mipsel=0x366 })[ctx.arch]) 391 hdr.arch = f16(assert(ai.p))
359 hdr.nsects = f16(2) 392 hdr.nsects = f16(2)
360 hdr.symtabofs = f32(ffi.offsetof(o, "sym0")) 393 hdr.symtabofs = f32(ffi.offsetof(o, "sym0"))
361 hdr.nsyms = f32(6) 394 hdr.nsyms = f32(6)
@@ -411,23 +444,11 @@ typedef struct
411typedef struct { 444typedef struct {
412 uint32_t cmd, cmdsize; 445 uint32_t cmd, cmdsize;
413 char segname[16]; 446 char segname[16];
414 uint32_t vmaddr, vmsize, fileoff, filesize;
415 uint32_t maxprot, initprot, nsects, flags;
416} mach_segment_command;
417typedef struct {
418 uint32_t cmd, cmdsize;
419 char segname[16];
420 uint64_t vmaddr, vmsize, fileoff, filesize; 447 uint64_t vmaddr, vmsize, fileoff, filesize;
421 uint32_t maxprot, initprot, nsects, flags; 448 uint32_t maxprot, initprot, nsects, flags;
422} mach_segment_command_64; 449} mach_segment_command_64;
423typedef struct { 450typedef struct {
424 char sectname[16], segname[16]; 451 char sectname[16], segname[16];
425 uint32_t addr, size;
426 uint32_t offset, align, reloff, nreloc, flags;
427 uint32_t reserved1, reserved2;
428} mach_section;
429typedef struct {
430 char sectname[16], segname[16];
431 uint64_t addr, size; 452 uint64_t addr, size;
432 uint32_t offset, align, reloff, nreloc, flags; 453 uint32_t offset, align, reloff, nreloc, flags;
433 uint32_t reserved1, reserved2, reserved3; 454 uint32_t reserved1, reserved2, reserved3;
@@ -438,130 +459,78 @@ typedef struct {
438typedef struct { 459typedef struct {
439 int32_t strx; 460 int32_t strx;
440 uint8_t type, sect; 461 uint8_t type, sect;
441 int16_t desc;
442 uint32_t value;
443} mach_nlist;
444typedef struct {
445 uint32_t strx;
446 uint8_t type, sect;
447 uint16_t desc; 462 uint16_t desc;
448 uint64_t value; 463 uint64_t value;
449} mach_nlist_64; 464} mach_nlist_64;
450typedef struct
451{
452 uint32_t magic, nfat_arch;
453} mach_fat_header;
454typedef struct
455{
456 uint32_t cputype, cpusubtype, offset, size, align;
457} mach_fat_arch;
458typedef struct {
459 struct {
460 mach_header hdr;
461 mach_segment_command seg;
462 mach_section sec;
463 mach_symtab_command sym;
464 } arch[1];
465 mach_nlist sym_entry;
466 uint8_t space[4096];
467} mach_obj;
468typedef struct { 465typedef struct {
469 struct { 466 mach_header_64 hdr;
470 mach_header_64 hdr; 467 mach_segment_command_64 seg;
471 mach_segment_command_64 seg; 468 mach_section_64 sec;
472 mach_section_64 sec; 469 mach_symtab_command sym;
473 mach_symtab_command sym;
474 } arch[1];
475 mach_nlist_64 sym_entry;
476 uint8_t space[4096];
477} mach_obj_64; 470} mach_obj_64;
478typedef struct { 471typedef struct {
479 mach_fat_header fat; 472 mach_nlist_64 sym_entry;
480 mach_fat_arch fat_arch[4];
481 struct {
482 mach_header hdr;
483 mach_segment_command seg;
484 mach_section sec;
485 mach_symtab_command sym;
486 } arch[4];
487 mach_nlist sym_entry;
488 uint8_t space[4096]; 473 uint8_t space[4096];
489} mach_fat_obj; 474} mach_obj_64_tail;
490]] 475]]
491 local symname = '_'..LJBC_PREFIX..ctx.modname 476 local symname = '_'..LJBC_PREFIX..ctx.modname
492 local isfat, is64, align, mobj = false, false, 4, "mach_obj" 477 local cputype, cpusubtype = 0x01000007, 3
493 if ctx.arch == "x64" then 478 if ctx.arch ~= "x64" then
494 is64, align, mobj = true, 8, "mach_obj_64" 479 check(ctx.arch == "arm64", "unsupported architecture for OSX")
495 elseif ctx.arch == "arm" then 480 cputype, cpusubtype = 0x0100000c, 0
496 isfat, mobj = true, "mach_fat_obj"
497 else
498 check(ctx.arch == "x86", "unsupported architecture for OSX")
499 end 481 end
500 local function aligned(v, a) return bit.band(v+a-1, -a) end 482 local function aligned(v, a) return bit.band(v+a-1, -a) end
501 local be32 = bit.bswap -- Mach-O FAT is BE, supported archs are LE.
502 483
503 -- Create Mach-O object and fill in header. 484 -- Create Mach-O object and fill in header.
504 local o = ffi.new(mobj) 485 local o = ffi.new("mach_obj_64")
505 local mach_size = aligned(ffi.offsetof(o, "space")+#symname+2, align) 486 local t = ffi.new("mach_obj_64_tail")
506 local cputype = ({ x86={7}, x64={0x01000007}, arm={7,12,12,12} })[ctx.arch] 487 local ofs_bc = ffi.sizeof(o)
507 local cpusubtype = ({ x86={3}, x64={3}, arm={3,6,9,11} })[ctx.arch] 488 local sz_bc = aligned(#s, 8)
508 if isfat then 489 local ofs_sym = ofs_bc + sz_bc
509 o.fat.magic = be32(0xcafebabe)
510 o.fat.nfat_arch = be32(#cpusubtype)
511 end
512 490
513 -- Fill in sections and symbols. 491 -- Fill in sections and symbols.
514 for i=0,#cpusubtype-1 do 492 o.hdr.magic = 0xfeedfacf
515 local ofs = 0 493 o.hdr.cputype = cputype
516 if isfat then 494 o.hdr.cpusubtype = cpusubtype
517 local a = o.fat_arch[i] 495 o.hdr.filetype = 1
518 a.cputype = be32(cputype[i+1]) 496 o.hdr.ncmds = 2
519 a.cpusubtype = be32(cpusubtype[i+1]) 497 o.hdr.sizeofcmds = ffi.sizeof(o.seg)+ffi.sizeof(o.sec)+ffi.sizeof(o.sym)
520 -- Subsequent slices overlap each other to share data. 498 o.seg.cmd = 0x19
521 ofs = ffi.offsetof(o, "arch") + i*ffi.sizeof(o.arch[0]) 499 o.seg.cmdsize = ffi.sizeof(o.seg)+ffi.sizeof(o.sec)
522 a.offset = be32(ofs) 500 o.seg.vmsize = #s
523 a.size = be32(mach_size-ofs+#s) 501 o.seg.fileoff = ofs_bc
524 end 502 o.seg.filesize = #s
525 local a = o.arch[i] 503 o.seg.maxprot = 1
526 a.hdr.magic = is64 and 0xfeedfacf or 0xfeedface 504 o.seg.initprot = 1
527 a.hdr.cputype = cputype[i+1] 505 o.seg.nsects = 1
528 a.hdr.cpusubtype = cpusubtype[i+1] 506 ffi.copy(o.sec.sectname, "__data")
529 a.hdr.filetype = 1 507 ffi.copy(o.sec.segname, "__DATA")
530 a.hdr.ncmds = 2 508 o.sec.size = #s
531 a.hdr.sizeofcmds = ffi.sizeof(a.seg)+ffi.sizeof(a.sec)+ffi.sizeof(a.sym) 509 o.sec.offset = ofs_bc
532 a.seg.cmd = is64 and 0x19 or 0x1 510 o.sym.cmd = 2
533 a.seg.cmdsize = ffi.sizeof(a.seg)+ffi.sizeof(a.sec) 511 o.sym.cmdsize = ffi.sizeof(o.sym)
534 a.seg.vmsize = #s 512 o.sym.symoff = ofs_sym
535 a.seg.fileoff = mach_size-ofs 513 o.sym.nsyms = 1
536 a.seg.filesize = #s 514 o.sym.stroff = ofs_sym + ffi.offsetof(t, "space")
537 a.seg.maxprot = 1 515 o.sym.strsize = aligned(#symname+2, 8)
538 a.seg.initprot = 1 516 t.sym_entry.type = 0xf
539 a.seg.nsects = 1 517 t.sym_entry.sect = 1
540 ffi.copy(a.sec.sectname, "__data") 518 t.sym_entry.strx = 1
541 ffi.copy(a.sec.segname, "__DATA") 519 ffi.copy(t.space+1, symname)
542 a.sec.size = #s
543 a.sec.offset = mach_size-ofs
544 a.sym.cmd = 2
545 a.sym.cmdsize = ffi.sizeof(a.sym)
546 a.sym.symoff = ffi.offsetof(o, "sym_entry")-ofs
547 a.sym.nsyms = 1
548 a.sym.stroff = ffi.offsetof(o, "sym_entry")+ffi.sizeof(o.sym_entry)-ofs
549 a.sym.strsize = aligned(#symname+2, align)
550 end
551 o.sym_entry.type = 0xf
552 o.sym_entry.sect = 1
553 o.sym_entry.strx = 1
554 ffi.copy(o.space+1, symname)
555 520
556 -- Write Mach-O object file. 521 -- Write Mach-O object file.
557 local fp = savefile(output, "wb") 522 local fp = savefile(output, "wb")
558 fp:write(ffi.string(o, mach_size)) 523 fp:write(ffi.string(o, ofs_bc))
559 bcsave_tail(fp, output, s) 524 fp:write(s, ("\0"):rep(sz_bc - #s))
525 bcsave_tail(fp, output, ffi.string(t, ffi.offsetof(t, "space") + o.sym.strsize))
560end 526end
561 527
562local function bcsave_obj(ctx, output, s) 528local function bcsave_obj(ctx, output, s)
563 local ok, ffi = pcall(require, "ffi") 529 local ok, ffi = pcall(require, "ffi")
564 check(ok, "FFI library required to write this file type") 530 check(ok, "FFI library required to write this file type")
531 if output == "-" and jit.os == "Windows" then
532 set_stdout_binary(ffi)
533 end
565 if ctx.os == "windows" then 534 if ctx.os == "windows" then
566 return bcsave_peobj(ctx, output, s, ffi) 535 return bcsave_peobj(ctx, output, s, ffi)
567 elseif ctx.os == "osx" then 536 elseif ctx.os == "osx" then
@@ -573,14 +542,14 @@ end
573 542
574------------------------------------------------------------------------------ 543------------------------------------------------------------------------------
575 544
576local function bclist(input, output) 545local function bclist(ctx, input, output)
577 local f = readfile(input) 546 local f = readfile(ctx, input)
578 require("jit.bc").dump(f, savefile(output, "w"), true) 547 require("jit.bc").dump(f, savefile(output, "w"), true)
579end 548end
580 549
581local function bcsave(ctx, input, output) 550local function bcsave(ctx, input, output)
582 local f = readfile(input) 551 local f = readfile(ctx, input)
583 local s = string.dump(f, ctx.strip) 552 local s = string.dump(f, ctx.mode)
584 local t = ctx.type 553 local t = ctx.type
585 if not t then 554 if not t then
586 t = detecttype(output) 555 t = detecttype(output)
@@ -603,35 +572,43 @@ local function docmd(...)
603 local n = 1 572 local n = 1
604 local list = false 573 local list = false
605 local ctx = { 574 local ctx = {
606 strip = true, arch = jit.arch, os = string.lower(jit.os), 575 mode = "bt", arch = jit.arch, os = jit.os:lower(),
607 type = false, modname = false, 576 type = false, modname = false, string = false,
608 } 577 }
578 local strip = "s"
579 local gc64 = ""
609 while n <= #arg do 580 while n <= #arg do
610 local a = arg[n] 581 local a = arg[n]
611 if type(a) == "string" and string.sub(a, 1, 1) == "-" and a ~= "-" then 582 if type(a) == "string" and a:sub(1, 1) == "-" and a ~= "-" then
612 table.remove(arg, n) 583 tremove(arg, n)
613 if a == "--" then break end 584 if a == "--" then break end
614 for m=2,#a do 585 for m=2,#a do
615 local opt = string.sub(a, m, m) 586 local opt = a:sub(m, m)
616 if opt == "l" then 587 if opt == "l" then
617 list = true 588 list = true
618 elseif opt == "s" then 589 elseif opt == "s" then
619 ctx.strip = true 590 strip = "s"
620 elseif opt == "g" then 591 elseif opt == "g" then
621 ctx.strip = false 592 strip = ""
593 elseif opt == "W" or opt == "X" then
594 gc64 = opt
595 elseif opt == "d" then
596 ctx.mode = ctx.mode .. opt
622 else 597 else
623 if arg[n] == nil or m ~= #a then usage() end 598 if arg[n] == nil or m ~= #a then usage() end
624 if opt == "e" then 599 if opt == "e" then
625 if n ~= 1 then usage() end 600 if n ~= 1 then usage() end
626 arg[1] = check(loadstring(arg[1])) 601 ctx.string = true
627 elseif opt == "n" then 602 elseif opt == "n" then
628 ctx.modname = checkmodname(table.remove(arg, n)) 603 ctx.modname = checkmodname(tremove(arg, n))
629 elseif opt == "t" then 604 elseif opt == "t" then
630 ctx.type = checkarg(table.remove(arg, n), map_type, "file type") 605 ctx.type = checkarg(tremove(arg, n), map_type, "file type")
631 elseif opt == "a" then 606 elseif opt == "a" then
632 ctx.arch = checkarg(table.remove(arg, n), map_arch, "architecture") 607 ctx.arch = checkarg(tremove(arg, n), map_arch, "architecture")
633 elseif opt == "o" then 608 elseif opt == "o" then
634 ctx.os = checkarg(table.remove(arg, n), map_os, "OS name") 609 ctx.os = checkarg(tremove(arg, n), map_os, "OS name")
610 elseif opt == "F" then
611 ctx.filename = "@"..tremove(arg, n)
635 else 612 else
636 usage() 613 usage()
637 end 614 end
@@ -641,9 +618,10 @@ local function docmd(...)
641 n = n + 1 618 n = n + 1
642 end 619 end
643 end 620 end
621 ctx.mode = ctx.mode .. strip .. gc64
644 if list then 622 if list then
645 if #arg == 0 or #arg > 2 then usage() end 623 if #arg == 0 or #arg > 2 then usage() end
646 bclist(arg[1], arg[2] or "-") 624 bclist(ctx, arg[1], arg[2] or "-")
647 else 625 else
648 if #arg ~= 2 then usage() end 626 if #arg ~= 2 then usage() end
649 bcsave(ctx, arg[1], arg[2]) 627 bcsave(ctx, arg[1], arg[2])
@@ -653,7 +631,7 @@ end
653------------------------------------------------------------------------------ 631------------------------------------------------------------------------------
654 632
655-- Public module functions. 633-- Public module functions.
656module(...) 634return {
657 635 start = docmd -- Process -b command line option.
658start = docmd -- Process -b command line option. 636}
659 637
diff --git a/src/jit/dis_arm.lua b/src/jit/dis_arm.lua
index ca8df958..0adc799d 100644
--- a/src/jit/dis_arm.lua
+++ b/src/jit/dis_arm.lua
@@ -658,7 +658,7 @@ local function disass_block(ctx, ofs, len)
658end 658end
659 659
660-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 660-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
661local function create_(code, addr, out) 661local function create(code, addr, out)
662 local ctx = {} 662 local ctx = {}
663 ctx.code = code 663 ctx.code = code
664 ctx.addr = addr or 0 664 ctx.addr = addr or 0
@@ -670,20 +670,20 @@ local function create_(code, addr, out)
670end 670end
671 671
672-- Simple API: disassemble code (a string) at address and output via out. 672-- Simple API: disassemble code (a string) at address and output via out.
673local function disass_(code, addr, out) 673local function disass(code, addr, out)
674 create_(code, addr, out):disass() 674 create(code, addr, out):disass()
675end 675end
676 676
677-- Return register name for RID. 677-- Return register name for RID.
678local function regname_(r) 678local function regname(r)
679 if r < 16 then return map_gpr[r] end 679 if r < 16 then return map_gpr[r] end
680 return "d"..(r-16) 680 return "d"..(r-16)
681end 681end
682 682
683-- Public module functions. 683-- Public module functions.
684module(...) 684return {
685 685 create = create,
686create = create_ 686 disass = disass,
687disass = disass_ 687 regname = regname
688regname = regname_ 688}
689 689
diff --git a/src/jit/dis_arm64.lua b/src/jit/dis_arm64.lua
new file mode 100644
index 00000000..896fab79
--- /dev/null
+++ b/src/jit/dis_arm64.lua
@@ -0,0 +1,1233 @@
1----------------------------------------------------------------------------
2-- LuaJIT ARM64 disassembler module.
3--
4-- Copyright (C) 2005-2026 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6--
7-- Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
8-- Sponsored by Cisco Systems, Inc.
9----------------------------------------------------------------------------
10-- This is a helper module used by the LuaJIT machine code dumper module.
11--
12-- It disassembles most user-mode AArch64 instructions.
13-- NYI: Advanced SIMD and VFP instructions.
14------------------------------------------------------------------------------
15
16local type = type
17local sub, byte, format = string.sub, string.byte, string.format
18local match, gmatch, gsub = string.match, string.gmatch, string.gsub
19local concat = table.concat
20local bit = require("bit")
21local band, bor, bxor, tohex = bit.band, bit.bor, bit.bxor, bit.tohex
22local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
23local ror = bit.ror
24
25------------------------------------------------------------------------------
26-- Opcode maps
27------------------------------------------------------------------------------
28
29local map_adr = { -- PC-relative addressing.
30 shift = 31, mask = 1,
31 [0] = "adrDBx", "adrpDBx"
32}
33
34local map_addsubi = { -- Add/subtract immediate.
35 shift = 29, mask = 3,
36 [0] = "add|movDNIg", "adds|cmnD0NIg", "subDNIg", "subs|cmpD0NIg",
37}
38
39local map_logi = { -- Logical immediate.
40 shift = 31, mask = 1,
41 [0] = {
42 shift = 22, mask = 1,
43 [0] = {
44 shift = 29, mask = 3,
45 [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig"
46 },
47 false -- unallocated
48 },
49 {
50 shift = 29, mask = 3,
51 [0] = "andDNig", "orr|movDN0ig", "eorDNig", "ands|tstD0Nig"
52 }
53}
54
55local map_movwi = { -- Move wide immediate.
56 shift = 31, mask = 1,
57 [0] = {
58 shift = 22, mask = 1,
59 [0] = {
60 shift = 29, mask = 3,
61 [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg"
62 }, false -- unallocated
63 },
64 {
65 shift = 29, mask = 3,
66 [0] = "movnDWRg", false, "movz|movDYRg", "movkDWRg"
67 },
68}
69
70local map_bitf = { -- Bitfield.
71 shift = 31, mask = 1,
72 [0] = {
73 shift = 22, mask = 1,
74 [0] = {
75 shift = 29, mask = 3,
76 [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12w",
77 "bfm|bfi|bfxilDN13w",
78 "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12w"
79 }
80 },
81 {
82 shift = 22, mask = 1,
83 {
84 shift = 29, mask = 3,
85 [0] = "sbfm|sbfiz|sbfx|asr|sxtw|sxth|sxtbDN12x",
86 "bfm|bfi|bfxilDN13x",
87 "ubfm|ubfiz|ubfx|lsr|lsl|uxth|uxtbDN12x"
88 }
89 }
90}
91
92local map_datai = { -- Data processing - immediate.
93 shift = 23, mask = 7,
94 [0] = map_adr, map_adr, map_addsubi, false,
95 map_logi, map_movwi, map_bitf,
96 {
97 shift = 15, mask = 0x1c0c1,
98 [0] = "extr|rorDNM4w", [0x10080] = "extr|rorDNM4x",
99 [0x10081] = "extr|rorDNM4x"
100 }
101}
102
103local map_logsr = { -- Logical, shifted register.
104 shift = 31, mask = 1,
105 [0] = {
106 shift = 15, mask = 1,
107 [0] = {
108 shift = 29, mask = 3,
109 [0] = {
110 shift = 21, mask = 1,
111 [0] = "andDNMSg", "bicDNMSg"
112 },
113 {
114 shift = 21, mask = 1,
115 [0] = "orr|movDN0MSg", "orn|mvnDN0MSg"
116 },
117 {
118 shift = 21, mask = 1,
119 [0] = "eorDNMSg", "eonDNMSg"
120 },
121 {
122 shift = 21, mask = 1,
123 [0] = "ands|tstD0NMSg", "bicsDNMSg"
124 }
125 },
126 false -- unallocated
127 },
128 {
129 shift = 29, mask = 3,
130 [0] = {
131 shift = 21, mask = 1,
132 [0] = "andDNMSg", "bicDNMSg"
133 },
134 {
135 shift = 21, mask = 1,
136 [0] = "orr|movDN0MSg", "orn|mvnDN0MSg"
137 },
138 {
139 shift = 21, mask = 1,
140 [0] = "eorDNMSg", "eonDNMSg"
141 },
142 {
143 shift = 21, mask = 1,
144 [0] = "ands|tstD0NMSg", "bicsDNMSg"
145 }
146 }
147}
148
149local map_assh = {
150 shift = 31, mask = 1,
151 [0] = {
152 shift = 15, mask = 1,
153 [0] = {
154 shift = 29, mask = 3,
155 [0] = {
156 shift = 22, mask = 3,
157 [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg"
158 },
159 {
160 shift = 22, mask = 3,
161 [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg",
162 "adds|cmnD0NMSg", "adds|cmnD0NMg"
163 },
164 {
165 shift = 22, mask = 3,
166 [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg"
167 },
168 {
169 shift = 22, mask = 3,
170 [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg",
171 "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg"
172 },
173 },
174 false -- unallocated
175 },
176 {
177 shift = 29, mask = 3,
178 [0] = {
179 shift = 22, mask = 3,
180 [0] = "addDNMSg", "addDNMSg", "addDNMSg", "addDNMg"
181 },
182 {
183 shift = 22, mask = 3,
184 [0] = "adds|cmnD0NMSg", "adds|cmnD0NMSg", "adds|cmnD0NMSg",
185 "adds|cmnD0NMg"
186 },
187 {
188 shift = 22, mask = 3,
189 [0] = "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0MSg", "sub|negDN0Mg"
190 },
191 {
192 shift = 22, mask = 3,
193 [0] = "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0MzSg",
194 "subs|cmp|negsD0N0MzSg", "subs|cmp|negsD0N0Mzg"
195 }
196 }
197}
198
199local map_addsubsh = { -- Add/subtract, shifted register.
200 shift = 22, mask = 3,
201 [0] = map_assh, map_assh, map_assh
202}
203
204local map_addsubex = { -- Add/subtract, extended register.
205 shift = 22, mask = 3,
206 [0] = {
207 shift = 29, mask = 3,
208 [0] = "addDNMXg", "adds|cmnD0NMXg", "subDNMXg", "subs|cmpD0NMzXg",
209 }
210}
211
212local map_addsubc = { -- Add/subtract, with carry.
213 shift = 10, mask = 63,
214 [0] = {
215 shift = 29, mask = 3,
216 [0] = "adcDNMg", "adcsDNMg", "sbc|ngcDN0Mg", "sbcs|ngcsDN0Mg",
217 }
218}
219
-- Conditional compare.
-- Bit 4 must be clear. The 2-bit field at bit 10 then picks the register
-- form (0) or the immediate form (2), and bits 29-30 pick the mnemonic.
-- In the innermost tables index 0 is absent and index 2 is explicitly false.
local map_ccomp = {
  shift = 4,
  mask = 1,
  [0] = {
    shift = 10,
    mask = 3,
    -- Conditional compare register.
    [0] = {
      shift = 29, mask = 3,
      [1] = "ccmnNMVCg", [2] = false, [3] = "ccmpNMVCg",
    },
    -- Conditional compare immediate.
    [2] = {
      shift = 29, mask = 3,
      [1] = "ccmnN5VCg", [2] = false, [3] = "ccmpN5VCg",
    },
  },
}
234
-- Conditional select.
-- Bit 11 must be clear; bit 10 and bits 29-30 then select the pattern.
--
-- Every register operand code (D, N, M) is followed by "z": the "z" code in
-- disass_ins renames the preceding operand from sp/wsp to xzr/wzr. The
-- conditional select instructions encode register 31 as the zero register,
-- never as the stack pointer, so without "z" such operands were printed as
-- "sp"/"wsp". "z" adds no operand of its own, so the operand counting done by
-- the "c" code (alias selection for cinc/cset/cinv/csetm/cneg) is unaffected.
local map_csel = {
  shift = 11, mask = 1,
  [0] = {
    shift = 10, mask = 1,
    [0] = {
      shift = 29, mask = 3,
      [0] = "cselDzNzMzCg", false, "csinv|cinv|csetmDzNzMzcg", false,
    },
    {
      shift = 29, mask = 3,
      [0] = "csinc|cinc|csetDzNzMzcg", false, "csneg|cnegDzNzMzcg", false,
    }
  }
}
249
-- Data processing, 1 source.
-- Bit 29 must be clear. Bit 31 then picks one of two tables, each keyed on
-- the 11-bit field at bit 10.
local map_data1s = {
  shift = 29,
  mask = 1,
  [0] = {
    shift = 31,
    mask = 1,
    -- Bit 31 clear.
    [0] = {
      shift = 10, mask = 0x7ff,
      [0] = "rbitDNg", [1] = "rev16DNg", [2] = "revDNw", [3] = false,
      [4] = "clzDNg", [5] = "clsDNg",
    },
    -- Bit 31 set.
    [1] = {
      shift = 10, mask = 0x7ff,
      [0] = "rbitDNg", [1] = "rev16DNg", [2] = "rev32DNx", [3] = "revDNx",
      [4] = "clzDNg", [5] = "clsDNg",
    },
  },
}
264
-- Data processing, 2 sources.
-- Bit 29 must be clear; the 6-bit field at bit 10 selects the instruction.
-- Index 0 is absent; indexes marked false are explicitly undecoded.
local map_data2s = {
  shift = 29,
  mask = 1,
  [0] = {
    shift = 10,
    mask = 63,
    [1] = false,
    [2] = "udivDNMg",
    [3] = "sdivDNMg",
    [4] = false, [5] = false, [6] = false, [7] = false,
    [8] = "lslDNMg",
    [9] = "lsrDNMg",
    [10] = "asrDNMg",
    [11] = "rorDNMg",
  },
}
273
-- Data processing, 3 sources.
-- Keyed on bits 29-31. Value 0 decodes madd/msub only (bits 21-23 must be
-- zero, bit 15 selects); value 4 decodes on bit 15 first and then on
-- bits 21-23, which adds the long multiply and multiply-high forms.
local map_data3s = {
  shift = 29,
  mask = 7,
  [0] = {
    shift = 21, mask = 7,
    [0] = {
      shift = 15, mask = 1,
      [0] = "madd|mulDNMA0g", [1] = "msub|mnegDNMA0g",
    },
  },
  [1] = false,
  [2] = false,
  [3] = false,
  [4] = {
    shift = 15, mask = 1,
    [0] = {
      shift = 21, mask = 7,
      [0] = "madd|mulDNMA0g", [1] = "smaddl|smullDxNMwA0x",
      [2] = "smulhDNMx", [3] = false, [4] = false,
      [5] = "umaddl|umullDxNMwA0x", [6] = "umulhDNMx",
    },
    [1] = {
      shift = 21, mask = 7,
      [0] = "msub|mnegDNMA0g", [1] = "smsubl|smneglDxNMwA0x",
      [2] = false, [3] = false, [4] = false,
      [5] = "umsubl|umneglDxNMwA0x",
    },
  },
}
297
-- Data processing, register.
-- Bit 28 splits the group. With bit 28 clear, bit 24 chooses between the
-- logical (shifted register) map and the add/subtract maps (bit 21 picks
-- shifted vs. extended register). With bit 28 set, the 4-bit field at
-- bit 21 chooses the sub-group; odd values 1-7 are explicitly undecoded.
local map_datar = {
  shift = 28,
  mask = 1,
  [0] = {
    shift = 24, mask = 1,
    [0] = map_logsr,
    [1] = {
      shift = 21, mask = 1,
      [0] = map_addsubsh,
      [1] = map_addsubex,
    },
  },
  [1] = {
    shift = 21, mask = 15,
    [0] = map_addsubc,
    [1] = false,
    [2] = map_ccomp,
    [3] = false,
    [4] = map_csel,
    [5] = false,
    [6] = {
      shift = 30, mask = 1,
      [0] = map_data2s,
      [1] = map_data1s,
    },
    [7] = false,
    [8] = map_data3s, [9] = map_data3s, [10] = map_data3s, [11] = map_data3s,
    [12] = map_data3s, [13] = map_data3s, [14] = map_data3s, [15] = map_data3s,
  },
}
319
-- Load register, literal.
-- Bit 26 picks the general-purpose (0) or FP (1) table; bits 30-31 then
-- select the destination width.
local map_lrl = {
  shift = 26,
  mask = 1,
  [0] = { shift = 30, mask = 3, [0] = "ldrDwB", [1] = "ldrDxB", [2] = "ldrswDxB" },
  [1] = { shift = 30, mask = 3, [0] = "ldrDsB", [1] = "ldrDdB" },
}
331
-- Load/store register, immediate pre/post-indexed.
-- Levels: bits 30-31 (access size), then bit 26 (0 = general-purpose
-- register, 1 = FP register, only present for sizes 2 and 3), then the
-- 2-bit field at bit 22 (store, load, sign-extending loads).
local map_lsriind = {
  shift = 30,
  mask = 3,
  [0] = {
    shift = 26, mask = 1,
    [0] = { shift = 22, mask = 3,
            [0] = "strbDwzL", "ldrbDwzL", "ldrsbDxzL", "ldrsbDwzL" },
  },
  [1] = {
    shift = 26, mask = 1,
    [0] = { shift = 22, mask = 3,
            [0] = "strhDwzL", "ldrhDwzL", "ldrshDxzL", "ldrshDwzL" },
  },
  [2] = {
    shift = 26, mask = 1,
    [0] = { shift = 22, mask = 3, [0] = "strDwzL", "ldrDwzL", "ldrswDxzL" },
    [1] = { shift = 22, mask = 3, [0] = "strDszL", "ldrDszL" },
  },
  [3] = {
    shift = 26, mask = 1,
    [0] = { shift = 22, mask = 3, [0] = "strDxzL", "ldrDxzL" },
    [1] = { shift = 22, mask = 3, [0] = "strDdzL", "ldrDdzL" },
  },
}
371
-- Load/store register: immediate forms (bit 21 clear) and register-offset
-- forms (bit 21 set). Both halves key on the 2-bit field at bit 10 next.
local map_lsriro = {
  shift = 21,
  mask = 1,
  -- Load/store register immediate.
  [0] = {
    shift = 10, mask = 3,
    -- Unscaled immediate (general-purpose registers only: bit 26 clear).
    [0] = {
      shift = 26, mask = 1,
      [0] = {
        shift = 30, mask = 3,
        [0] = { shift = 22, mask = 3, [0] = "sturbDwK", "ldurbDwK" },
        [1] = { shift = 22, mask = 3, [0] = "sturhDwK", "ldurhDwK" },
        [2] = { shift = 22, mask = 3, [0] = "sturDwK", "ldurDwK" },
        [3] = { shift = 22, mask = 3, [0] = "sturDxK", "ldurDxK" },
      },
    },
    -- Values 1 and 3 share the pre/post-indexed map; 2 is undecoded.
    [1] = map_lsriind,
    [2] = false,
    [3] = map_lsriind,
  },
  -- Load/store register, register offset (only value 2 is decoded).
  [1] = {
    shift = 10, mask = 3,
    [2] = {
      shift = 26, mask = 1,
      -- General-purpose registers.
      [0] = {
        shift = 30, mask = 3,
        [0] = { shift = 22, mask = 3,
                [0] = "strbDwO", "ldrbDwO", "ldrsbDxO", "ldrsbDwO" },
        [1] = { shift = 22, mask = 3,
                [0] = "strhDwO", "ldrhDwO", "ldrshDxO", "ldrshDwO" },
        [2] = { shift = 22, mask = 3, [0] = "strDwO", "ldrDwO", "ldrswDxO" },
        [3] = { shift = 22, mask = 3, [0] = "strDxO", "ldrDxO" },
      },
      -- FP registers (sizes 2 and 3 only).
      [1] = {
        shift = 30, mask = 3,
        [2] = { shift = 22, mask = 3, [0] = "strDsO", "ldrDsO" },
        [3] = { shift = 22, mask = 3, [0] = "strDdO", "ldrDdO" },
      },
    },
  },
}
436
-- Load/store register pair, offset.
-- Bit 22 separates stores (0) from loads (1); bits 30-31 and then bit 26
-- select the register width/class. For the store table, index 0 of the
-- second entry is absent (only the bit-26-set form is decoded there).
local map_lsp = {
  shift = 22,
  mask = 1,
  -- Stores.
  [0] = {
    shift = 30, mask = 3,
    [0] = { shift = 26, mask = 1, [0] = "stpDzAzwP", [1] = "stpDzAzsP" },
    [1] = { shift = 26, mask = 1, [1] = "stpDzAzdP" },
    [2] = { shift = 26, mask = 1, [0] = "stpDzAzxP" },
  },
  -- Loads.
  [1] = {
    shift = 30, mask = 3,
    [0] = { shift = 26, mask = 1, [0] = "ldpDzAzwP", [1] = "ldpDzAzsP" },
    [1] = { shift = 26, mask = 1, [0] = "ldpswDAxP", [1] = "ldpDzAzdP" },
    [2] = { shift = 26, mask = 1, [0] = "ldpDzAzxP" },
  },
}
470
-- Loads and stores.
-- The top level keys on bits 24, 28 and 29 (mask 0x31 after shifting by 24):
--   0x10        load register, literal
--   0x20, 0x21  load/store pair (2-bit field at bit 23 must be 1..3)
--   0x30        unscaled / pre-post-indexed / register-offset forms
--   0x31        unsigned scaled immediate offset (decoded inline below)
--
-- In the unsigned-offset tables the 2-bit field at bit 22 selects, in order:
-- store, load, sign-extending load into an x register, sign-extending load
-- into a w register. The x-before-w order matches the pre/post-indexed and
-- register-offset maps of this file; the ldrsb/ldrsh entries previously had
-- the two widths swapped, so the destination register was printed with the
-- wrong size.
local map_ls = {
  shift = 24, mask = 0x31,
  [0x10] = map_lrl, [0x30] = map_lsriro,
  [0x20] = {
    shift = 23, mask = 3,
    map_lsp, map_lsp, map_lsp
  },
  [0x21] = {
    shift = 23, mask = 3,
    map_lsp, map_lsp, map_lsp
  },
  [0x31] = {
    shift = 26, mask = 1,
    -- General-purpose registers, keyed on access size (bits 30-31).
    [0] = {
      shift = 30, mask = 3,
      [0] = {
        shift = 22, mask = 3,
        [0] = "strbDwzU", "ldrbDwzU", "ldrsbDxzU", "ldrsbDwzU"
      },
      {
        shift = 22, mask = 3,
        [0] = "strhDwzU", "ldrhDwzU", "ldrshDxzU", "ldrshDwzU"
      },
      {
        shift = 22, mask = 3,
        [0] = "strDwzU", "ldrDwzU", "ldrswDxzU"
      },
      {
        shift = 22, mask = 3,
        [0] = "strDxzU", "ldrDxzU"
      }
    },
    -- FP registers (sizes 2 and 3 only).
    {
      shift = 30, mask = 3,
      [2] = {
        shift = 22, mask = 3,
        [0] = "strDszU", "ldrDszU"
      },
      [3] = {
        shift = 22, mask = 3,
        [0] = "strDdzU", "ldrDdzU"
      }
    }
  },
}
516
-- Decode tree for the SIMD/FP data-processing group.
--
-- Like the other opcode maps, every table level names a bit field with
-- `shift`/`mask`; the lookup loop in disass_ins indexes the table with
-- band(rshift(op, shift), mask) until it reaches a string. A missing or
-- false entry makes the instruction fall through to unknown().
--
-- The top level keys on bits 28-30. Entry 1 holds the scalar FP
-- instructions: bit 24 clear and bit 21 set leads to the conversion,
-- 1-source, compare, immediate, conditional compare, 2-source and
-- conditional select tables (split on bits 10-15); bit 24 set leads to the
-- 3-source (fused multiply-add) table. Entry 2 matches one fixed "movi"
-- encoding through a whole-word mask.
--
-- The characters after the mnemonic in each pattern string are operand
-- codes interpreted by disass_ins.
517local map_datafp = { -- Data processing, SIMD and FP.
518 shift = 28, mask = 7,
519 { -- 001
520 shift = 24, mask = 1,
521 [0] = {
522 shift = 21, mask = 1,
523 {
524 shift = 10, mask = 3,
525 [0] = {
526 shift = 12, mask = 1,
527 [0] = {
528 shift = 13, mask = 1,
529 [0] = {
530 shift = 14, mask = 1,
531 [0] = {
532 shift = 15, mask = 1,
533 [0] = { -- FP/int conversion.
534 shift = 31, mask = 1,
535 [0] = {
536 shift = 16, mask = 0xff,
537 [0x20] = "fcvtnsDwNs", [0x21] = "fcvtnuDwNs",
538 [0x22] = "scvtfDsNw", [0x23] = "ucvtfDsNw",
539 [0x24] = "fcvtasDwNs", [0x25] = "fcvtauDwNs",
540 [0x26] = "fmovDwNs", [0x27] = "fmovDsNw",
541 [0x28] = "fcvtpsDwNs", [0x29] = "fcvtpuDwNs",
542 [0x30] = "fcvtmsDwNs", [0x31] = "fcvtmuDwNs",
543 [0x38] = "fcvtzsDwNs", [0x39] = "fcvtzuDwNs",
544 [0x60] = "fcvtnsDwNd", [0x61] = "fcvtnuDwNd",
545 [0x62] = "scvtfDdNw", [0x63] = "ucvtfDdNw",
546 [0x64] = "fcvtasDwNd", [0x65] = "fcvtauDwNd",
547 [0x68] = "fcvtpsDwNd", [0x69] = "fcvtpuDwNd",
548 [0x70] = "fcvtmsDwNd", [0x71] = "fcvtmuDwNd",
549 [0x78] = "fcvtzsDwNd", [0x79] = "fcvtzuDwNd"
550 },
551 {
552 shift = 16, mask = 0xff,
553 [0x20] = "fcvtnsDxNs", [0x21] = "fcvtnuDxNs",
554 [0x22] = "scvtfDsNx", [0x23] = "ucvtfDsNx",
555 [0x24] = "fcvtasDxNs", [0x25] = "fcvtauDxNs",
556 [0x28] = "fcvtpsDxNs", [0x29] = "fcvtpuDxNs",
557 [0x30] = "fcvtmsDxNs", [0x31] = "fcvtmuDxNs",
558 [0x38] = "fcvtzsDxNs", [0x39] = "fcvtzuDxNs",
559 [0x60] = "fcvtnsDxNd", [0x61] = "fcvtnuDxNd",
560 [0x62] = "scvtfDdNx", [0x63] = "ucvtfDdNx",
561 [0x64] = "fcvtasDxNd", [0x65] = "fcvtauDxNd",
562 [0x66] = "fmovDxNd", [0x67] = "fmovDdNx",
563 [0x68] = "fcvtpsDxNd", [0x69] = "fcvtpuDxNd",
564 [0x70] = "fcvtmsDxNd", [0x71] = "fcvtmuDxNd",
565 [0x78] = "fcvtzsDxNd", [0x79] = "fcvtzuDxNd"
566 }
567 }
568 },
569 { -- FP data-processing, 1 source.
570 shift = 31, mask = 1,
571 [0] = {
572 shift = 22, mask = 3,
573 [0] = {
574 shift = 15, mask = 63,
575 [0] = "fmovDNf", "fabsDNf", "fnegDNf",
576 "fsqrtDNf", false, "fcvtDdNs", false, false,
577 "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf",
578 "frintaDNf", false, "frintxDNf", "frintiDNf",
579 },
580 {
581 shift = 15, mask = 63,
582 [0] = "fmovDNf", "fabsDNf", "fnegDNf",
583 "fsqrtDNf", "fcvtDsNd", false, false, false,
584 "frintnDNf", "frintpDNf", "frintmDNf", "frintzDNf",
585 "frintaDNf", false, "frintxDNf", "frintiDNf",
586 }
587 }
588 }
589 },
590 { -- FP compare.
591 shift = 31, mask = 1,
592 [0] = {
593 shift = 14, mask = 3,
594 [0] = {
595 shift = 23, mask = 1,
596 [0] = {
597 shift = 0, mask = 31,
598 [0] = "fcmpNMf", [8] = "fcmpNZf",
599 [16] = "fcmpeNMf", [24] = "fcmpeNZf",
600 }
601 }
602 }
603 }
604 },
605 { -- FP immediate.
606 shift = 31, mask = 1,
607 [0] = {
608 shift = 5, mask = 31,
609 [0] = {
610 shift = 23, mask = 1,
611 [0] = "fmovDFf"
612 }
613 }
614 }
615 },
616 { -- FP conditional compare.
617 shift = 31, mask = 1,
618 [0] = {
619 shift = 23, mask = 1,
620 [0] = {
621 shift = 4, mask = 1,
622 [0] = "fccmpNMVCf", "fccmpeNMVCf"
623 }
624 }
625 },
626 { -- FP data-processing, 2 sources.
627 shift = 31, mask = 1,
628 [0] = {
629 shift = 23, mask = 1,
630 [0] = {
631 shift = 12, mask = 15,
632 [0] = "fmulDNMf", "fdivDNMf", "faddDNMf", "fsubDNMf",
633 "fmaxDNMf", "fminDNMf", "fmaxnmDNMf", "fminnmDNMf",
634 "fnmulDNMf"
635 }
636 }
637 },
638 { -- FP conditional select.
639 shift = 31, mask = 1,
640 [0] = {
641 shift = 23, mask = 1,
642 [0] = "fcselDNMCf"
643 }
644 }
645 }
646 },
647 { -- FP data-processing, 3 sources.
648 shift = 31, mask = 1,
649 [0] = {
650 shift = 15, mask = 1,
651 [0] = {
652 shift = 21, mask = 5,
653 [0] = "fmaddDNMAf", "fnmaddDNMAf"
654 },
655 {
656 shift = 21, mask = 5,
657 [0] = "fmsubDNMAf", "fnmsubDNMAf"
658 }
659 }
660 }
661 },
662 { -- 010
663 shift = 0, mask = 0x81f8fc00,
664 [0x100e400] = "moviDdG"
665 }
666}
667
-- Branches, exception generating and system instructions.
-- Keyed on bits 29-31. Entries 1 and 5 are the same compare/test-and-branch
-- table except for the register width letter of tbz/tbnz.
local map_br = {
  shift = 29,
  mask = 7,
  [0] = "bB",
  -- Compare & branch, immediate.
  [1] = {
    shift = 24, mask = 3,
    [0] = "cbzDBg", [1] = "cbnzDBg", [2] = "tbzDTBw", [3] = "tbnzDTBw",
  },
  -- Conditional branch, immediate (bits 24-25 and bit 4 must be zero).
  [2] = {
    shift = 24, mask = 3,
    [0] = {
      shift = 4, mask = 1,
      [0] = {
        shift = 0, mask = 15,
        [0] = "beqB", [1] = "bneB", [2] = "bhsB", [3] = "bloB",
        [4] = "bmiB", [5] = "bplB", [6] = "bvsB", [7] = "bvcB",
        [8] = "bhiB", [9] = "blsB", [10] = "bgeB", [11] = "bltB",
        [12] = "bgtB", [13] = "bleB", [14] = "balB",
      },
    },
  },
  [3] = false,
  [4] = "blB",
  -- Compare & branch, immediate.
  [5] = {
    shift = 24, mask = 3,
    [0] = "cbzDBg", [1] = "cbnzDBg", [2] = "tbzDTBx", [3] = "tbnzDTBx",
  },
  [6] = {
    shift = 24, mask = 3,
    -- Exception generation.
    [0] = {
      shift = 0, mask = 0xe0001f,
      [0x200000] = "brkW",
    },
    -- System instructions.
    [1] = {
      shift = 0, mask = 0x3fffff,
      [0x03201f] = "nop",
      [0x03245f] = "bti c",
      [0x03249f] = "bti j",
      [0x0324df] = "bti jc",
    },
    -- Unconditional branch, register.
    [2] = {
      shift = 0, mask = 0xfffc1f,
      [0x1f0000] = "brNx",
      [0x3f0000] = "blrNx",
      [0x5f0000] = "retNx",
    },
  },
}
710
-- Root of the decode tree: the 4-bit field at bit 25 selects the
-- instruction group. Values 0-3 are explicitly undecoded.
local map_init = {
  shift = 25,
  mask = 15,
  [0] = false, [1] = false, [2] = false, [3] = false,
  [4] = map_ls,
  [5] = map_datar,
  [6] = map_ls,
  [7] = map_datafp,
  [8] = map_datai,
  [9] = map_datai,
  [10] = map_br,
  [11] = map_br,
  [12] = map_ls,
  [13] = map_datar,
  [14] = map_ls,
  [15] = map_datafp,
}
716
717------------------------------------------------------------------------------
718
-- Register name tables, indexed by register class letter and then by
-- register number 0..31. Number 31 of the x/w classes is the stack pointer
-- name; patterns that need the zero register rename it via the "z" code.
local map_regs = { x = { [31] = "sp" }, w = { [31] = "wsp" }, d = {}, s = {} }

for num = 0, 31 do
  if num < 31 then
    map_regs.x[num] = "x"..num
    map_regs.w[num] = "w"..num
  end
  map_regs.d[num] = "d"..num
  map_regs.s[num] = "s"..num
end
731
-- Condition code names, indexed by the 4-bit condition field.
-- Index 15 ("nv") is included so that an encoding with condition 0b1111
-- still prints a condition operand: a nil entry would make the "C"/"c"
-- operand codes in disass_ins drop the operand silently. Alias selection in
-- the "c" code already skips conditions 14 and 15, so it is unaffected.
local map_cond = {
  [0] = "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv",
}
736
-- Shift type names, indexed by a 2-bit shift field.
local map_shift = {
  [0] = "lsl",
  [1] = "lsr",
  [2] = "asr",
  [3] = "ror",
}

-- Register extension names, indexed by a 3-bit option field.
local map_extend = {
  [0] = "uxtb", [1] = "uxth", [2] = "uxtw", [3] = "uxtx",
  [4] = "sxtb", [5] = "sxth", [6] = "sxtw", [7] = "sxtx",
}
742
743------------------------------------------------------------------------------
744
-- Emit one formatted output line for the current instruction and advance
-- ctx.pos by 4. The line holds the address, the raw opcode word (only when
-- ctx.hexdump > 0), the mnemonic and the comma-separated operands. If
-- ctx.rel is set and ctx.symtab has a name for it, "->name" is appended.
local function putop(ctx, text, operands)
  local pos = ctx.pos
  local sym = ctx.rel and ctx.symtab[ctx.rel]
  local extra = sym and "\t->"..sym or ""
  local line
  if ctx.hexdump > 0 then
    line = format("%08x %s %-5s %s%s\n",
      ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra)
  else
    line = format("%08x %-5s %s%s\n",
      ctx.addr+pos, text, concat(operands, ", "), extra)
  end
  ctx.out(line)
  ctx.pos = pos + 4
end
764
-- Fallback for opcodes the decode tree does not cover: print the raw word
-- as a ".long" directive.
local function unknown(ctx)
  local word = "0x"..tohex(ctx.op)
  return putop(ctx, ".long", { word })
end
769
-- Look up a register name for operand code `p` when the pattern carries an
-- explicit register class letter (x, w, d or s) after that code, e.g. the
-- "Dx" in "smulhDNMx".
local function match_reg(p, pat, regnum)
  local class = match(pat, p.."%w-([xwds])")
  return map_regs[class][regnum]
end
773
-- Format a number as lower-case hex. Negative values go through tohex();
-- everything else is printed with "%x" and therefore has no leading zeros.
local function fmt_hex32(x)
  if x < 0 then return tohex(x) end
  return format("%x", x)
end
781
-- Replication multipliers used by decode_imm13, indexed by `len` (1..5).
-- Multiplying an element narrower than 32 bits by the matching constant
-- repeats it across a 32-bit word (every 2, 4, 8 or 16 bits); the last
-- entry leaves a full 32-bit element unchanged.
782local imm13_rep = { 0x55555555, 0x11111111, 0x01010101, 0x00010001, 0x00000001 }
783
-- Decode the bitmask-immediate fields of `op` into a hex string without a
-- "0x" prefix (the "i" operand code in disass_ins prepends "#0x").
-- Fields read: imms = bits 10-15, immr = bits 16-21, selector = bit 22.
--  * Bit 22 clear: imms also encodes the element size (`len`); the element
--    is built from a run of ones, rotated by immr and replicated through
--    imm13_rep. If bit 31 of `op` is set, the 32-bit hex form is emitted
--    twice to form a 64-bit constant.
--  * Bit 22 set: a 64-bit value is built as two 32-bit halves (lo, hi),
--    rotated across both halves by immr; hi is omitted when it is zero.
784local function decode_imm13(op)
785 local imms = band(rshift(op, 10), 63)
786 local immr = band(rshift(op, 16), 63)
787 if band(op, 0x00400000) == 0 then
788 local len = 5
789 if imms >= 56 then
790 if imms >= 60 then len = 1 else len = 2 end
791 elseif imms >= 48 then len = 3 elseif imms >= 32 then len = 4 end
792 local l = lshift(1, len)-1
793 local s = band(imms, l)
794 local r = band(immr, l)
795 local imm = ror(rshift(-1, 31-s), r)
796 if len ~= 5 then imm = band(imm, lshift(1, l)-1) + rshift(imm, 31-l) end
797 imm = imm * imm13_rep[len]
798 local ix = fmt_hex32(imm)
799 if rshift(op, 31) ~= 0 then
800 return ix..tohex(imm)
801 else
802 return ix
803 end
804 else
805 local lo, hi = -1, 0
806 if imms < 32 then lo = rshift(-1, 31-imms) else hi = rshift(-1, 63-imms) end
807 if immr ~= 0 then
808 lo, hi = ror(lo, immr), ror(hi, immr)
809 local x = immr == 32 and 0 or band(bxor(lo, hi), lshift(-1, 32-immr))
810 lo, hi = bxor(lo, x), bxor(hi, x)
811 if immr >= 32 then lo, hi = hi, lo end
812 end
813 if hi ~= 0 then
814 return fmt_hex32(hi)..tohex(lo)
815 else
816 return fmt_hex32(lo)
817 end
818 end
819end
820
-- Extract the signed PC-relative byte offset from `op`. The field layout
-- depends on the mnemonic:
--   b, bl      26-bit word offset in bits 0-25
--   tbz, tbnz  14-bit word offset in bits 5-18
--   adr, adrp  19-bit field in bits 5-23 (scaled by 4) OR-ed with the 2-bit
--              field in bits 29-30
--   otherwise  19-bit word offset in bits 5-23
local function parse_immpc(op, name)
  if name == "b" or name == "bl" then
    return arshift(lshift(op, 6), 4)
  end
  if name == "tbz" or name == "tbnz" then
    return lshift(arshift(lshift(op, 13), 18), 2)
  end
  local ofs19 = lshift(arshift(lshift(op, 8), 13), 2)
  if name == "adr" or name == "adrp" then
    return bor(ofs19, band(rshift(op, 29), 3))
  end
  return ofs19
end
834
-- Expand the 8-bit FP immediate in bits 13-20 of `op` into a Lua number:
-- bit 20 is the sign, bits 17-19 form the exponent, and bits 13-16 form the
-- fraction (with an implicit leading one, hence the 16+).
local function parse_fpimm8(op)
  local frac = 16+band(rshift(op, 13), 15)
  local exp = bxor(rshift(arshift(lshift(op, 12), 5), 24), 0x80) - 131
  local value = frac * 2^exp
  if band(op, 0x100000) ~= 0 then value = -value end
  return value
end
841
-- Expand the 8 immediate bits of a "movi" encoding into a 64-bit byte mask:
-- each immediate bit becomes one 0x00 or 0xff byte. Bits 5-8 of `op` form
-- the low word and bits 9 and 16-18 the high word. Returns the hex digits
-- without a "0x" prefix; the high word is omitted when it is zero.
local function decode_fpmovi(op)
  local lobits = rshift(op, 5)
  local hibits = rshift(op, 9)
  local lo = bor(band(lobits, 1) * 0xff,
                 band(lobits, 2) * 0x7f80,
                 band(lobits, 4) * 0x3fc000,
                 band(lobits, 8) * 0x1fe00000)
  local hi = bor(band(hibits, 1) * 0xff,
                 band(hibits, 0x80) * 0x1fe,
                 band(hibits, 0x100) * 0xff00,
                 band(hibits, 0x200) * 0x7f8000)
  if hi == 0 then
    return fmt_hex32(lo)
  end
  return fmt_hex32(hi)..tohex(lo)
end
855
-- Decide whether a bitfield-move encoding should be shown with its
-- bitfield-extract alias. Returns false when imms < immr, when imms is 31
-- or 63, or when immr is 0 and imms is 7 or 15 in a case where disass_ins
-- has a dedicated alias (any sf == 0 encoding, or sf ~= 0 with uns == 0).
-- Returns true otherwise.
local function prefer_bfx(sf, uns, imms, immr)
  if imms < immr or imms == 31 or imms == 63 then return false end
  if immr ~= 0 then return true end
  local narrow = imms == 7 or imms == 15
  if sf == 0 then return not narrow end
  if uns == 0 then return not (narrow or imms == 31) end
  return true
end
870
871-- Disassemble a single instruction.
-- Decode and print the instruction at ctx.pos.
--
-- Steps:
--  1. Read four bytes from ctx.code and assemble them little-endian into
--     `op` (also stored in ctx.op); ctx.rel is cleared.
--  2. Walk the decode tree from map_init: each table is indexed with
--     band(rshift(op, shift), mask), falling back to its `_` entry, until
--     a pattern string is found. A nil/false entry ends in unknown(ctx).
--  3. Split the pattern into the mnemonic, optional "|"-separated alias
--     names (altname), an optional "."-suffix and the operand codes.
--  4. If the pattern contains "g" or "f", pick the default register table:
--     "g" selects x/w by bit 31, "f" selects d/s by bit 22.
--  5. Process the operand codes one character at a time. Most codes append
--     one operand; some rewrite `name` or earlier operands to select an
--     alias form.
--  6. Hand the result to putop(), which prints it and advances ctx.pos.
--
-- Operand codes handled below:
--   D N M A  register from bits 0-4 / 5-9 / 16-20 / 10-14
--   B        PC-relative target via parse_immpc; also sets ctx.rel
--   T        bit number from bit 31 (as value 32) and bits 19-23
--   V        4-bit immediate in bits 0-3
--   C c      condition name from bits 12-15 ("c" also selects aliases)
--   W Y      16-bit immediate in bits 5-20 ("Y" may select the alias)
--   L U K O P  memory operands: pre/post-indexed, unsigned scaled offset,
--            unscaled signed offset, register offset, register pair
--   I i      12-bit immediate with optional "lsl #12" / bitmask immediate
--   1 2 3 4  bitfield and extract immediates with alias handling
--   5        5-bit immediate in bits 16-20
--   S X R    shift amount, extend specifier, "lsl #(16*n)"
--   z        rename the previous operand from sp/wsp to xzr/wzr
--   Z F G    literal 0, FP immediate, movi byte mask
--   g f x w d s  register class markers, consumed by D/N/M/A
--   0        drop the previous operand if it is sp/wsp and switch to alias
--   " "      append the rest of the pattern verbatim and stop
--
-- Locals of note: `last` is the most recently produced operand value,
-- `second0` becomes true once the first "0" code has been seen, and `immr`
-- is remembered from code "1" for use by codes "2" and "3".
872local function disass_ins(ctx)
873 local pos = ctx.pos
874 local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4)
875 local op = bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0)
876 local operands = {}
877 local suffix = ""
878 local last, name, pat
879 local map_reg
880 ctx.op = op
881 ctx.rel = nil
882 last = nil
883 local opat
884 opat = map_init[band(rshift(op, 25), 15)]
885 while type(opat) ~= "string" do
886 if not opat then return unknown(ctx) end
887 opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
888 end
889 name, pat = match(opat, "^([a-z0-9]*)(.*)")
890 local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
891 if altname then pat = pat2 end
892 if sub(pat, 1, 1) == "." then
893 local s2, p2 = match(pat, "^([a-z0-9.]*)(.*)")
894 suffix = suffix..s2
895 pat = p2
896 end
897
898 local rt = match(pat, "[gf]")
899 if rt then
900 if rt == "g" then
901 map_reg = band(op, 0x80000000) ~= 0 and map_regs.x or map_regs.w
902 else
903 map_reg = band(op, 0x400000) ~= 0 and map_regs.d or map_regs.s
904 end
905 end
906
907 local second0, immr
908
909 for p in gmatch(pat, ".") do
910 local x = nil
911 if p == "D" then
912 local regnum = band(op, 31)
913 x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
914 elseif p == "N" then
915 local regnum = band(rshift(op, 5), 31)
916 x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
917 elseif p == "M" then
918 local regnum = band(rshift(op, 16), 31)
919 x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
920 elseif p == "A" then
921 local regnum = band(rshift(op, 10), 31)
922 x = rt and map_reg[regnum] or match_reg(p, pat, regnum)
923 elseif p == "B" then
924 local addr = ctx.addr + pos + parse_immpc(op, name)
925 ctx.rel = addr
926 x = format("0x%08x", addr)
927 elseif p == "T" then
928 x = bor(band(rshift(op, 26), 32), band(rshift(op, 19), 31))
929 elseif p == "V" then
930 x = band(op, 15)
931 elseif p == "C" then
932 x = map_cond[band(rshift(op, 12), 15)]
933 elseif p == "c" then
934 local rn = band(rshift(op, 5), 31)
935 local rm = band(rshift(op, 16), 31)
936 local cond = band(rshift(op, 12), 15)
937 local invc = bxor(cond, 1)
938 x = map_cond[cond]
     -- Alias forms: when both sources are the same register, drop the M
     -- operand and print the inverted condition; when that register is
     -- number 31, drop the N operand too and use the second alias name.
939 if altname and cond ~= 14 and cond ~= 15 then
940 local a1, a2 = match(altname, "([^|]*)|(.*)")
941 if rn == rm then
942 local n = #operands
943 operands[n] = nil
944 x = map_cond[invc]
945 if rn ~= 31 then
946 if a1 then name = a1 else name = altname end
947 else
948 operands[n-1] = nil
949 name = a2
950 end
951 end
952 end
953 elseif p == "W" then
954 x = band(rshift(op, 5), 0xffff)
955 elseif p == "Y" then
956 x = band(rshift(op, 5), 0xffff)
957 local hw = band(rshift(op, 21), 3)
958 if altname and (hw == 0 or x ~= 0) then
959 name = altname
960 end
961 elseif p == "L" then
962 local rn = map_regs.x[band(rshift(op, 5), 31)]
963 local imm9 = arshift(lshift(op, 11), 23)
     -- Bit 11 set: pre-indexed form "[rn, #imm]!"; clear: post-indexed.
964 if band(op, 0x800) ~= 0 then
965 x = "["..rn..", #"..imm9.."]!"
966 else
967 x = "["..rn.."], #"..imm9
968 end
969 elseif p == "U" then
970 local rn = map_regs.x[band(rshift(op, 5), 31)]
971 local sz = band(rshift(op, 30), 3)
     -- Unsigned 12-bit offset from bits 10-21, scaled by the access size.
972 local imm12 = lshift(rshift(lshift(op, 10), 20), sz)
973 if imm12 ~= 0 then
974 x = "["..rn..", #"..imm12.."]"
975 else
976 x = "["..rn.."]"
977 end
978 elseif p == "K" then
979 local rn = map_regs.x[band(rshift(op, 5), 31)]
980 local imm9 = arshift(lshift(op, 11), 23)
981 if imm9 ~= 0 then
982 x = "["..rn..", #"..imm9.."]"
983 else
984 x = "["..rn.."]"
985 end
986 elseif p == "O" then
987 local rn, rm = map_regs.x[band(rshift(op, 5), 31)]
988 local m = band(rshift(op, 13), 1)
989 if m == 0 then
990 rm = map_regs.w[band(rshift(op, 16), 31)]
991 else
992 rm = map_regs.x[band(rshift(op, 16), 31)]
993 end
994 x = "["..rn..", "..rm
995 local opt = band(rshift(op, 13), 7)
996 local s = band(rshift(op, 12), 1)
997 local sz = band(rshift(op, 30), 3)
998 -- extension to be applied
999 if opt == 3 then
1000 if s == 0 then x = x.."]"
1001 else x = x..", lsl #"..sz.."]" end
1002 elseif opt == 2 or opt == 6 or opt == 7 then
1003 if s == 0 then x = x..", "..map_extend[opt].."]"
1004 else x = x..", "..map_extend[opt].." #"..sz.."]" end
1005 else
1006 x = x.."]"
1007 end
1008 elseif p == "P" then
     -- Scale for the 7-bit pair offset: 2 plus the size bit, which is
     -- bit 31 when bit 26 is clear and bits 30-31 when bit 26 is set.
1009 local sh = 2 + rshift(op, 31 - band(rshift(op, 26), 1))
1010 local imm7 = lshift(arshift(lshift(op, 10), 25), sh)
1011 local rn = map_regs.x[band(rshift(op, 5), 31)]
1012 local ind = band(rshift(op, 23), 3)
1013 if ind == 1 then
1014 x = "["..rn.."], #"..imm7
1015 elseif ind == 2 then
1016 if imm7 == 0 then
1017 x = "["..rn.."]"
1018 else
1019 x = "["..rn..", #"..imm7.."]"
1020 end
1021 elseif ind == 3 then
1022 x = "["..rn..", #"..imm7.."]!"
1023 end
1024 elseif p == "I" then
1025 local shf = band(rshift(op, 22), 3)
1026 local imm12 = band(rshift(op, 10), 0x0fff)
1027 local rn, rd = band(rshift(op, 5), 31), band(op, 31)
1028 if altname == "mov" and shf == 0 and imm12 == 0 and (rn == 31 or rd == 31) then
1029 name = altname
1030 x = nil
1031 elseif shf == 0 then
1032 x = imm12
1033 elseif shf == 1 then
1034 x = imm12..", lsl #12"
1035 end
1036 elseif p == "i" then
1037 x = "#0x"..decode_imm13(op)
1038 elseif p == "1" then
1039 immr = band(rshift(op, 16), 63)
1040 x = immr
1041 elseif p == "2" then
1042 x = band(rshift(op, 10), 63)
     -- Bitfield move with six alias names: the branches below pick one
     -- from imms (x) and immr and fix up the already emitted operands.
1043 if altname then
1044 local a1, a2, a3, a4, a5, a6 =
1045 match(altname, "([^|]*)|([^|]*)|([^|]*)|([^|]*)|([^|]*)|(.*)")
1046 local sf = band(rshift(op, 26), 32)
1047 local uns = band(rshift(op, 30), 1)
1048 if prefer_bfx(sf, uns, x, immr) then
1049 name = a2
1050 x = x - immr + 1
1051 elseif immr == 0 and x == 7 then
1052 local n = #operands
1053 operands[n] = nil
1054 if sf ~= 0 then
1055 operands[n-1] = gsub(operands[n-1], "x", "w")
1056 end
1057 last = operands[n-1]
1058 name = a6
1059 x = nil
1060 elseif immr == 0 and x == 15 then
1061 local n = #operands
1062 operands[n] = nil
1063 if sf ~= 0 then
1064 operands[n-1] = gsub(operands[n-1], "x", "w")
1065 end
1066 last = operands[n-1]
1067 name = a5
1068 x = nil
1069 elseif x == 31 or x == 63 then
1070 if x == 31 and immr == 0 and name == "sbfm" then
1071 name = a4
1072 local n = #operands
1073 operands[n] = nil
1074 if sf ~= 0 then
1075 operands[n-1] = gsub(operands[n-1], "x", "w")
1076 end
1077 last = operands[n-1]
1078 else
1079 name = a3
1080 end
1081 x = nil
1082 elseif band(x, 31) ~= 31 and immr == x+1 and name == "ubfm" then
1083 name = a4
1084 last = "#"..(sf+32 - immr)
1085 operands[#operands] = last
1086 x = nil
1087 elseif x < immr then
1088 name = a1
1089 last = "#"..(sf+32 - immr)
1090 operands[#operands] = last
1091 x = x + 1
1092 end
1093 end
1094 elseif p == "3" then
1095 x = band(rshift(op, 10), 63)
1096 if altname then
1097 local a1, a2 = match(altname, "([^|]*)|(.*)")
1098 if x < immr then
1099 name = a1
1100 local sf = band(rshift(op, 26), 32)
1101 last = "#"..(sf+32 - immr)
1102 operands[#operands] = last
1103 x = x + 1
1104 else
1105 name = a2
1106 x = x - immr + 1
1107 end
1108 end
1109 elseif p == "4" then
1110 x = band(rshift(op, 10), 63)
1111 local rn = band(rshift(op, 5), 31)
1112 local rm = band(rshift(op, 16), 31)
1113 if altname and rn == rm then
1114 local n = #operands
1115 operands[n] = nil
1116 last = operands[n-1]
1117 name = altname
1118 end
1119 elseif p == "5" then
1120 x = band(rshift(op, 16), 31)
1121 elseif p == "S" then
1122 x = band(rshift(op, 10), 63)
1123 if x == 0 then x = nil
1124 else x = map_shift[band(rshift(op, 22), 3)].." #"..x end
1125 elseif p == "X" then
1126 local opt = band(rshift(op, 13), 7)
1127 -- Width specifier <R>.
1128 if opt ~= 3 and opt ~= 7 then
1129 last = map_regs.w[band(rshift(op, 16), 31)]
1130 operands[#operands] = last
1131 end
1132 x = band(rshift(op, 10), 7)
1133 -- Extension.
1134 if opt == 2 + band(rshift(op, 31), 1) and
1135 band(rshift(op, second0 and 5 or 0), 31) == 31 then
1136 if x == 0 then x = nil
1137 else x = "lsl #"..x end
1138 else
1139 if x == 0 then x = map_extend[band(rshift(op, 13), 7)]
1140 else x = map_extend[band(rshift(op, 13), 7)].." #"..x end
1141 end
1142 elseif p == "R" then
1143 x = band(rshift(op,21), 3)
1144 if x == 0 then x = nil
1145 else x = "lsl #"..x*16 end
1146 elseif p == "z" then
1147 local n = #operands
1148 if operands[n] == "sp" then operands[n] = "xzr"
1149 elseif operands[n] == "wsp" then operands[n] = "wzr"
1150 end
1151 elseif p == "Z" then
1152 x = 0
1153 elseif p == "F" then
1154 x = parse_fpimm8(op)
1155 elseif p == "G" then
1156 x = "#0x"..decode_fpmovi(op)
1157 elseif p == "g" or p == "f" or p == "x" or p == "w" or
1158 p == "d" or p == "s" then
1159 -- These are handled in D/N/M/A.
1160 elseif p == "0" then
     -- The preceding register operand is number 31: remove it and switch
     -- to an alias name. With two alias names, the first "0" takes the
     -- first name and a later "0" takes the remaining one.
1161 if last == "sp" or last == "wsp" then
1162 local n = #operands
1163 operands[n] = nil
1164 last = operands[n-1]
1165 if altname then
1166 local a1, a2 = match(altname, "([^|]*)|(.*)")
1167 if not a1 then
1168 name = altname
1169 elseif second0 then
1170 name, altname = a2, a1
1171 else
1172 name, altname = a1, a2
1173 end
1174 end
1175 end
1176 second0 = true
1177 elseif p == " " then
1178 operands[#operands+1] = pat:match(" (.*)")
1179 break
1180 else
1181 assert(false)
1182 end
     -- Append the operand produced by this code; numbers get a "#" prefix.
1183 if x then
1184 last = x
1185 if type(x) == "number" then x = "#"..x end
1186 operands[#operands+1] = x
1187 end
1188 end
1189
1190 return putop(ctx, name..suffix, operands)
1191end
1192
1193------------------------------------------------------------------------------
1194
-- Disassemble a block of code: starting at byte offset `ofs` (default 0),
-- decode instructions until ctx.pos reaches ofs+len, or the end of ctx.code
-- when `len` is not given.
local function disass_block(ctx, ofs, len)
  ofs = ofs or 0
  local stop
  if len then
    stop = ofs + len
  else
    stop = #ctx.code
  end
  ctx.pos = ofs
  ctx.rel = nil
  while ctx.pos < stop do
    disass_ins(ctx)
  end
end
1203
-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
-- `addr` defaults to 0 and `out` to io.write. The context starts with an
-- empty symbol table and hexdump enabled (ctx.hexdump = 8).
local function create(code, addr, out)
  return {
    code = code,
    addr = addr or 0,
    out = out or io.write,
    symtab = {},
    disass = disass_block,
    hexdump = 8,
  }
end
1215
-- Simple API: disassemble code (a string) at address and output via out.
-- Builds a fresh context and disassembles the whole string.
local function disass(code, addr, out)
  local ctx = create(code, addr, out)
  ctx:disass()
end
1220
-- Return register name for RID: ids below 32 map to the x register names
-- (31 is "sp"), ids from 32 upwards map to the d register names.
local function regname(r)
  local is_gpr = r < 32
  local bank = is_gpr and map_regs.x or map_regs.d
  return bank[is_gpr and r or r-32]
end
1226
-- Public module functions.
local exports = {}
exports.create = create
exports.disass = disass
exports.regname = regname
return exports
1233
diff --git a/src/jit/dis_arm64be.lua b/src/jit/dis_arm64be.lua
new file mode 100644
index 00000000..e62a28cb
--- /dev/null
+++ b/src/jit/dis_arm64be.lua
@@ -0,0 +1,12 @@
1----------------------------------------------------------------------------
2-- LuaJIT ARM64BE disassembler wrapper module.
3--
4-- Copyright (C) 2005-2026 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- ARM64 instructions are always little-endian. So just forward to the
8-- common ARM64 disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------
10
-- Reuse the package prefix this module was loaded under (everything up to
-- and including the last dot of the module name), so the common
-- disassembler is found in the same package.
local prefix = string.match(..., ".*%.") or ""
return require(prefix.."dis_arm64")
12
diff --git a/src/jit/dis_mips.lua b/src/jit/dis_mips.lua
index ad3dbe4d..fece8937 100644
--- a/src/jit/dis_mips.lua
+++ b/src/jit/dis_mips.lua
@@ -19,13 +19,34 @@ local band, bor, tohex = bit.band, bit.bor, bit.tohex
19local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift 19local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift
20 20
21------------------------------------------------------------------------------ 21------------------------------------------------------------------------------
22-- Primary and extended opcode maps 22-- Extended opcode maps common to all MIPS releases
23------------------------------------------------------------------------------ 23------------------------------------------------------------------------------
24 24
25local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", }
26local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", } 25local map_srl = { shift = 21, mask = 1, [0] = "srlDTA", "rotrDTA", }
27local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", } 26local map_srlv = { shift = 6, mask = 1, [0] = "srlvDTS", "rotrvDTS", }
28 27
-- Coprocessor 0 instructions. Bit 25 separates the register-move style
-- encodings (keyed on the 4-bit field at bit 21) from the encodings that
-- are keyed on the low 6 bits.
local map_cop0 = {
  shift = 25,
  mask = 1,
  [0] = {
    shift = 21, mask = 15,
    [0] = "mfc0TDW",
    [4] = "mtc0TDW",
    [10] = "rdpgprDT",
    [11] = { shift = 5, mask = 1, [0] = "diT0", [1] = "eiT0" },
    [14] = "wrpgprDT",
  },
  [1] = {
    shift = 0, mask = 63,
    [1] = "tlbr",
    [2] = "tlbwi",
    [6] = "tlbwr",
    [8] = "tlbp",
    [24] = "eret",
    [31] = "deret",
    [32] = "wait",
  },
}
43
44------------------------------------------------------------------------------
45-- Primary and extended opcode maps for MIPS R1-R5
46------------------------------------------------------------------------------
47
48local map_movci = { shift = 16, mask = 1, [0] = "movfDSC", "movtDSC", }
49
29local map_special = { 50local map_special = {
30 shift = 0, mask = 63, 51 shift = 0, mask = 63,
31 [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" }, 52 [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" },
@@ -34,15 +55,17 @@ local map_special = {
34 "jrS", "jalrD1S", "movzDST", "movnDST", 55 "jrS", "jalrD1S", "movzDST", "movnDST",
35 "syscallY", "breakY", false, "sync", 56 "syscallY", "breakY", false, "sync",
36 "mfhiD", "mthiS", "mfloD", "mtloS", 57 "mfhiD", "mthiS", "mfloD", "mtloS",
37 false, false, false, false, 58 "dsllvDST", false, "dsrlvDST", "dsravDST",
38 "multST", "multuST", "divST", "divuST", 59 "multST", "multuST", "divST", "divuST",
39 false, false, false, false, 60 "dmultST", "dmultuST", "ddivST", "ddivuST",
40 "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T", 61 "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T",
41 "andDST", "orDST", "xorDST", "nor|notDST0", 62 "andDST", "or|moveDST0", "xorDST", "nor|notDST0",
42 false, false, "sltDST", "sltuDST", 63 false, false, "sltDST", "sltuDST",
43 false, false, false, false, 64 "daddDST", "dadduDST", "dsubDST", "dsubuDST",
44 "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ", 65 "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ",
45 "teqSTZ", false, "tneSTZ", 66 "teqSTZ", false, "tneSTZ", false,
67 "dsllDTA", false, "dsrlDTA", "dsraDTA",
68 "dsll32DTA", false, "dsrl32DTA", "dsra32DTA",
46} 69}
47 70
48local map_special2 = { 71local map_special2 = {
@@ -60,11 +83,17 @@ local map_bshfl = {
60 [24] = "sehDT", 83 [24] = "sehDT",
61} 84}
62 85
-- Sub-map keyed on the 5-bit field at bit 6; only values 2 and 5 are
-- decoded. It is referenced from map_special3 as entry 36.
local map_dbshfl = {
  shift = 6,
  mask = 31,
  [2] = "dsbhDT",
  [5] = "dshdDT",
}
91
63local map_special3 = { 92local map_special3 = {
64 shift = 0, mask = 63, 93 shift = 0, mask = 63,
65 [0] = "extTSAK", [4] = "insTSAL", 94 [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK",
66 [32] = map_bshfl, 95 [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL",
67 [59] = "rdhwrTD", 96 [32] = map_bshfl, [36] = map_dbshfl, [59] = "rdhwrTD",
68} 97}
69 98
70local map_regimm = { 99local map_regimm = {
@@ -79,22 +108,6 @@ local map_regimm = {
79 false, false, false, "synciSO", 108 false, false, false, "synciSO",
80} 109}
81 110
82local map_cop0 = {
83 shift = 25, mask = 1,
84 [0] = {
85 shift = 21, mask = 15,
86 [0] = "mfc0TDW", [4] = "mtc0TDW",
87 [10] = "rdpgprDT",
88 [11] = { shift = 5, mask = 1, [0] = "diT0", "eiT0", },
89 [14] = "wrpgprDT",
90 }, {
91 shift = 0, mask = 63,
92 [1] = "tlbr", [2] = "tlbwi", [6] = "tlbwr", [8] = "tlbp",
93 [24] = "eret", [31] = "deret",
94 [32] = "wait",
95 },
96}
97
98local map_cop1s = { 111local map_cop1s = {
99 shift = 0, mask = 63, 112 shift = 0, mask = 63,
100 [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH", 113 [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH",
@@ -178,8 +191,8 @@ local map_cop1bc = {
178 191
179local map_cop1 = { 192local map_cop1 = {
180 shift = 21, mask = 31, 193 shift = 21, mask = 31,
181 [0] = "mfc1TG", false, "cfc1TG", "mfhc1TG", 194 [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG",
182 "mtc1TG", false, "ctc1TG", "mthc1TG", 195 "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG",
183 map_cop1bc, false, false, false, 196 map_cop1bc, false, false, false,
184 false, false, false, false, 197 false, false, false, false,
185 map_cop1s, map_cop1d, false, false, 198 map_cop1s, map_cop1d, false, false,
@@ -213,16 +226,218 @@ local map_pri = {
213 "andiTSU", "ori|liTS0U", "xoriTSU", "luiTU", 226 "andiTSU", "ori|liTS0U", "xoriTSU", "luiTU",
214 map_cop0, map_cop1, false, map_cop1x, 227 map_cop0, map_cop1, false, map_cop1x,
215 "beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB", 228 "beql|beqzlST0B", "bnel|bnezlST0B", "blezlSB", "bgtzlSB",
216 false, false, false, false, 229 "daddiTSI", "daddiuTSI", false, false,
217 map_special2, false, false, map_special3, 230 map_special2, "jalxJ", false, map_special3,
218 "lbTSO", "lhTSO", "lwlTSO", "lwTSO", 231 "lbTSO", "lhTSO", "lwlTSO", "lwTSO",
219 "lbuTSO", "lhuTSO", "lwrTSO", false, 232 "lbuTSO", "lhuTSO", "lwrTSO", false,
220 "sbTSO", "shTSO", "swlTSO", "swTSO", 233 "sbTSO", "shTSO", "swlTSO", "swTSO",
221 false, false, "swrTSO", "cacheNSO", 234 false, false, "swrTSO", "cacheNSO",
222 "llTSO", "lwc1HSO", "lwc2TSO", "prefNSO", 235 "llTSO", "lwc1HSO", "lwc2TSO", "prefNSO",
223 false, "ldc1HSO", "ldc2TSO", false, 236 false, "ldc1HSO", "ldc2TSO", "ldTSO",
224 "scTSO", "swc1HSO", "swc2TSO", false, 237 "scTSO", "swc1HSO", "swc2TSO", false,
225 false, "sdc1HSO", "sdc2TSO", false, 238 false, "sdc1HSO", "sdc2TSO", "sdTSO",
239}
240
241------------------------------------------------------------------------------
242-- Primary and extended opcode maps for MIPS R6
243------------------------------------------------------------------------------
244
245local map_mul_r6 = { shift = 6, mask = 3, [2] = "mulDST", [3] = "muhDST" }
246local map_mulu_r6 = { shift = 6, mask = 3, [2] = "muluDST", [3] = "muhuDST" }
247local map_div_r6 = { shift = 6, mask = 3, [2] = "divDST", [3] = "modDST" }
248local map_divu_r6 = { shift = 6, mask = 3, [2] = "divuDST", [3] = "moduDST" }
249local map_dmul_r6 = { shift = 6, mask = 3, [2] = "dmulDST", [3] = "dmuhDST" }
250local map_dmulu_r6 = { shift = 6, mask = 3, [2] = "dmuluDST", [3] = "dmuhuDST" }
251local map_ddiv_r6 = { shift = 6, mask = 3, [2] = "ddivDST", [3] = "dmodDST" }
252local map_ddivu_r6 = { shift = 6, mask = 3, [2] = "ddivuDST", [3] = "dmoduDST" }
253
254local map_special_r6 = {
255 shift = 0, mask = 63,
256 [0] = { shift = 0, mask = -1, [0] = "nop", _ = "sllDTA" },
257 false, map_srl, "sraDTA",
258 "sllvDTS", false, map_srlv, "sravDTS",
259 "jrS", "jalrD1S", false, false,
260 "syscallY", "breakY", false, "sync",
261 "clzDS", "cloDS", "dclzDS", "dcloDS",
262 "dsllvDST", "dlsaDSTA", "dsrlvDST", "dsravDST",
263 map_mul_r6, map_mulu_r6, map_div_r6, map_divu_r6,
264 map_dmul_r6, map_dmulu_r6, map_ddiv_r6, map_ddivu_r6,
265 "addDST", "addu|moveDST0", "subDST", "subu|neguDS0T",
266 "andDST", "or|moveDST0", "xorDST", "nor|notDST0",
267 false, false, "sltDST", "sltuDST",
268 "daddDST", "dadduDST", "dsubDST", "dsubuDST",
269 "tgeSTZ", "tgeuSTZ", "tltSTZ", "tltuSTZ",
270 "teqSTZ", "seleqzDST", "tneSTZ", "selnezDST",
271 "dsllDTA", false, "dsrlDTA", "dsraDTA",
272 "dsll32DTA", false, "dsrl32DTA", "dsra32DTA",
273}
274
275local map_bshfl_r6 = {
276 shift = 9, mask = 3,
277 [1] = "alignDSTa",
278 _ = {
279 shift = 6, mask = 31,
280 [0] = "bitswapDT",
281 [2] = "wsbhDT",
282 [16] = "sebDT",
283 [24] = "sehDT",
284 }
285}
286
287local map_dbshfl_r6 = {
288 shift = 9, mask = 3,
289 [1] = "dalignDSTa",
290 _ = {
291 shift = 6, mask = 31,
292 [0] = "dbitswapDT",
293 [2] = "dsbhDT",
294 [5] = "dshdDT",
295 }
296}
297
298local map_special3_r6 = {
299 shift = 0, mask = 63,
300 [0] = "extTSAK", [1] = "dextmTSAP", [3] = "dextTSAK",
301 [4] = "insTSAL", [6] = "dinsuTSEQ", [7] = "dinsTSAL",
302 [32] = map_bshfl_r6, [36] = map_dbshfl_r6, [59] = "rdhwrTD",
303}
304
305local map_regimm_r6 = {
306 shift = 16, mask = 31,
307 [0] = "bltzSB", [1] = "bgezSB",
308 [6] = "dahiSI", [30] = "datiSI",
309 [23] = "sigrieI", [31] = "synciSO",
310}
311
312local map_pcrel_r6 = {
313 shift = 19, mask = 3,
314 [0] = "addiupcS2", "lwpcS2", "lwupcS2", {
315 shift = 18, mask = 1,
316 [0] = "ldpcS3", { shift = 16, mask = 3, [2] = "auipcSI", [3] = "aluipcSI" }
317 }
318}
319
320local map_cop1s_r6 = {
321 shift = 0, mask = 63,
322 [0] = "add.sFGH", "sub.sFGH", "mul.sFGH", "div.sFGH",
323 "sqrt.sFG", "abs.sFG", "mov.sFG", "neg.sFG",
324 "round.l.sFG", "trunc.l.sFG", "ceil.l.sFG", "floor.l.sFG",
325 "round.w.sFG", "trunc.w.sFG", "ceil.w.sFG", "floor.w.sFG",
326 "sel.sFGH", false, false, false,
327 "seleqz.sFGH", "recip.sFG", "rsqrt.sFG", "selnez.sFGH",
328 "maddf.sFGH", "msubf.sFGH", "rint.sFG", "class.sFG",
329 "min.sFGH", "mina.sFGH", "max.sFGH", "maxa.sFGH",
330 false, "cvt.d.sFG", false, false,
331 "cvt.w.sFG", "cvt.l.sFG",
332}
333
334local map_cop1d_r6 = {
335 shift = 0, mask = 63,
336 [0] = "add.dFGH", "sub.dFGH", "mul.dFGH", "div.dFGH",
337 "sqrt.dFG", "abs.dFG", "mov.dFG", "neg.dFG",
338 "round.l.dFG", "trunc.l.dFG", "ceil.l.dFG", "floor.l.dFG",
339 "round.w.dFG", "trunc.w.dFG", "ceil.w.dFG", "floor.w.dFG",
340 "sel.dFGH", false, false, false,
341 "seleqz.dFGH", "recip.dFG", "rsqrt.dFG", "selnez.dFGH",
342 "maddf.dFGH", "msubf.dFGH", "rint.dFG", "class.dFG",
343 "min.dFGH", "mina.dFGH", "max.dFGH", "maxa.dFGH",
344 "cvt.s.dFG", false, false, false,
345 "cvt.w.dFG", "cvt.l.dFG",
346}
347
348local map_cop1w_r6 = {
349 shift = 0, mask = 63,
350 [0] = "cmp.af.sFGH", "cmp.un.sFGH", "cmp.eq.sFGH", "cmp.ueq.sFGH",
351 "cmp.lt.sFGH", "cmp.ult.sFGH", "cmp.le.sFGH", "cmp.ule.sFGH",
352 "cmp.saf.sFGH", "cmp.sun.sFGH", "cmp.seq.sFGH", "cmp.sueq.sFGH",
353 "cmp.slt.sFGH", "cmp.sult.sFGH", "cmp.sle.sFGH", "cmp.sule.sFGH",
354 false, "cmp.or.sFGH", "cmp.une.sFGH", "cmp.ne.sFGH",
355 false, false, false, false,
356 false, "cmp.sor.sFGH", "cmp.sune.sFGH", "cmp.sne.sFGH",
357 false, false, false, false,
358 "cvt.s.wFG", "cvt.d.wFG",
359}
360
361local map_cop1l_r6 = {
362 shift = 0, mask = 63,
363 [0] = "cmp.af.dFGH", "cmp.un.dFGH", "cmp.eq.dFGH", "cmp.ueq.dFGH",
364 "cmp.lt.dFGH", "cmp.ult.dFGH", "cmp.le.dFGH", "cmp.ule.dFGH",
365 "cmp.saf.dFGH", "cmp.sun.dFGH", "cmp.seq.dFGH", "cmp.sueq.dFGH",
366 "cmp.slt.dFGH", "cmp.sult.dFGH", "cmp.sle.dFGH", "cmp.sule.dFGH",
367 false, "cmp.or.dFGH", "cmp.une.dFGH", "cmp.ne.dFGH",
368 false, false, false, false,
369 false, "cmp.sor.dFGH", "cmp.sune.dFGH", "cmp.sne.dFGH",
370 false, false, false, false,
371 "cvt.s.lFG", "cvt.d.lFG",
372}
373
374local map_cop1_r6 = {
375 shift = 21, mask = 31,
376 [0] = "mfc1TG", "dmfc1TG", "cfc1TG", "mfhc1TG",
377 "mtc1TG", "dmtc1TG", "ctc1TG", "mthc1TG",
378 false, "bc1eqzHB", false, false,
379 false, "bc1nezHB", false, false,
380 map_cop1s_r6, map_cop1d_r6, false, false,
381 map_cop1w_r6, map_cop1l_r6,
382}
383
384local function maprs_popTS(rs, rt)
385 if rt == 0 then return 0 elseif rs == 0 then return 1
386 elseif rs == rt then return 2 else return 3 end
387end
388
389local map_pop06_r6 = {
390 maprs = maprs_popTS, [0] = "blezSB", "blezalcTB", "bgezalcTB", "bgeucSTB"
391}
392local map_pop07_r6 = {
393 maprs = maprs_popTS, [0] = "bgtzSB", "bgtzalcTB", "bltzalcTB", "bltucSTB"
394}
395local map_pop26_r6 = {
396 maprs = maprs_popTS, "blezcTB", "bgezcTB", "bgecSTB"
397}
398local map_pop27_r6 = {
399 maprs = maprs_popTS, "bgtzcTB", "bltzcTB", "bltcSTB"
400}
401
402local function maprs_popS(rs, rt)
403 if rs == 0 then return 0 else return 1 end
404end
405
406local map_pop66_r6 = {
407 maprs = maprs_popS, [0] = "jicTI", "beqzcSb"
408}
409local map_pop76_r6 = {
410 maprs = maprs_popS, [0] = "jialcTI", "bnezcSb"
411}
412
413local function maprs_popST(rs, rt)
414 if rs >= rt then return 0 elseif rs == 0 then return 1 else return 2 end
415end
416
417local map_pop10_r6 = {
418 maprs = maprs_popST, [0] = "bovcSTB", "beqzalcTB", "beqcSTB"
419}
420local map_pop30_r6 = {
421 maprs = maprs_popST, [0] = "bnvcSTB", "bnezalcTB", "bnecSTB"
422}
423
424local map_pri_r6 = {
425 [0] = map_special_r6, map_regimm_r6, "jJ", "jalJ",
426 "beq|beqz|bST00B", "bne|bnezST0B", map_pop06_r6, map_pop07_r6,
427 map_pop10_r6, "addiu|liTS0I", "sltiTSI", "sltiuTSI",
428 "andiTSU", "ori|liTS0U", "xoriTSU", "aui|luiTS0U",
429 map_cop0, map_cop1_r6, false, false,
430 false, false, map_pop26_r6, map_pop27_r6,
431 map_pop30_r6, "daddiuTSI", false, false,
432 false, "dauiTSI", false, map_special3_r6,
433 "lbTSO", "lhTSO", false, "lwTSO",
434 "lbuTSO", "lhuTSO", false, false,
435 "sbTSO", "shTSO", false, "swTSO",
436 false, false, false, false,
437 false, "lwc1HSO", "bc#", false,
438 false, "ldc1HSO", map_pop66_r6, "ldTSO",
439 false, "swc1HSO", "balc#", map_pcrel_r6,
440 false, "sdc1HSO", map_pop76_r6, "sdTSO",
226} 441}
227 442
228------------------------------------------------------------------------------ 443------------------------------------------------------------------------------
@@ -279,10 +494,14 @@ local function disass_ins(ctx)
279 ctx.op = op 494 ctx.op = op
280 ctx.rel = nil 495 ctx.rel = nil
281 496
282 local opat = map_pri[rshift(op, 26)] 497 local opat = ctx.map_pri[rshift(op, 26)]
283 while type(opat) ~= "string" do 498 while type(opat) ~= "string" do
284 if not opat then return unknown(ctx) end 499 if not opat then return unknown(ctx) end
285 opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._ 500 if opat.maprs then
501 opat = opat[opat.maprs(band(rshift(op,21),31), band(rshift(op,16),31))]
502 else
503 opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
504 end
286 end 505 end
287 local name, pat = match(opat, "^([a-z0-9_.]*)(.*)") 506 local name, pat = match(opat, "^([a-z0-9_.]*)(.*)")
288 local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)") 507 local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
@@ -306,6 +525,10 @@ local function disass_ins(ctx)
306 x = "f"..band(rshift(op, 21), 31) 525 x = "f"..band(rshift(op, 21), 31)
307 elseif p == "A" then 526 elseif p == "A" then
308 x = band(rshift(op, 6), 31) 527 x = band(rshift(op, 6), 31)
528 elseif p == "a" then
529 x = band(rshift(op, 6), 7)
530 elseif p == "E" then
531 x = band(rshift(op, 6), 31) + 32
309 elseif p == "M" then 532 elseif p == "M" then
310 x = band(rshift(op, 11), 31) 533 x = band(rshift(op, 11), 31)
311 elseif p == "N" then 534 elseif p == "N" then
@@ -315,10 +538,18 @@ local function disass_ins(ctx)
315 if x == 0 then x = nil end 538 if x == 0 then x = nil end
316 elseif p == "K" then 539 elseif p == "K" then
317 x = band(rshift(op, 11), 31) + 1 540 x = band(rshift(op, 11), 31) + 1
541 elseif p == "P" then
542 x = band(rshift(op, 11), 31) + 33
318 elseif p == "L" then 543 elseif p == "L" then
319 x = band(rshift(op, 11), 31) - last + 1 544 x = band(rshift(op, 11), 31) - last + 1
545 elseif p == "Q" then
546 x = band(rshift(op, 11), 31) - last + 33
320 elseif p == "I" then 547 elseif p == "I" then
321 x = arshift(lshift(op, 16), 16) 548 x = arshift(lshift(op, 16), 16)
549 elseif p == "2" then
550 x = arshift(lshift(op, 13), 11)
551 elseif p == "3" then
552 x = arshift(lshift(op, 14), 11)
322 elseif p == "U" then 553 elseif p == "U" then
323 x = band(op, 0xffff) 554 x = band(op, 0xffff)
324 elseif p == "O" then 555 elseif p == "O" then
@@ -328,13 +559,22 @@ local function disass_ins(ctx)
328 local index = map_gpr[band(rshift(op, 16), 31)] 559 local index = map_gpr[band(rshift(op, 16), 31)]
329 operands[#operands] = format("%s(%s)", index, last) 560 operands[#operands] = format("%s(%s)", index, last)
330 elseif p == "B" then 561 elseif p == "B" then
331 x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 16)*4 + 4 562 x = ctx.addr + ctx.pos + arshift(lshift(op, 16), 14) + 4
563 ctx.rel = x
564 x = format("0x%08x", x)
565 elseif p == "b" then
566 x = ctx.addr + ctx.pos + arshift(lshift(op, 11), 9) + 4
332 ctx.rel = x 567 ctx.rel = x
333 x = "0x"..tohex(x) 568 x = format("0x%08x", x)
569 elseif p == "#" then
570 x = ctx.addr + ctx.pos + arshift(lshift(op, 6), 4) + 4
571 ctx.rel = x
572 x = format("0x%08x", x)
334 elseif p == "J" then 573 elseif p == "J" then
335 x = band(ctx.addr + ctx.pos, 0xf0000000) + band(op, 0x03ffffff)*4 574 local a = ctx.addr + ctx.pos
575 x = a - band(a, 0x0fffffff) + band(op, 0x03ffffff)*4
336 ctx.rel = x 576 ctx.rel = x
337 x = "0x"..tohex(x) 577 x = format("0x%08x", x)
338 elseif p == "V" then 578 elseif p == "V" then
339 x = band(rshift(op, 8), 7) 579 x = band(rshift(op, 8), 7)
340 if x == 0 then x = nil end 580 if x == 0 then x = nil end
@@ -384,7 +624,7 @@ local function disass_block(ctx, ofs, len)
384end 624end
385 625
386-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 626-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
387local function create_(code, addr, out) 627local function create(code, addr, out)
388 local ctx = {} 628 local ctx = {}
389 ctx.code = code 629 ctx.code = code
390 ctx.addr = addr or 0 630 ctx.addr = addr or 0
@@ -393,36 +633,62 @@ local function create_(code, addr, out)
393 ctx.disass = disass_block 633 ctx.disass = disass_block
394 ctx.hexdump = 8 634 ctx.hexdump = 8
395 ctx.get = get_be 635 ctx.get = get_be
636 ctx.map_pri = map_pri
637 return ctx
638end
639
640local function create_el(code, addr, out)
641 local ctx = create(code, addr, out)
642 ctx.get = get_le
643 return ctx
644end
645
646local function create_r6(code, addr, out)
647 local ctx = create(code, addr, out)
648 ctx.map_pri = map_pri_r6
396 return ctx 649 return ctx
397end 650end
398 651
399local function create_el_(code, addr, out) 652local function create_r6_el(code, addr, out)
400 local ctx = create_(code, addr, out) 653 local ctx = create(code, addr, out)
401 ctx.get = get_le 654 ctx.get = get_le
655 ctx.map_pri = map_pri_r6
402 return ctx 656 return ctx
403end 657end
404 658
405-- Simple API: disassemble code (a string) at address and output via out. 659-- Simple API: disassemble code (a string) at address and output via out.
406local function disass_(code, addr, out) 660local function disass(code, addr, out)
407 create_(code, addr, out):disass() 661 create(code, addr, out):disass()
662end
663
664local function disass_el(code, addr, out)
665 create_el(code, addr, out):disass()
408end 666end
409 667
410local function disass_el_(code, addr, out) 668local function disass_r6(code, addr, out)
411 create_el_(code, addr, out):disass() 669 create_r6(code, addr, out):disass()
670end
671
672local function disass_r6_el(code, addr, out)
673 create_r6_el(code, addr, out):disass()
412end 674end
413 675
414-- Return register name for RID. 676-- Return register name for RID.
415local function regname_(r) 677local function regname(r)
416 if r < 32 then return map_gpr[r] end 678 if r < 32 then return map_gpr[r] end
417 return "f"..(r-32) 679 return "f"..(r-32)
418end 680end
419 681
420-- Public module functions. 682-- Public module functions.
421module(...) 683return {
422 684 create = create,
423create = create_ 685 create_el = create_el,
424create_el = create_el_ 686 create_r6 = create_r6,
425disass = disass_ 687 create_r6_el = create_r6_el,
426disass_el = disass_el_ 688 disass = disass,
427regname = regname_ 689 disass_el = disass_el,
690 disass_r6 = disass_r6,
691 disass_r6_el = disass_r6_el,
692 regname = regname
693}
428 694
diff --git a/src/jit/dis_mips64.lua b/src/jit/dis_mips64.lua
new file mode 100644
index 00000000..d7d160a0
--- /dev/null
+++ b/src/jit/dis_mips64.lua
@@ -0,0 +1,17 @@
1----------------------------------------------------------------------------
2-- LuaJIT MIPS64 disassembler wrapper module.
3--
4-- Copyright (C) 2005-2026 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- This module just exports the big-endian functions from the
8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------
10
11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
12return {
13 create = dis_mips.create,
14 disass = dis_mips.disass,
15 regname = dis_mips.regname
16}
17
diff --git a/src/jit/dis_mips64el.lua b/src/jit/dis_mips64el.lua
new file mode 100644
index 00000000..06767c01
--- /dev/null
+++ b/src/jit/dis_mips64el.lua
@@ -0,0 +1,17 @@
1----------------------------------------------------------------------------
2-- LuaJIT MIPS64EL disassembler wrapper module.
3--
4-- Copyright (C) 2005-2026 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- This module just exports the little-endian functions from the
8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------
10
11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
12return {
13 create = dis_mips.create_el,
14 disass = dis_mips.disass_el,
15 regname = dis_mips.regname
16}
17
diff --git a/src/jit/dis_mips64r6.lua b/src/jit/dis_mips64r6.lua
new file mode 100644
index 00000000..ef4a125a
--- /dev/null
+++ b/src/jit/dis_mips64r6.lua
@@ -0,0 +1,17 @@
1----------------------------------------------------------------------------
2-- LuaJIT MIPS64R6 disassembler wrapper module.
3--
4-- Copyright (C) 2005-2026 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- This module just exports the r6 big-endian functions from the
8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------
10
11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
12return {
13 create = dis_mips.create_r6,
14 disass = dis_mips.disass_r6,
15 regname = dis_mips.regname
16}
17
diff --git a/src/jit/dis_mips64r6el.lua b/src/jit/dis_mips64r6el.lua
new file mode 100644
index 00000000..4dc6a54a
--- /dev/null
+++ b/src/jit/dis_mips64r6el.lua
@@ -0,0 +1,17 @@
1----------------------------------------------------------------------------
2-- LuaJIT MIPS64R6EL disassembler wrapper module.
3--
4-- Copyright (C) 2005-2026 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7-- This module just exports the r6 little-endian functions from the
8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------
10
11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
12return {
13 create = dis_mips.create_r6_el,
14 disass = dis_mips.disass_r6_el,
15 regname = dis_mips.regname
16}
17
diff --git a/src/jit/dis_mipsel.lua b/src/jit/dis_mipsel.lua
index 863d245c..1da538a6 100644
--- a/src/jit/dis_mipsel.lua
+++ b/src/jit/dis_mipsel.lua
@@ -8,13 +8,10 @@
8-- MIPS disassembler module. All the interesting stuff is there. 8-- MIPS disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------ 9------------------------------------------------------------------------------
10 10
11local require = require 11local dis_mips = require((string.match(..., ".*%.") or "").."dis_mips")
12 12return {
13module(...) 13 create = dis_mips.create_el,
14 14 disass = dis_mips.disass_el,
15local dis_mips = require(_PACKAGE.."dis_mips") 15 regname = dis_mips.regname
16 16}
17create = dis_mips.create_el
18disass = dis_mips.disass_el
19regname = dis_mips.regname
20 17
diff --git a/src/jit/dis_ppc.lua b/src/jit/dis_ppc.lua
index 162947ca..d8f4cfb7 100644
--- a/src/jit/dis_ppc.lua
+++ b/src/jit/dis_ppc.lua
@@ -560,7 +560,7 @@ local function disass_block(ctx, ofs, len)
560end 560end
561 561
562-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 562-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
563local function create_(code, addr, out) 563local function create(code, addr, out)
564 local ctx = {} 564 local ctx = {}
565 ctx.code = code 565 ctx.code = code
566 ctx.addr = addr or 0 566 ctx.addr = addr or 0
@@ -572,20 +572,20 @@ local function create_(code, addr, out)
572end 572end
573 573
574-- Simple API: disassemble code (a string) at address and output via out. 574-- Simple API: disassemble code (a string) at address and output via out.
575local function disass_(code, addr, out) 575local function disass(code, addr, out)
576 create_(code, addr, out):disass() 576 create(code, addr, out):disass()
577end 577end
578 578
579-- Return register name for RID. 579-- Return register name for RID.
580local function regname_(r) 580local function regname(r)
581 if r < 32 then return map_gpr[r] end 581 if r < 32 then return map_gpr[r] end
582 return "f"..(r-32) 582 return "f"..(r-32)
583end 583end
584 584
585-- Public module functions. 585-- Public module functions.
586module(...) 586return {
587 587 create = create,
588create = create_ 588 disass = disass,
589disass = disass_ 589 regname = regname
590regname = regname_ 590}
591 591
diff --git a/src/jit/dis_x64.lua b/src/jit/dis_x64.lua
index 4db5fa1b..42ae48bf 100644
--- a/src/jit/dis_x64.lua
+++ b/src/jit/dis_x64.lua
@@ -8,13 +8,10 @@
8-- x86/x64 disassembler module. All the interesting stuff is there. 8-- x86/x64 disassembler module. All the interesting stuff is there.
9------------------------------------------------------------------------------ 9------------------------------------------------------------------------------
10 10
11local require = require 11local dis_x86 = require((string.match(..., ".*%.") or "").."dis_x86")
12 12return {
13module(...) 13 create = dis_x86.create64,
14 14 disass = dis_x86.disass64,
15local dis_x86 = require(_PACKAGE.."dis_x86") 15 regname = dis_x86.regname64
16 16}
17create = dis_x86.create64
18disass = dis_x86.disass64
19regname = dis_x86.regname64
20 17
diff --git a/src/jit/dis_x86.lua b/src/jit/dis_x86.lua
index 7f9ada79..80bf721b 100644
--- a/src/jit/dis_x86.lua
+++ b/src/jit/dis_x86.lua
@@ -15,19 +15,20 @@
15-- Intel and AMD manuals. The supported instruction set is quite extensive 15-- Intel and AMD manuals. The supported instruction set is quite extensive
16-- and reflects what a current generation Intel or AMD CPU implements in 16-- and reflects what a current generation Intel or AMD CPU implements in
17-- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3, 17-- 32 bit and 64 bit mode. Yes, this includes MMX, SSE, SSE2, SSE3, SSSE3,
18-- SSE4.1, SSE4.2, SSE4a and even privileged and hypervisor (VMX/SVM) 18-- SSE4.1, SSE4.2, SSE4a, AVX, AVX2 and even privileged and hypervisor
19-- instructions. 19-- (VMX/SVM) instructions.
20-- 20--
21-- Notes: 21-- Notes:
22-- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported. 22-- * The (useless) a16 prefix, 3DNow and pre-586 opcodes are unsupported.
23-- * No attempt at optimization has been made -- it's fast enough for my needs. 23-- * No attempt at optimization has been made -- it's fast enough for my needs.
24-- * The public API may change when more architectures are added.
25------------------------------------------------------------------------------ 24------------------------------------------------------------------------------
26 25
27local type = type 26local type = type
28local sub, byte, format = string.sub, string.byte, string.format 27local sub, byte, format = string.sub, string.byte, string.format
29local match, gmatch, gsub = string.match, string.gmatch, string.gsub 28local match, gmatch, gsub = string.match, string.gmatch, string.gsub
30local lower, rep = string.lower, string.rep 29local lower, rep = string.lower, string.rep
30local bit = require("bit")
31local tohex = bit.tohex
31 32
32-- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on. 33-- Map for 1st opcode byte in 32 bit mode. Ugly? Well ... read on.
33local map_opc1_32 = { 34local map_opc1_32 = {
@@ -76,7 +77,7 @@ local map_opc1_32 = {
76"movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi", 77"movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi","movBRi",
77"movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI", 78"movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI","movVRI",
78--Cx 79--Cx
79"shift!Bmu","shift!Vmu","retBw","ret","$lesVrm","$ldsVrm","movBmi","movVmi", 80"shift!Bmu","shift!Vmu","retBw","ret","vex*3$lesVrm","vex*2$ldsVrm","movBmi","movVmi",
80"enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS", 81"enterBwu","leave","retfBw","retf","int3","intBu","into","iretVS",
81--Dx 82--Dx
82"shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb", 83"shift!Bm1","shift!Vm1","shift!Bmc","shift!Vmc","aamBu","aadBu","salc","xlatb",
@@ -101,7 +102,7 @@ local map_opc1_64 = setmetatable({
101 [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb", 102 [0x44]="rex*r", [0x45]="rex*rb", [0x46]="rex*rx", [0x47]="rex*rxb",
102 [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb", 103 [0x48]="rex*w", [0x49]="rex*wb", [0x4a]="rex*wx", [0x4b]="rex*wxb",
103 [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb", 104 [0x4c]="rex*wr", [0x4d]="rex*wrb", [0x4e]="rex*wrx", [0x4f]="rex*wrxb",
104 [0x82]=false, [0x9a]=false, [0xc4]=false, [0xc5]=false, [0xce]=false, 105 [0x82]=false, [0x9a]=false, [0xc4]="vex*3", [0xc5]="vex*2", [0xce]=false,
105 [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false, 106 [0xd4]=false, [0xd5]=false, [0xd6]=false, [0xea]=false,
106}, { __index = map_opc1_32 }) 107}, { __index = map_opc1_32 })
107 108
@@ -112,21 +113,21 @@ local map_opc2 = {
112[0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret", 113[0]="sldt!Dmp","sgdt!Ump","larVrm","lslVrm",nil,"syscall","clts","sysret",
113"invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu", 114"invd","wbinvd",nil,"ud1",nil,"$prefetch!Bm","femms","3dnowMrmu",
114--1x 115--1x
115"movupsXrm|movssXrm|movupdXrm|movsdXrm", 116"movupsXrm|movssXrvm|movupdXrm|movsdXrvm",
116"movupsXmr|movssXmr|movupdXmr|movsdXmr", 117"movupsXmr|movssXmvr|movupdXmr|movsdXmvr",
117"movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm", 118"movhlpsXrm$movlpsXrm|movsldupXrm|movlpdXrm|movddupXrm",
118"movlpsXmr||movlpdXmr", 119"movlpsXmr||movlpdXmr",
119"unpcklpsXrm||unpcklpdXrm", 120"unpcklpsXrvm||unpcklpdXrvm",
120"unpckhpsXrm||unpckhpdXrm", 121"unpckhpsXrvm||unpckhpdXrvm",
121"movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm", 122"movlhpsXrm$movhpsXrm|movshdupXrm|movhpdXrm",
122"movhpsXmr||movhpdXmr", 123"movhpsXmr||movhpdXmr",
123"$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm", 124"$prefetcht!Bm","hintnopVm","hintnopVm","hintnopVm",
124"hintnopVm","hintnopVm","hintnopVm","hintnopVm", 125"hintnopVm","hintnopVm","endbr*hintnopVm","hintnopVm",
125--2x 126--2x
126"movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil, 127"movUmx$","movUmy$","movUxm$","movUym$","movUmz$",nil,"movUzm$",nil,
127"movapsXrm||movapdXrm", 128"movapsXrm||movapdXrm",
128"movapsXmr||movapdXmr", 129"movapsXmr||movapdXmr",
129"cvtpi2psXrMm|cvtsi2ssXrVmt|cvtpi2pdXrMm|cvtsi2sdXrVmt", 130"cvtpi2psXrMm|cvtsi2ssXrvVmt|cvtpi2pdXrMm|cvtsi2sdXrvVmt",
130"movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr", 131"movntpsXmr|movntssXmr|movntpdXmr|movntsdXmr",
131"cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm", 132"cvttps2piMrXm|cvttss2siVrXm|cvttpd2piMrXm|cvttsd2siVrXm",
132"cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm", 133"cvtps2piMrXm|cvtss2siVrXm|cvtpd2piMrXm|cvtsd2siVrXm",
@@ -142,27 +143,27 @@ local map_opc2 = {
142"cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm", 143"cmovlVrm","cmovgeVrm","cmovleVrm","cmovgVrm",
143--5x 144--5x
144"movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm", 145"movmskpsVrXm$||movmskpdVrXm$","sqrtpsXrm|sqrtssXrm|sqrtpdXrm|sqrtsdXrm",
145"rsqrtpsXrm|rsqrtssXrm","rcppsXrm|rcpssXrm", 146"rsqrtpsXrm|rsqrtssXrvm","rcppsXrm|rcpssXrvm",
146"andpsXrm||andpdXrm","andnpsXrm||andnpdXrm", 147"andpsXrvm||andpdXrvm","andnpsXrvm||andnpdXrvm",
147"orpsXrm||orpdXrm","xorpsXrm||xorpdXrm", 148"orpsXrvm||orpdXrvm","xorpsXrvm||xorpdXrvm",
148"addpsXrm|addssXrm|addpdXrm|addsdXrm","mulpsXrm|mulssXrm|mulpdXrm|mulsdXrm", 149"addpsXrvm|addssXrvm|addpdXrvm|addsdXrvm","mulpsXrvm|mulssXrvm|mulpdXrvm|mulsdXrvm",
149"cvtps2pdXrm|cvtss2sdXrm|cvtpd2psXrm|cvtsd2ssXrm", 150"cvtps2pdXrm|cvtss2sdXrvm|cvtpd2psXrm|cvtsd2ssXrvm",
150"cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm", 151"cvtdq2psXrm|cvttps2dqXrm|cvtps2dqXrm",
151"subpsXrm|subssXrm|subpdXrm|subsdXrm","minpsXrm|minssXrm|minpdXrm|minsdXrm", 152"subpsXrvm|subssXrvm|subpdXrvm|subsdXrvm","minpsXrvm|minssXrvm|minpdXrvm|minsdXrvm",
152"divpsXrm|divssXrm|divpdXrm|divsdXrm","maxpsXrm|maxssXrm|maxpdXrm|maxsdXrm", 153"divpsXrvm|divssXrvm|divpdXrvm|divsdXrvm","maxpsXrvm|maxssXrvm|maxpdXrvm|maxsdXrvm",
153--6x 154--6x
154"punpcklbwPrm","punpcklwdPrm","punpckldqPrm","packsswbPrm", 155"punpcklbwPrvm","punpcklwdPrvm","punpckldqPrvm","packsswbPrvm",
155"pcmpgtbPrm","pcmpgtwPrm","pcmpgtdPrm","packuswbPrm", 156"pcmpgtbPrvm","pcmpgtwPrvm","pcmpgtdPrvm","packuswbPrvm",
156"punpckhbwPrm","punpckhwdPrm","punpckhdqPrm","packssdwPrm", 157"punpckhbwPrvm","punpckhwdPrvm","punpckhdqPrvm","packssdwPrvm",
157"||punpcklqdqXrm","||punpckhqdqXrm", 158"||punpcklqdqXrvm","||punpckhqdqXrvm",
158"movPrVSm","movqMrm|movdquXrm|movdqaXrm", 159"movPrVSm","movqMrm|movdquXrm|movdqaXrm",
159--7x 160--7x
160"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pmu", 161"pshufwMrmu|pshufhwXrmu|pshufdXrmu|pshuflwXrmu","pshiftw!Pvmu",
161"pshiftd!Pmu","pshiftq!Mmu||pshiftdq!Xmu", 162"pshiftd!Pvmu","pshiftq!Mvmu||pshiftdq!Xvmu",
162"pcmpeqbPrm","pcmpeqwPrm","pcmpeqdPrm","emms|", 163"pcmpeqbPrvm","pcmpeqwPrvm","pcmpeqdPrvm","emms*|",
163"vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$", 164"vmreadUmr||extrqXmuu$|insertqXrmuu$","vmwriteUrm||extrqXrm$|insertqXrm$",
164nil,nil, 165nil,nil,
165"||haddpdXrm|haddpsXrm","||hsubpdXrm|hsubpsXrm", 166"||haddpdXrvm|haddpsXrvm","||hsubpdXrvm|hsubpsXrvm",
166"movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr", 167"movVSmMr|movqXrm|movVSmXr","movqMmr|movdquXmr|movdqaXmr",
167--8x 168--8x
168"joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj", 169"joVj","jnoVj","jbVj","jnbVj","jzVj","jnzVj","jbeVj","jaVj",
@@ -180,27 +181,27 @@ nil,nil,
180"bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt", 181"bsfVrm","bsrVrm|lzcntVrm|bsrWrm","movsxVrBmt","movsxVrWmt",
181--Cx 182--Cx
182"xaddBmr","xaddVmr", 183"xaddBmr","xaddVmr",
183"cmppsXrmu|cmpssXrmu|cmppdXrmu|cmpsdXrmu","$movntiVmr|", 184"cmppsXrvmu|cmpssXrvmu|cmppdXrvmu|cmpsdXrvmu","$movntiVmr|",
184"pinsrwPrWmu","pextrwDrPmu", 185"pinsrwPrvWmu","pextrwDrPmu",
185"shufpsXrmu||shufpdXrmu","$cmpxchg!Qmp", 186"shufpsXrvmu||shufpdXrvmu","$cmpxchg!Qmp",
186"bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR", 187"bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR","bswapVR",
187--Dx 188--Dx
188"||addsubpdXrm|addsubpsXrm","psrlwPrm","psrldPrm","psrlqPrm", 189"||addsubpdXrvm|addsubpsXrvm","psrlwPrvm","psrldPrvm","psrlqPrvm",
189"paddqPrm","pmullwPrm", 190"paddqPrvm","pmullwPrvm",
190"|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm", 191"|movq2dqXrMm|movqXmr|movdq2qMrXm$","pmovmskbVrMm||pmovmskbVrXm",
191"psubusbPrm","psubuswPrm","pminubPrm","pandPrm", 192"psubusbPrvm","psubuswPrvm","pminubPrvm","pandPrvm",
192"paddusbPrm","padduswPrm","pmaxubPrm","pandnPrm", 193"paddusbPrvm","padduswPrvm","pmaxubPrvm","pandnPrvm",
193--Ex 194--Ex
194"pavgbPrm","psrawPrm","psradPrm","pavgwPrm", 195"pavgbPrvm","psrawPrvm","psradPrvm","pavgwPrvm",
195"pmulhuwPrm","pmulhwPrm", 196"pmulhuwPrvm","pmulhwPrvm",
196"|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr", 197"|cvtdq2pdXrm|cvttpd2dqXrm|cvtpd2dqXrm","$movntqMmr||$movntdqXmr",
197"psubsbPrm","psubswPrm","pminswPrm","porPrm", 198"psubsbPrvm","psubswPrvm","pminswPrvm","porPrvm",
198"paddsbPrm","paddswPrm","pmaxswPrm","pxorPrm", 199"paddsbPrvm","paddswPrvm","pmaxswPrvm","pxorPrvm",
199--Fx 200--Fx
200"|||lddquXrm","psllwPrm","pslldPrm","psllqPrm", 201"|||lddquXrm","psllwPrvm","pslldPrvm","psllqPrvm",
201"pmuludqPrm","pmaddwdPrm","psadbwPrm","maskmovqMrm||maskmovdquXrm$", 202"pmuludqPrvm","pmaddwdPrvm","psadbwPrvm","maskmovqMrm||maskmovdquXrm$",
202"psubbPrm","psubwPrm","psubdPrm","psubqPrm", 203"psubbPrvm","psubwPrvm","psubdPrvm","psubqPrvm",
203"paddbPrm","paddwPrm","padddPrm","ud", 204"paddbPrvm","paddwPrvm","padddPrvm","ud",
204} 205}
205assert(map_opc2[255] == "ud") 206assert(map_opc2[255] == "ud")
206 207
@@ -208,49 +209,91 @@ assert(map_opc2[255] == "ud")
208local map_opc3 = { 209local map_opc3 = {
209["38"] = { -- [66] 0f 38 xx 210["38"] = { -- [66] 0f 38 xx
210--0x 211--0x
211[0]="pshufbPrm","phaddwPrm","phadddPrm","phaddswPrm", 212[0]="pshufbPrvm","phaddwPrvm","phadddPrvm","phaddswPrvm",
212"pmaddubswPrm","phsubwPrm","phsubdPrm","phsubswPrm", 213"pmaddubswPrvm","phsubwPrvm","phsubdPrvm","phsubswPrvm",
213"psignbPrm","psignwPrm","psigndPrm","pmulhrswPrm", 214"psignbPrvm","psignwPrvm","psigndPrvm","pmulhrswPrvm",
214nil,nil,nil,nil, 215"||permilpsXrvm","||permilpdXrvm",nil,nil,
215--1x 216--1x
216"||pblendvbXrma",nil,nil,nil, 217"||pblendvbXrma",nil,nil,nil,
217"||blendvpsXrma","||blendvpdXrma",nil,"||ptestXrm", 218"||blendvpsXrma","||blendvpdXrma","||permpsXrvm","||ptestXrm",
218nil,nil,nil,nil, 219"||broadcastssXrm","||broadcastsdXrm","||broadcastf128XrlXm",nil,
219"pabsbPrm","pabswPrm","pabsdPrm",nil, 220"pabsbPrm","pabswPrm","pabsdPrm",nil,
220--2x 221--2x
221"||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm", 222"||pmovsxbwXrm","||pmovsxbdXrm","||pmovsxbqXrm","||pmovsxwdXrm",
222"||pmovsxwqXrm","||pmovsxdqXrm",nil,nil, 223"||pmovsxwqXrm","||pmovsxdqXrm",nil,nil,
223"||pmuldqXrm","||pcmpeqqXrm","||$movntdqaXrm","||packusdwXrm", 224"||pmuldqXrvm","||pcmpeqqXrvm","||$movntdqaXrm","||packusdwXrvm",
224nil,nil,nil,nil, 225"||maskmovpsXrvm","||maskmovpdXrvm","||maskmovpsXmvr","||maskmovpdXmvr",
225--3x 226--3x
226"||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm", 227"||pmovzxbwXrm","||pmovzxbdXrm","||pmovzxbqXrm","||pmovzxwdXrm",
227"||pmovzxwqXrm","||pmovzxdqXrm",nil,"||pcmpgtqXrm", 228"||pmovzxwqXrm","||pmovzxdqXrm","||permdXrvm","||pcmpgtqXrvm",
228"||pminsbXrm","||pminsdXrm","||pminuwXrm","||pminudXrm", 229"||pminsbXrvm","||pminsdXrvm","||pminuwXrvm","||pminudXrvm",
229"||pmaxsbXrm","||pmaxsdXrm","||pmaxuwXrm","||pmaxudXrm", 230"||pmaxsbXrvm","||pmaxsdXrvm","||pmaxuwXrvm","||pmaxudXrvm",
230--4x 231--4x
231"||pmulddXrm","||phminposuwXrm", 232"||pmulddXrvm","||phminposuwXrm",nil,nil,
233nil,"||psrlvVSXrvm","||psravdXrvm","||psllvVSXrvm",
234--5x
235[0x58] = "||pbroadcastdXrlXm",[0x59] = "||pbroadcastqXrlXm",
236[0x5a] = "||broadcasti128XrlXm",
237--7x
238[0x78] = "||pbroadcastbXrlXm",[0x79] = "||pbroadcastwXrlXm",
239--8x
240[0x8c] = "||pmaskmovXrvVSm",
241[0x8e] = "||pmaskmovVSmXvr",
242--9x
243[0x96] = "||fmaddsub132pHXrvm",[0x97] = "||fmsubadd132pHXrvm",
244[0x98] = "||fmadd132pHXrvm",[0x99] = "||fmadd132sHXrvm",
245[0x9a] = "||fmsub132pHXrvm",[0x9b] = "||fmsub132sHXrvm",
246[0x9c] = "||fnmadd132pHXrvm",[0x9d] = "||fnmadd132sHXrvm",
247[0x9e] = "||fnmsub132pHXrvm",[0x9f] = "||fnmsub132sHXrvm",
248--Ax
249[0xa6] = "||fmaddsub213pHXrvm",[0xa7] = "||fmsubadd213pHXrvm",
250[0xa8] = "||fmadd213pHXrvm",[0xa9] = "||fmadd213sHXrvm",
251[0xaa] = "||fmsub213pHXrvm",[0xab] = "||fmsub213sHXrvm",
252[0xac] = "||fnmadd213pHXrvm",[0xad] = "||fnmadd213sHXrvm",
253[0xae] = "||fnmsub213pHXrvm",[0xaf] = "||fnmsub213sHXrvm",
254--Bx
255[0xb6] = "||fmaddsub231pHXrvm",[0xb7] = "||fmsubadd231pHXrvm",
256[0xb8] = "||fmadd231pHXrvm",[0xb9] = "||fmadd231sHXrvm",
257[0xba] = "||fmsub231pHXrvm",[0xbb] = "||fmsub231sHXrvm",
258[0xbc] = "||fnmadd231pHXrvm",[0xbd] = "||fnmadd231sHXrvm",
259[0xbe] = "||fnmsub231pHXrvm",[0xbf] = "||fnmsub231sHXrvm",
260--Dx
261[0xdc] = "||aesencXrvm", [0xdd] = "||aesenclastXrvm",
262[0xde] = "||aesdecXrvm", [0xdf] = "||aesdeclastXrvm",
232--Fx 263--Fx
233[0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt", 264[0xf0] = "|||crc32TrBmt",[0xf1] = "|||crc32TrVmt",
265[0xf7] = "| sarxVrmv| shlxVrmv| shrxVrmv",
234}, 266},
235 267
236["3a"] = { -- [66] 0f 3a xx 268["3a"] = { -- [66] 0f 3a xx
237--0x 269--0x
238[0x00]=nil,nil,nil,nil,nil,nil,nil,nil, 270[0x00]="||permqXrmu","||permpdXrmu","||pblenddXrvmu",nil,
239"||roundpsXrmu","||roundpdXrmu","||roundssXrmu","||roundsdXrmu", 271"||permilpsXrmu","||permilpdXrmu","||perm2f128Xrvmu",nil,
240"||blendpsXrmu","||blendpdXrmu","||pblendwXrmu","palignrPrmu", 272"||roundpsXrmu","||roundpdXrmu","||roundssXrvmu","||roundsdXrvmu",
273"||blendpsXrvmu","||blendpdXrvmu","||pblendwXrvmu","palignrPrvmu",
241--1x 274--1x
242nil,nil,nil,nil, 275nil,nil,nil,nil,
243"||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru", 276"||pextrbVmXru","||pextrwVmXru","||pextrVmSXru","||extractpsVmXru",
244nil,nil,nil,nil,nil,nil,nil,nil, 277"||insertf128XrvlXmu","||extractf128XlXmYru",nil,nil,
278nil,nil,nil,nil,
245--2x 279--2x
246"||pinsrbXrVmu","||insertpsXrmu","||pinsrXrVmuS",nil, 280"||pinsrbXrvVmu","||insertpsXrvmu","||pinsrXrvVmuS",nil,
281--3x
282[0x38] = "||inserti128Xrvmu",[0x39] = "||extracti128XlXmYru",
247--4x 283--4x
248[0x40] = "||dppsXrmu", 284[0x40] = "||dppsXrvmu",
249[0x41] = "||dppdXrmu", 285[0x41] = "||dppdXrvmu",
250[0x42] = "||mpsadbwXrmu", 286[0x42] = "||mpsadbwXrvmu",
287[0x44] = "||pclmulqdqXrvmu",
288[0x46] = "||perm2i128Xrvmu",
289[0x4a] = "||blendvpsXrvmb",[0x4b] = "||blendvpdXrvmb",
290[0x4c] = "||pblendvbXrvmb",
251--6x 291--6x
252[0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu", 292[0x60] = "||pcmpestrmXrmu",[0x61] = "||pcmpestriXrmu",
253[0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu", 293[0x62] = "||pcmpistrmXrmu",[0x63] = "||pcmpistriXrmu",
294[0xdf] = "||aeskeygenassistXrmu",
295--Fx
296[0xf0] = "||| rorxVrmu",
254}, 297},
255} 298}
256 299
@@ -354,17 +397,19 @@ local map_regs = {
354 "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext! 397 "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7" }, -- No x64 ext!
355 X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", 398 X = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
356 "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" }, 399 "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" },
400 Y = { "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7",
401 "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15" },
357} 402}
358local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" } 403local map_segregs = { "es", "cs", "ss", "ds", "fs", "gs", "segr6", "segr7" }
359 404
360-- Maps for size names. 405-- Maps for size names.
361local map_sz2n = { 406local map_sz2n = {
362 B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, 407 B = 1, W = 2, D = 4, Q = 8, M = 8, X = 16, Y = 32,
363} 408}
364local map_sz2prefix = { 409local map_sz2prefix = {
365 B = "byte", W = "word", D = "dword", 410 B = "byte", W = "word", D = "dword",
366 Q = "qword", 411 Q = "qword",
367 M = "qword", X = "xword", 412 M = "qword", X = "xword", Y = "yword",
368 F = "dword", G = "qword", -- No need for sizes/register names for these two. 413 F = "dword", G = "qword", -- No need for sizes/register names for these two.
369} 414}
370 415
@@ -387,10 +432,13 @@ local function putop(ctx, text, operands)
387 if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end 432 if ctx.rep then text = ctx.rep.." "..text; ctx.rep = false end
388 if ctx.rex then 433 if ctx.rex then
389 local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "").. 434 local t = (ctx.rexw and "w" or "")..(ctx.rexr and "r" or "")..
390 (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "") 435 (ctx.rexx and "x" or "")..(ctx.rexb and "b" or "")..
391 if t ~= "" then text = "rex."..t.." "..text end 436 (ctx.vexl and "l" or "")
437 if ctx.vexv and ctx.vexv ~= 0 then t = t.."v"..ctx.vexv end
438 if t ~= "" then text = ctx.rex.."."..t.." "..gsub(text, "^ ", "")
439 elseif ctx.rex == "vex" then text = gsub("v"..text, "^v ", "") end
392 ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false 440 ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
393 ctx.rex = false 441 ctx.rex = false; ctx.vexl = false; ctx.vexv = false
394 end 442 end
395 if ctx.seg then 443 if ctx.seg then
396 local text2, n = gsub(text, "%[", "["..ctx.seg..":") 444 local text2, n = gsub(text, "%[", "["..ctx.seg..":")
@@ -405,6 +453,7 @@ local function putop(ctx, text, operands)
405 end 453 end
406 ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text)) 454 ctx.out(format("%08x %s%s\n", ctx.addr+ctx.start, hex, text))
407 ctx.mrm = false 455 ctx.mrm = false
456 ctx.vexv = false
408 ctx.start = pos 457 ctx.start = pos
409 ctx.imm = nil 458 ctx.imm = nil
410end 459end
@@ -413,7 +462,7 @@ end
413local function clearprefixes(ctx) 462local function clearprefixes(ctx)
414 ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false 463 ctx.o16 = false; ctx.seg = false; ctx.lock = false; ctx.rep = false
415 ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false 464 ctx.rexw = false; ctx.rexr = false; ctx.rexx = false; ctx.rexb = false
416 ctx.rex = false; ctx.a32 = false 465 ctx.rex = false; ctx.a32 = false; ctx.vexl = false
417end 466end
418 467
419-- Fallback for incomplete opcodes at the end. 468-- Fallback for incomplete opcodes at the end.
@@ -450,9 +499,9 @@ end
450-- Process pattern string and generate the operands. 499-- Process pattern string and generate the operands.
451local function putpat(ctx, name, pat) 500local function putpat(ctx, name, pat)
452 local operands, regs, sz, mode, sp, rm, sc, rx, sdisp 501 local operands, regs, sz, mode, sp, rm, sc, rx, sdisp
453 local code, pos, stop = ctx.code, ctx.pos, ctx.stop 502 local code, pos, stop, vexl = ctx.code, ctx.pos, ctx.stop, ctx.vexl
454 503
455 -- Chars used: 1DFGIMPQRSTUVWXacdfgijmoprstuwxyz 504 -- Chars used: 1DFGHIMPQRSTUVWXYabcdfgijlmoprstuvwxyz
456 for p in gmatch(pat, ".") do 505 for p in gmatch(pat, ".") do
457 local x = nil 506 local x = nil
458 if p == "V" or p == "U" then 507 if p == "V" or p == "U" then
@@ -467,12 +516,17 @@ local function putpat(ctx, name, pat)
467 elseif p == "B" then 516 elseif p == "B" then
468 sz = "B" 517 sz = "B"
469 regs = ctx.rex and map_regs.B64 or map_regs.B 518 regs = ctx.rex and map_regs.B64 or map_regs.B
470 elseif match(p, "[WDQMXFG]") then 519 elseif match(p, "[WDQMXYFG]") then
471 sz = p 520 sz = p
521 if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
472 regs = map_regs[sz] 522 regs = map_regs[sz]
473 elseif p == "P" then 523 elseif p == "P" then
474 sz = ctx.o16 and "X" or "M"; ctx.o16 = false 524 sz = ctx.o16 and "X" or "M"; ctx.o16 = false
525 if sz == "X" and vexl then sz = "Y"; ctx.vexl = false end
475 regs = map_regs[sz] 526 regs = map_regs[sz]
527 elseif p == "H" then
528 name = name..(ctx.rexw and "d" or "s")
529 ctx.rexw = false
476 elseif p == "S" then 530 elseif p == "S" then
477 name = name..lower(sz) 531 name = name..lower(sz)
478 elseif p == "s" then 532 elseif p == "s" then
@@ -484,6 +538,10 @@ local function putpat(ctx, name, pat)
484 local imm = getimm(ctx, pos, 1); if not imm then return end 538 local imm = getimm(ctx, pos, 1); if not imm then return end
485 x = format("0x%02x", imm) 539 x = format("0x%02x", imm)
486 pos = pos+1 540 pos = pos+1
541 elseif p == "b" then
542 local imm = getimm(ctx, pos, 1); if not imm then return end
543 x = regs[imm/16+1]
544 pos = pos+1
487 elseif p == "w" then 545 elseif p == "w" then
488 local imm = getimm(ctx, pos, 2); if not imm then return end 546 local imm = getimm(ctx, pos, 2); if not imm then return end
489 x = format("0x%x", imm) 547 x = format("0x%x", imm)
@@ -532,7 +590,7 @@ local function putpat(ctx, name, pat)
532 local lo = imm % 0x1000000 590 local lo = imm % 0x1000000
533 x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo) 591 x = format("0x%02x%06x", (imm-lo) / 0x1000000, lo)
534 else 592 else
535 x = format("0x%08x", imm) 593 x = "0x"..tohex(imm)
536 end 594 end
537 elseif p == "R" then 595 elseif p == "R" then
538 local r = byte(code, pos-1, pos-1)%8 596 local r = byte(code, pos-1, pos-1)%8
@@ -616,8 +674,13 @@ local function putpat(ctx, name, pat)
616 else 674 else
617 x = "CR"..sp 675 x = "CR"..sp
618 end 676 end
677 elseif p == "v" then
678 if ctx.vexv then
679 x = regs[ctx.vexv+1]; ctx.vexv = false
680 end
619 elseif p == "y" then x = "DR"..sp 681 elseif p == "y" then x = "DR"..sp
620 elseif p == "z" then x = "TR"..sp 682 elseif p == "z" then x = "TR"..sp
683 elseif p == "l" then vexl = false
621 elseif p == "t" then 684 elseif p == "t" then
622 else 685 else
623 error("bad pattern `"..pat.."'") 686 error("bad pattern `"..pat.."'")
@@ -692,7 +755,8 @@ map_act = {
692 B = putpat, W = putpat, D = putpat, Q = putpat, 755 B = putpat, W = putpat, D = putpat, Q = putpat,
693 V = putpat, U = putpat, T = putpat, 756 V = putpat, U = putpat, T = putpat,
694 M = putpat, X = putpat, P = putpat, 757 M = putpat, X = putpat, P = putpat,
695 F = putpat, G = putpat, 758 F = putpat, G = putpat, Y = putpat,
759 H = putpat,
696 760
697 -- Collect prefixes. 761 -- Collect prefixes.
698 [":"] = function(ctx, name, pat) 762 [":"] = function(ctx, name, pat)
@@ -740,6 +804,24 @@ map_act = {
740 return dispatch(ctx, map_opcvm[ctx.mrm]) 804 return dispatch(ctx, map_opcvm[ctx.mrm])
741 end, 805 end,
742 806
807 -- Special NOP for endbr64/endbr32.
808 endbr = function(ctx, name, pat)
809 if ctx.rep then
810 local pos = ctx.pos
811 local b = byte(ctx.code, pos)
812 local text
813 if b == 0xfa then text = "endbr64"
814 elseif b == 0xfb then text = "endbr64"
815 end
816 if text then
817 ctx.pos = pos + 1
818 ctx.rep = nil
819 return putop(ctx, text)
820 end
821 end
822 return dispatch(ctx, pat)
823 end,
824
743 -- Floating point opcode dispatch. 825 -- Floating point opcode dispatch.
744 fp = function(ctx, name, pat) 826 fp = function(ctx, name, pat)
745 local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end 827 local mrm = getmrm(ctx); if not mrm then return incomplete(ctx) end
@@ -753,15 +835,68 @@ map_act = {
753 835
754 -- REX prefix. 836 -- REX prefix.
755 rex = function(ctx, name, pat) 837 rex = function(ctx, name, pat)
756 if ctx.rex then return unknown(ctx) end -- Only 1 REX prefix allowed. 838 if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
757 for p in gmatch(pat, ".") do ctx["rex"..p] = true end 839 for p in gmatch(pat, ".") do ctx["rex"..p] = true end
758 ctx.rex = true 840 ctx.rex = "rex"
841 end,
842
843 -- VEX prefix.
844 vex = function(ctx, name, pat)
845 if ctx.rex then return unknown(ctx) end -- Only 1 REX or VEX prefix allowed.
846 ctx.rex = "vex"
847 local pos = ctx.pos
848 if ctx.mrm then
849 ctx.mrm = nil
850 pos = pos-1
851 end
852 local b = byte(ctx.code, pos, pos)
853 if not b then return incomplete(ctx) end
854 pos = pos+1
855 if b < 128 then ctx.rexr = true end
856 local m = 1
857 if pat == "3" then
858 m = b%32; b = (b-m)/32
859 local nb = b%2; b = (b-nb)/2
860 if nb == 0 then ctx.rexb = true end
861 local nx = b%2
862 if nx == 0 then ctx.rexx = true end
863 b = byte(ctx.code, pos, pos)
864 if not b then return incomplete(ctx) end
865 pos = pos+1
866 if b >= 128 then ctx.rexw = true end
867 end
868 ctx.pos = pos
869 local map
870 if m == 1 then map = map_opc2
871 elseif m == 2 then map = map_opc3["38"]
872 elseif m == 3 then map = map_opc3["3a"]
873 else return unknown(ctx) end
874 local p = b%4; b = (b-p)/4
875 if p == 1 then ctx.o16 = "o16"
876 elseif p == 2 then ctx.rep = "rep"
877 elseif p == 3 then ctx.rep = "repne" end
878 local l = b%2; b = (b-l)/2
879 if l ~= 0 then ctx.vexl = true end
880 ctx.vexv = (-1-b)%16
881 return dispatchmap(ctx, map)
759 end, 882 end,
760 883
761 -- Special case for nop with REX prefix. 884 -- Special case for nop with REX prefix.
762 nop = function(ctx, name, pat) 885 nop = function(ctx, name, pat)
763 return dispatch(ctx, ctx.rex and pat or "nop") 886 return dispatch(ctx, ctx.rex and pat or "nop")
764 end, 887 end,
888
889 -- Special case for 0F 77.
890 emms = function(ctx, name, pat)
891 if ctx.rex ~= "vex" then
892 return putop(ctx, "emms")
893 elseif ctx.vexl then
894 ctx.vexl = false
895 return putop(ctx, "zeroall")
896 else
897 return putop(ctx, "zeroupper")
898 end
899 end,
765} 900}
766 901
767------------------------------------------------------------------------------ 902------------------------------------------------------------------------------
@@ -782,7 +917,7 @@ local function disass_block(ctx, ofs, len)
782end 917end
783 918
784-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). 919-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len).
785local function create_(code, addr, out) 920local function create(code, addr, out)
786 local ctx = {} 921 local ctx = {}
787 ctx.code = code 922 ctx.code = code
788 ctx.addr = (addr or 0) - 1 923 ctx.addr = (addr or 0) - 1
@@ -796,8 +931,8 @@ local function create_(code, addr, out)
796 return ctx 931 return ctx
797end 932end
798 933
799local function create64_(code, addr, out) 934local function create64(code, addr, out)
800 local ctx = create_(code, addr, out) 935 local ctx = create(code, addr, out)
801 ctx.x64 = true 936 ctx.x64 = true
802 ctx.map1 = map_opc1_64 937 ctx.map1 = map_opc1_64
803 ctx.aregs = map_regs.Q 938 ctx.aregs = map_regs.Q
@@ -805,32 +940,32 @@ local function create64_(code, addr, out)
805end 940end
806 941
807-- Simple API: disassemble code (a string) at address and output via out. 942-- Simple API: disassemble code (a string) at address and output via out.
808local function disass_(code, addr, out) 943local function disass(code, addr, out)
809 create_(code, addr, out):disass() 944 create(code, addr, out):disass()
810end 945end
811 946
812local function disass64_(code, addr, out) 947local function disass64(code, addr, out)
813 create64_(code, addr, out):disass() 948 create64(code, addr, out):disass()
814end 949end
815 950
816-- Return register name for RID. 951-- Return register name for RID.
817local function regname_(r) 952local function regname(r)
818 if r < 8 then return map_regs.D[r+1] end 953 if r < 8 then return map_regs.D[r+1] end
819 return map_regs.X[r-7] 954 return map_regs.X[r-7]
820end 955end
821 956
822local function regname64_(r) 957local function regname64(r)
823 if r < 16 then return map_regs.Q[r+1] end 958 if r < 16 then return map_regs.Q[r+1] end
824 return map_regs.X[r-15] 959 return map_regs.X[r-15]
825end 960end
826 961
827-- Public module functions. 962-- Public module functions.
828module(...) 963return {
829 964 create = create,
830create = create_ 965 create64 = create64,
831create64 = create64_ 966 disass = disass,
832disass = disass_ 967 disass64 = disass64,
833disass64 = disass64_ 968 regname = regname,
834regname = regname_ 969 regname64 = regname64
835regname64 = regname64_ 970}
836 971
diff --git a/src/jit/dump.lua b/src/jit/dump.lua
index 5097db94..6a700bbe 100644
--- a/src/jit/dump.lua
+++ b/src/jit/dump.lua
@@ -62,7 +62,7 @@ local traceinfo, traceir, tracek = jutil.traceinfo, jutil.traceir, jutil.tracek
62local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap 62local tracemc, tracesnap = jutil.tracemc, jutil.tracesnap
63local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr 63local traceexitstub, ircalladdr = jutil.traceexitstub, jutil.ircalladdr
64local bit = require("bit") 64local bit = require("bit")
65local band, shr = bit.band, bit.rshift 65local band, shr, tohex = bit.band, bit.rshift, bit.tohex
66local sub, gsub, format = string.sub, string.gsub, string.format 66local sub, gsub, format = string.sub, string.gsub, string.format
67local byte, rep = string.byte, string.rep 67local byte, rep = string.byte, string.rep
68local type, tostring = type, tostring 68local type, tostring = type, tostring
@@ -84,12 +84,13 @@ local nexitsym = 0
84local function fillsymtab_tr(tr, nexit) 84local function fillsymtab_tr(tr, nexit)
85 local t = {} 85 local t = {}
86 symtabmt.__index = t 86 symtabmt.__index = t
87 if jit.arch == "mips" or jit.arch == "mipsel" then 87 if jit.arch:sub(1, 4) == "mips" then
88 t[traceexitstub(tr, 0)] = "exit" 88 t[traceexitstub(tr, 0)] = "exit"
89 return 89 return
90 end 90 end
91 for i=0,nexit-1 do 91 for i=0,nexit-1 do
92 local addr = traceexitstub(tr, i) 92 local addr = traceexitstub(tr, i)
93 if addr < 0 then addr = addr + 2^32 end
93 t[addr] = tostring(i) 94 t[addr] = tostring(i)
94 end 95 end
95 local addr = traceexitstub(tr, nexit) 96 local addr = traceexitstub(tr, nexit)
@@ -100,10 +101,15 @@ end
100local function fillsymtab(tr, nexit) 101local function fillsymtab(tr, nexit)
101 local t = symtab 102 local t = symtab
102 if nexitsym == 0 then 103 if nexitsym == 0 then
104 local maskaddr = jit.arch == "arm" and -2
103 local ircall = vmdef.ircall 105 local ircall = vmdef.ircall
104 for i=0,#ircall do 106 for i=0,#ircall do
105 local addr = ircalladdr(i) 107 local addr = ircalladdr(i)
106 if addr ~= 0 then t[addr] = ircall[i] end 108 if addr ~= 0 then
109 if maskaddr then addr = band(addr, maskaddr) end
110 if addr < 0 then addr = addr + 2^32 end
111 t[addr] = ircall[i]
112 end
107 end 113 end
108 end 114 end
109 if nexitsym == 1000000 then -- Per-trace exit stubs. 115 if nexitsym == 1000000 then -- Per-trace exit stubs.
@@ -117,6 +123,7 @@ local function fillsymtab(tr, nexit)
117 nexit = 1000000 123 nexit = 1000000
118 break 124 break
119 end 125 end
126 if addr < 0 then addr = addr + 2^32 end
120 t[addr] = tostring(i) 127 t[addr] = tostring(i)
121 end 128 end
122 nexitsym = nexit 129 nexitsym = nexit
@@ -135,6 +142,7 @@ local function dump_mcode(tr)
135 local mcode, addr, loop = tracemc(tr) 142 local mcode, addr, loop = tracemc(tr)
136 if not mcode then return end 143 if not mcode then return end
137 if not disass then disass = require("jit.dis_"..jit.arch) end 144 if not disass then disass = require("jit.dis_"..jit.arch) end
145 if addr < 0 then addr = addr + 2^32 end
138 out:write("---- TRACE ", tr, " mcode ", #mcode, "\n") 146 out:write("---- TRACE ", tr, " mcode ", #mcode, "\n")
139 local ctx = disass.create(mcode, addr, dumpwrite) 147 local ctx = disass.create(mcode, addr, dumpwrite)
140 ctx.hexdump = 0 148 ctx.hexdump = 0
@@ -210,8 +218,10 @@ local function colorize_text(s)
210 return s 218 return s
211end 219end
212 220
213local function colorize_ansi(s, t) 221local function colorize_ansi(s, t, extra)
214 return format(colortype_ansi[t], s) 222 local out = format(colortype_ansi[t], s)
223 if extra then out = "\027[3m"..out end
224 return out
215end 225end
216 226
217local irtype_ansi = setmetatable({}, 227local irtype_ansi = setmetatable({},
@@ -220,9 +230,10 @@ local irtype_ansi = setmetatable({},
220 230
221local html_escape = { ["<"] = "&lt;", [">"] = "&gt;", ["&"] = "&amp;", } 231local html_escape = { ["<"] = "&lt;", [">"] = "&gt;", ["&"] = "&amp;", }
222 232
223local function colorize_html(s, t) 233local function colorize_html(s, t, extra)
224 s = gsub(s, "[<>&]", html_escape) 234 s = gsub(s, "[<>&]", html_escape)
225 return format('<span class="irt_%s">%s</span>', irtype_text[t], s) 235 return format('<span class="irt_%s%s">%s</span>',
236 irtype_text[t], extra and " irt_extra" or "", s)
226end 237end
227 238
228local irtype_html = setmetatable({}, 239local irtype_html = setmetatable({},
@@ -247,6 +258,7 @@ span.irt_tab { color: #c00000; }
247span.irt_udt, span.irt_lud { color: #00c0c0; } 258span.irt_udt, span.irt_lud { color: #00c0c0; }
248span.irt_num { color: #4040c0; } 259span.irt_num { color: #4040c0; }
249span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; } 260span.irt_int, span.irt_i8, span.irt_u8, span.irt_i16, span.irt_u16 { color: #b040b0; }
261span.irt_extra { font-style: italic; }
250</style> 262</style>
251]] 263]]
252 264
@@ -262,6 +274,7 @@ local litname = {
262 if band(mode, 8) ~= 0 then s = s.."C" end 274 if band(mode, 8) ~= 0 then s = s.."C" end
263 if band(mode, 16) ~= 0 then s = s.."R" end 275 if band(mode, 16) ~= 0 then s = s.."R" end
264 if band(mode, 32) ~= 0 then s = s.."I" end 276 if band(mode, 32) ~= 0 then s = s.."I" end
277 if band(mode, 64) ~= 0 then s = s.."K" end
265 t[mode] = s 278 t[mode] = s
266 return s 279 return s
267 end}), 280 end}),
@@ -269,16 +282,20 @@ local litname = {
269 ["CONV "] = setmetatable({}, { __index = function(t, mode) 282 ["CONV "] = setmetatable({}, { __index = function(t, mode)
270 local s = irtype[band(mode, 31)] 283 local s = irtype[band(mode, 31)]
271 s = irtype[band(shr(mode, 5), 31)].."."..s 284 s = irtype[band(shr(mode, 5), 31)].."."..s
272 if band(mode, 0x400) ~= 0 then s = s.." trunc" 285 if band(mode, 0x800) ~= 0 then s = s.." sext" end
273 elseif band(mode, 0x800) ~= 0 then s = s.." sext" end
274 local c = shr(mode, 12) 286 local c = shr(mode, 12)
275 if c == 2 then s = s.." index" elseif c == 3 then s = s.." check" end 287 if c == 1 then s = s.." none"
288 elseif c == 2 then s = s.." index"
289 elseif c == 3 then s = s.." check" end
276 t[mode] = s 290 t[mode] = s
277 return s 291 return s
278 end}), 292 end}),
279 ["FLOAD "] = vmdef.irfield, 293 ["FLOAD "] = vmdef.irfield,
280 ["FREF "] = vmdef.irfield, 294 ["FREF "] = vmdef.irfield,
281 ["FPMATH"] = vmdef.irfpm, 295 ["FPMATH"] = vmdef.irfpm,
296 ["TMPREF"] = { [0] = "", "IN", "OUT", "INOUT", "", "", "OUT2", "INOUT2" },
297 ["BUFHDR"] = { [0] = "RESET", "APPEND", "WRITE" },
298 ["TOSTR "] = { [0] = "INT", "NUM", "CHAR" },
282} 299}
283 300
284local function ctlsub(c) 301local function ctlsub(c)
@@ -302,15 +319,19 @@ local function fmtfunc(func, pc)
302 end 319 end
303end 320end
304 321
305local function formatk(tr, idx) 322local function formatk(tr, idx, sn)
306 local k, t, slot = tracek(tr, idx) 323 local k, t, slot = tracek(tr, idx)
307 local tn = type(k) 324 local tn = type(k)
308 local s 325 local s
309 if tn == "number" then 326 if tn == "number" then
310 if k == 2^52+2^51 then 327 if t < 12 then
328 s = k == 0 and "NULL" or format("[0x%08x]", k)
329 elseif band(sn or 0, 0x30000) ~= 0 then
330 s = band(sn, 0x20000) ~= 0 and "contpc" or "ftsz"
331 elseif k == 2^52+2^51 then
311 s = "bias" 332 s = "bias"
312 else 333 else
313 s = format("%+.14g", k) 334 s = format(0 < k and k < 0x1p-1026 and "%+a" or "%+.14g", k)
314 end 335 end
315 elseif tn == "string" then 336 elseif tn == "string" then
316 s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub)) 337 s = format(#k > 20 and '"%.20s"~' or '"%s"', gsub(k, "%c", ctlsub))
@@ -328,10 +349,12 @@ local function formatk(tr, idx)
328 elseif t == 21 then -- int64_t 349 elseif t == 21 then -- int64_t
329 s = sub(tostring(k), 1, -3) 350 s = sub(tostring(k), 1, -3)
330 if sub(s, 1, 1) ~= "-" then s = "+"..s end 351 if sub(s, 1, 1) ~= "-" then s = "+"..s end
352 elseif sn == 0x1057fff then -- SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL)
353 return "----" -- Special case for LJ_FR2 slot 1.
331 else 354 else
332 s = tostring(k) -- For primitives. 355 s = tostring(k) -- For primitives.
333 end 356 end
334 s = colorize(format("%-4s", s), t) 357 s = colorize(format("%-4s", s), t, band(sn or 0, 0x100000) ~= 0)
335 if slot then 358 if slot then
336 s = format("%s @%d", s, slot) 359 s = format("%s @%d", s, slot)
337 end 360 end
@@ -346,12 +369,12 @@ local function printsnap(tr, snap)
346 n = n + 1 369 n = n + 1
347 local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS 370 local ref = band(sn, 0xffff) - 0x8000 -- REF_BIAS
348 if ref < 0 then 371 if ref < 0 then
349 out:write(formatk(tr, ref)) 372 out:write(formatk(tr, ref, sn))
350 elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM 373 elseif band(sn, 0x80000) ~= 0 then -- SNAP_SOFTFPNUM
351 out:write(colorize(format("%04d/%04d", ref, ref+1), 14)) 374 out:write(colorize(format("%04d/%04d", ref, ref+1), 14))
352 else 375 else
353 local m, ot, op1, op2 = traceir(tr, ref) 376 local m, ot, op1, op2 = traceir(tr, ref)
354 out:write(colorize(format("%04d", ref), band(ot, 31))) 377 out:write(colorize(format("%04d", ref), band(ot, 31), band(sn, 0x100000) ~= 0))
355 end 378 end
356 out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME 379 out:write(band(sn, 0x10000) == 0 and " " or "|") -- SNAP_FRAME
357 else 380 else
@@ -529,7 +552,12 @@ local recdepth = 0
529local function fmterr(err, info) 552local function fmterr(err, info)
530 if type(err) == "number" then 553 if type(err) == "number" then
531 if type(info) == "function" then info = fmtfunc(info) end 554 if type(info) == "function" then info = fmtfunc(info) end
532 err = format(vmdef.traceerr[err], info) 555 local fmt = vmdef.traceerr[err]
556 if fmt == "NYI: bytecode %s" then
557 local oidx = 6 * info
558 info = sub(vmdef.bcnames, oidx+1, oidx+6)
559 end
560 err = format(fmt, info)
533 end 561 end
534 return err 562 return err
535end 563end
@@ -544,7 +572,7 @@ local function dump_trace(what, tr, func, pc, otr, oex)
544 if what == "start" then 572 if what == "start" then
545 if dumpmode.H then out:write('<pre class="ljdump">\n') end 573 if dumpmode.H then out:write('<pre class="ljdump">\n') end
546 out:write("---- TRACE ", tr, " ", what) 574 out:write("---- TRACE ", tr, " ", what)
547 if otr then out:write(" ", otr, "/", oex) end 575 if otr then out:write(" ", otr, "/", oex == -1 and "stitch" or oex) end
548 out:write(" ", fmtfunc(func, pc), "\n") 576 out:write(" ", fmtfunc(func, pc), "\n")
549 elseif what == "stop" or what == "abort" then 577 elseif what == "stop" or what == "abort" then
550 out:write("---- TRACE ", tr, " ", what) 578 out:write("---- TRACE ", tr, " ", what)
@@ -594,23 +622,26 @@ end
594 622
595------------------------------------------------------------------------------ 623------------------------------------------------------------------------------
596 624
625local gpr64 = jit.arch:match("64")
626local fprmips32 = jit.arch == "mips" or jit.arch == "mipsel"
627
597-- Dump taken trace exits. 628-- Dump taken trace exits.
598local function dump_texit(tr, ex, ngpr, nfpr, ...) 629local function dump_texit(tr, ex, ngpr, nfpr, ...)
599 out:write("---- TRACE ", tr, " exit ", ex, "\n") 630 out:write("---- TRACE ", tr, " exit ", ex, "\n")
600 if dumpmode.X then 631 if dumpmode.X then
601 local regs = {...} 632 local regs = {...}
602 if jit.arch == "x64" then 633 if gpr64 then
603 for i=1,ngpr do 634 for i=1,ngpr do
604 out:write(format(" %016x", regs[i])) 635 out:write(format(" %016x", regs[i]))
605 if i % 4 == 0 then out:write("\n") end 636 if i % 4 == 0 then out:write("\n") end
606 end 637 end
607 else 638 else
608 for i=1,ngpr do 639 for i=1,ngpr do
609 out:write(format(" %08x", regs[i])) 640 out:write(" ", tohex(regs[i]))
610 if i % 8 == 0 then out:write("\n") end 641 if i % 8 == 0 then out:write("\n") end
611 end 642 end
612 end 643 end
613 if jit.arch == "mips" or jit.arch == "mipsel" then 644 if fprmips32 then
614 for i=1,nfpr,2 do 645 for i=1,nfpr,2 do
615 out:write(format(" %+17.14g", regs[ngpr+i])) 646 out:write(format(" %+17.14g", regs[ngpr+i]))
616 if i % 8 == 7 then out:write("\n") end 647 if i % 8 == 7 then out:write("\n") end
@@ -691,9 +722,9 @@ local function dumpon(opt, outfile)
691end 722end
692 723
693-- Public module functions. 724-- Public module functions.
694module(...) 725return {
695 726 on = dumpon,
696on = dumpon 727 off = dumpoff,
697off = dumpoff 728 start = dumpon -- For -j command line option.
698start = dumpon -- For -j command line option. 729}
699 730
diff --git a/src/jit/p.lua b/src/jit/p.lua
new file mode 100644
index 00000000..9d938ce5
--- /dev/null
+++ b/src/jit/p.lua
@@ -0,0 +1,309 @@
1----------------------------------------------------------------------------
2-- LuaJIT profiler.
3--
4-- Copyright (C) 2005-2026 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7--
8-- This module is a simple command line interface to the built-in
9-- low-overhead profiler of LuaJIT.
10--
11-- The lower-level API of the profiler is accessible via the "jit.profile"
12-- module or the luaJIT_profile_* C API.
13--
14-- Example usage:
15--
16-- luajit -jp myapp.lua
17-- luajit -jp=s myapp.lua
18-- luajit -jp=-s myapp.lua
19-- luajit -jp=vl myapp.lua
20-- luajit -jp=G,profile.txt myapp.lua
21--
22-- The following dump features are available:
23--
24-- f Stack dump: function name, otherwise module:line. Default mode.
25-- F Stack dump: ditto, but always prepend module.
26-- l Stack dump: module:line.
27-- <number> Stack dump depth (callee < caller). Default: 1.
28-- -<number> Inverse stack dump depth (caller > callee).
29-- s Split stack dump after first stack level. Implies abs(depth) >= 2.
30-- p Show full path for module names.
31-- v Show VM states. Can be combined with stack dumps, e.g. vf or fv.
32-- z Show zones. Can be combined with stack dumps, e.g. zf or fz.
33-- r Show raw sample counts. Default: show percentages.
34-- a Annotate excerpts from source code files.
35-- A Annotate complete source code files.
36-- G Produce raw output suitable for graphical tools (e.g. flame graphs).
37-- m<number> Minimum sample percentage to be shown. Default: 3.
38-- i<number> Sampling interval in milliseconds. Default: 10.
39--
40----------------------------------------------------------------------------
41
42-- Cache some library functions and objects.
43local jit = require("jit")
44local profile = require("jit.profile")
45local vmdef = require("jit.vmdef")
46local math = math
47local pairs, ipairs, tonumber, floor = pairs, ipairs, tonumber, math.floor
48local sort, format = table.sort, string.format
49local stdout = io.stdout
50local zone -- Load jit.zone module on demand.
51
52-- Output file handle.
53local out
54
55------------------------------------------------------------------------------
56
-- Profiler state. All of it is (re)initialized by prof_start().
local prof_ud       -- Proxy userdata while profiling; its __gc runs prof_finish().
local prof_states   -- "v" (VM states), "z" (zones) or nil.
local prof_split    -- Key split mode (0..3) used by the profiler callback.
local prof_min      -- Minimum sample percentage to be shown.
local prof_raw      -- nil: percentages, "r": raw counts, true: raw graph output.
local prof_fmt      -- Format for profile.dumpstack(), or false for no stack dump.
local prof_depth    -- Stack dump depth passed to profile.dumpstack().
local prof_ann      -- Annotation mode: 0 for "A", 3 for "a", nil otherwise.
local prof_count1   -- First-level sample counts: key -> samples.
local prof_count2   -- Second-level sample counts: key -> { subkey -> samples }.
local prof_samples  -- Total number of samples seen so far.

-- Display names for the VM state letters passed to the profiler callback.
local map_vmmode = {
  ["N"] = "Compiled",
  ["I"] = "Interpreted",
  ["C"] = "C code",
  ["G"] = "Garbage Collector",
  ["J"] = "JIT Compiler",
}
68
-- Profiler callback: account one batch of samples.
--
-- th       Thread to dump the stack for.
-- samples  Number of samples to add.
-- vmmode   VM state letter, translated via map_vmmode when showing VM states.
local function prof_cb(th, samples, vmmode)
  prof_samples = prof_samples + samples

  -- State key: either the VM state name or the current zone.
  local state_key
  if prof_states == "v" then
    state_key = map_vmmode[vmmode] or vmmode
  elseif prof_states then
    state_key = zone:get() or "(none)"
  end

  -- Stack key(s): dump the stack and replace numbered builtins by name.
  local stack_key, stack_key2
  if prof_fmt then
    local dumped = profile.dumpstack(th, prof_fmt, prof_depth)
    stack_key = dumped:gsub("%[builtin#(%d+)%]", function(id)
      return vmdef.ffnames[tonumber(id)]
    end)
    if prof_split == 2 then
      -- Split the dump after the first stack level.
      local head, tail = stack_key:match("(.-) [<>] (.*)")
      if tail then stack_key, stack_key2 = head, tail end
    elseif prof_split == 3 then
      stack_key2 = profile.dumpstack(th, "l", 1)
    end
  end

  -- Decide which key is the first level and which is the second level.
  local primary, secondary
  if prof_split == 1 then
    if state_key then
      primary = state_key
      secondary = stack_key
    end
  elseif stack_key then
    primary = stack_key
    secondary = stack_key2 or state_key
  end

  -- Nothing to account without a first-level key.
  if not primary then return end

  -- Coalesce the samples in one or two levels.
  prof_count1[primary] = (prof_count1[primary] or 0) + samples
  if secondary then
    local sub = prof_count2[primary]
    if not sub then
      sub = {}
      prof_count2[primary] = sub
    end
    sub[secondary] = (sub[secondary] or 0) + samples
  end
end
116
117------------------------------------------------------------------------------
118
-- Print the keys of a count table, ordered by descending sample count.
--
-- count1   Table mapping keys to sample counts.
-- count2   Optional table mapping keys to second-level count tables.
-- samples  Number of samples that corresponds to 100%.
-- indent   Prefix for each line (not used for raw graph output).
local function prof_top(count1, count2, samples, indent)
  local keys = {}
  for key in pairs(count1) do keys[#keys+1] = key end
  sort(keys, function(a, b) return count1[a] > count1[b] end)

  for _, key in ipairs(keys) do
    local count = count1[key]
    local pct = floor(count*100/samples + 0.5)
    -- Keys are sorted, so every remaining entry is below the minimum, too.
    if pct < prof_min then break end

    local text
    if prof_raw == "r" then
      text = format("%s%5d %s\n", indent, count, key)
    elseif prof_raw then
      text = format("%s %d\n", key, count)
    else
      text = format("%s%2d%% %s\n", indent, pct, key)
    end
    out:write(text)

    -- Second level: percentages are relative to the count of this key.
    local nested = count2 and count2[key]
    if nested then
      local subindent
      if prof_split == 3 or prof_split == 1 then
        subindent = " -- "
      elseif prof_depth < 0 then
        subindent = " -> "
      else
        subindent = " <- "
      end
      prof_top(nested, nil, count, subindent)
    end
  end
end
148
-- Annotate source code files with the collected per-line sample counts.
--
-- count1   Table mapping "file:line" keys to sample counts.
-- samples  Total number of collected samples (must be non-zero).
--
-- Uses the profiler settings prof_min, prof_raw and prof_ann and writes the
-- annotated listing to the output file handle.
local function prof_annotate(count1, samples)
  -- 'files' holds the list of file names in its array part and maps each
  -- file name to a { [line] = value } table in its hash part.
  local files = {}
  local ms = 0 -- Highest sample count of any key.
  for k, v in pairs(count1) do
    local pct = floor(v*100/samples + 0.5)
    ms = math.max(ms, v)
    if pct >= prof_min then
      local file, line = k:match("^(.*):(%d+)$")
      if not file then file = k; line = 0 end -- Key without a line number.
      local fl = files[file]
      if not fl then fl = {}; files[file] = fl; files[#files+1] = file end
      fl[tonumber(line)] = prof_raw and v or pct
    end
  end
  sort(files)

  -- The value column is 5 characters wide for percentages. Raw counts need
  -- as many characters as the largest count has decimal digits. Counting the
  -- printed digits is exact, unlike ceil(log10(ms)), which is one short for
  -- exact powers of ten.
  local width = 5
  local fmtv = " %3d%% | %s\n"
  if prof_raw then
    width = math.max(width, #format("%d", ms))
    fmtv = "%"..width.."d | %s\n"
  end
  -- Lines without samples get a blank column of the same width.
  local fmtn = (" "):rep(width).." | %s\n"

  local ann = prof_ann
  for _, file in ipairs(files) do
    local f0 = file:byte()
    if f0 == 40 or f0 == 91 then -- Name starts with "(" or "[".
      out:write(format("\n====== %s ======\n[Cannot annotate non-file]\n", file))
      break
    end
    local fp, err = io.open(file)
    if not fp then
      out:write(format("====== ERROR: %s: %s\n", file, err))
      break
    end
    out:write(format("\n====== %s ======\n", file))
    local fl = files[file]
    -- In excerpt mode (ann ~= 0) 'show' is false while lines are suppressed.
    -- Otherwise it is the line number of the most recent or upcoming sampled
    -- line and output continues for 'ann' lines past it.
    local n, show = 1, false
    if ann ~= 0 then
      for i=1,ann do
        -- A sampled line within the first 'ann' lines: show from line 1 on.
        -- This must be a line number and not just 'true', since 'show' is
        -- used in arithmetic below.
        if fl[i] then show = i; out:write("@@ 1 @@\n"); break end
      end
    end
    for line in fp:lines() do
      if line:byte() == 27 then -- First byte is ESC.
        out:write("[Cannot annotate bytecode file]\n")
        break
      end
      local v = fl[n]
      local visible = true
      if ann ~= 0 then
        local v2 = fl[n+ann] -- Look ahead for leading context.
        if show then
          if v2 then show = n+ann elseif v then show = n
          elseif show+ann < n then show = false end
        elseif v2 then
          show = n+ann
          out:write(format("@@ %d @@\n", n))
        end
        visible = show
      end
      if visible then
        if v then
          out:write(format(fmtv, v, line))
        else
          out:write(format(fmtn, line))
        end
      end
      n = n + 1
    end
    fp:close()
  end
end
220
221------------------------------------------------------------------------------
222
-- Stop the profiler, write the collected results and reset the state.
-- Does nothing unless profiling is active.
local function prof_finish()
  if not prof_ud then return end
  profile.stop()
  local total = prof_samples
  if total ~= 0 then
    if prof_ann then
      prof_annotate(prof_count1, total)
    else
      prof_top(prof_count1, prof_count2, total, "")
    end
  elseif prof_raw ~= true then
    -- The notice is suppressed for raw graph output.
    out:write("[No samples collected]\n")
  end
  prof_count1, prof_count2, prof_ud = nil, nil, nil
  if out ~= stdout then out:close() end
end
241
-- Parse the mode string, set up the profiler state and start profiling.
--
-- mode  String of option letters and numbers, see the top of this file.
local function prof_start(mode)
  -- Strip the numeric options first: interval, minimum percentage, depth.
  local interval = ""
  mode = mode:gsub("i%d*", function(opt) interval = opt; return "" end)
  prof_min = 3
  mode = mode:gsub("m(%d+)", function(num)
    prof_min = tonumber(num)
    return ""
  end)
  prof_depth = 1
  mode = mode:gsub("%-?%d+", function(num)
    prof_depth = tonumber(num)
    return ""
  end)

  -- Every remaining character is a flag.
  local m = {}
  for ch in mode:gmatch(".") do m[ch] = ch end

  prof_states = m.z or m.v
  if prof_states == "z" then zone = require("jit.zone") end

  local scope = m.l or m.f or m.F
  if not scope then scope = prof_states and "" or "f" end
  local flags = m.p or ""
  prof_raw = m.r

  -- Select how the sample keys are split into two levels.
  if m.s then
    prof_split = 2
    -- Splitting needs a stack depth of at least two levels.
    if prof_depth == -1 or m["-"] then
      prof_depth = -2
    elseif prof_depth == 1 then
      prof_depth = 2
    end
  elseif mode:find("[fF].*l") then
    scope = "l"
    prof_split = 3
  elseif scope == "" or mode:find("[zv].*[lfF]") then
    prof_split = 1
  else
    prof_split = 0
  end

  -- Select the annotation mode and the stack dump format.
  if m.A then
    prof_ann = 0
  elseif m.a then
    prof_ann = 3
  else
    prof_ann = nil
  end
  if prof_ann then
    scope = "l"
    prof_fmt = "pl"
    prof_split = 0
    prof_depth = 1
  elseif m.G and scope ~= "" then
    prof_fmt = flags..scope.."Z;"
    prof_depth = -100
    prof_raw = true
    prof_min = 0
  elseif scope == "" then
    prof_fmt = false
  else
    local sc = (prof_split == 3 and m.f) or m.F or scope
    local sep = prof_depth >= 0 and "Z < " or "Z > "
    prof_fmt = flags..sc..sep
  end

  prof_count1 = {}
  prof_count2 = {}
  prof_samples = 0
  profile.start(scope:lower()..interval, prof_cb)
  -- The results are dumped when the proxy object is garbage collected.
  prof_ud = newproxy(true)
  getmetatable(prof_ud).__gc = prof_finish
end
291
292------------------------------------------------------------------------------
293
-- Select the output file and start profiling.
--
-- mode     Profiler mode string. Default: "f".
-- outfile  Output file name. Falls back to the LUAJIT_PROFILEFILE
--          environment variable. A missing name or "-" selects stdout.
--          Raises an error if the file cannot be opened for writing.
local function start(mode, outfile)
  local target = outfile or os.getenv("LUAJIT_PROFILEFILE")
  if not target or target == "-" then
    out = stdout
  else
    out = assert(io.open(target, "w"))
  end
  prof_start(mode or "f")
end
303
-- Public module functions.
local M = {}
M.start = start -- For -j command line option.
M.stop = prof_finish
return M
309
diff --git a/src/jit/v.lua b/src/jit/v.lua
index 43cbedff..69443d31 100644
--- a/src/jit/v.lua
+++ b/src/jit/v.lua
@@ -62,7 +62,7 @@ local jit = require("jit")
62local jutil = require("jit.util") 62local jutil = require("jit.util")
63local vmdef = require("jit.vmdef") 63local vmdef = require("jit.vmdef")
64local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo 64local funcinfo, traceinfo = jutil.funcinfo, jutil.traceinfo
65local type, format = type, string.format 65local type, sub, format = type, string.sub, string.format
66local stdout, stderr = io.stdout, io.stderr 66local stdout, stderr = io.stdout, io.stderr
67 67
68-- Active flag and output file handle. 68-- Active flag and output file handle.
@@ -89,7 +89,12 @@ end
89local function fmterr(err, info) 89local function fmterr(err, info)
90 if type(err) == "number" then 90 if type(err) == "number" then
91 if type(info) == "function" then info = fmtfunc(info) end 91 if type(info) == "function" then info = fmtfunc(info) end
92 err = format(vmdef.traceerr[err], info) 92 local fmt = vmdef.traceerr[err]
93 if fmt == "NYI: bytecode %s" then
94 local oidx = 6 * info
95 info = sub(vmdef.bcnames, oidx+1, oidx+6)
96 end
97 err = format(fmt, info)
93 end 98 end
94 return err 99 return err
95end 100end
@@ -98,7 +103,7 @@ end
98local function dump_trace(what, tr, func, pc, otr, oex) 103local function dump_trace(what, tr, func, pc, otr, oex)
99 if what == "start" then 104 if what == "start" then
100 startloc = fmtfunc(func, pc) 105 startloc = fmtfunc(func, pc)
101 startex = otr and "("..otr.."/"..oex..") " or "" 106 startex = otr and "("..otr.."/"..(oex == -1 and "stitch" or oex)..") " or ""
102 else 107 else
103 if what == "abort" then 108 if what == "abort" then
104 local loc = fmtfunc(func, pc) 109 local loc = fmtfunc(func, pc)
@@ -115,6 +120,9 @@ local function dump_trace(what, tr, func, pc, otr, oex)
115 if ltype == "interpreter" then 120 if ltype == "interpreter" then
116 out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n", 121 out:write(format("[TRACE %3s %s%s -- fallback to interpreter]\n",
117 tr, startex, startloc)) 122 tr, startex, startloc))
123 elseif ltype == "stitch" then
124 out:write(format("[TRACE %3s %s%s %s %s]\n",
125 tr, startex, startloc, ltype, fmtfunc(func, pc)))
118 elseif link == tr or link == 0 then 126 elseif link == tr or link == 0 then
119 out:write(format("[TRACE %3s %s%s %s]\n", 127 out:write(format("[TRACE %3s %s%s %s]\n",
120 tr, startex, startloc, ltype)) 128 tr, startex, startloc, ltype))
@@ -158,9 +166,9 @@ local function dumpon(outfile)
158end 166end
159 167
160-- Public module functions. 168-- Public module functions.
161module(...) 169return {
162 170 on = dumpon,
163on = dumpon 171 off = dumpoff,
164off = dumpoff 172 start = dumpon -- For -j command line option.
165start = dumpon -- For -j command line option. 173}
166 174
diff --git a/src/jit/zone.lua b/src/jit/zone.lua
new file mode 100644
index 00000000..0ed6935a
--- /dev/null
+++ b/src/jit/zone.lua
@@ -0,0 +1,45 @@
1----------------------------------------------------------------------------
2-- LuaJIT profiler zones.
3--
4-- Copyright (C) 2005-2026 Mike Pall. All rights reserved.
5-- Released under the MIT license. See Copyright Notice in luajit.h
6----------------------------------------------------------------------------
7--
8-- This module implements a simple hierarchical zone model.
9--
10-- Example usage:
11--
12-- local zone = require("jit.zone")
13-- zone("AI")
14-- ...
15-- zone("A*")
16-- ...
17-- print(zone:get()) --> "A*"
18-- ...
19-- zone()
20-- ...
21-- print(zone:get()) --> "AI"
22-- ...
23-- zone()
24--
25----------------------------------------------------------------------------
26
27local remove = table.remove
28
-- The zone object doubles as the zone stack: zone names are stored in its
-- array part, the methods in its hash part.
local zone_stack = {}

-- Remove all zones from the stack.
function zone_stack.flush(t)
  for idx = #t, 1, -1 do t[idx] = nil end
end

-- Return the innermost zone, or nil if the stack is empty.
function zone_stack.get(t)
  return t[#t]
end

local zone_meta = {}

-- Calling the object with a zone name pushes it onto the stack. Calling it
-- without an argument pops the innermost zone and returns its name. Popping
-- from an empty stack raises an error.
function zone_meta.__call(t, zone)
  if not zone then
    return (assert(remove(t), "empty zone stack"))
  end
  t[#t+1] = zone
end

return setmetatable(zone_stack, zone_meta)
45