aboutsummaryrefslogtreecommitdiff
path: root/win32
diff options
context:
space:
mode:
authorHisham Muhammad <hisham@gobolinux.org>2018-10-30 10:10:41 -0300
committerHisham Muhammad <hisham@gobolinux.org>2018-10-30 10:10:41 -0300
commitb37835d9375d903d51fe8af67aa70031191675ff (patch)
tree37c911d53ab348aa22dd16cde9c953d574f03c8a /win32
parent760ad7c98a4a322b1f028ea9585cd17c5b74b2e2 (diff)
downloadluarocks-b37835d9375d903d51fe8af67aa70031191675ff.tar.gz
luarocks-b37835d9375d903d51fe8af67aa70031191675ff.tar.bz2
luarocks-b37835d9375d903d51fe8af67aa70031191675ff.zip
Unbundle pe-parser from LuaRocks sources
Diffstat (limited to 'win32')
-rw-r--r--win32/pe-parser.lua568
1 files changed, 568 insertions, 0 deletions
diff --git a/win32/pe-parser.lua b/win32/pe-parser.lua
new file mode 100644
index 00000000..34556812
--- /dev/null
+++ b/win32/pe-parser.lua
@@ -0,0 +1,568 @@
1---------------------------------------------------------------------------------------
2-- Lua module to parse a Portable Executable (.exe , .dll, etc.) file and extract metadata.
3--
4-- NOTE: numerical information is extracted as strings (hex) to prevent numerical overflows in
5-- case of 64 bit fields (bit/flag fields). Pointer arithmetic is still done numerically, so for
6-- very large files this could lead to undefined results. Use with care!
7--
8-- Version 0.4, [copyright (c) 2013-2015 Thijs Schreijer](http://www.thijsschreijer.nl)
9-- @name pe-parser
10-- @class module
11
local M = {}

--- Table with named constants/flag-constants.
-- Named elements can be looked up by their name in the `const` table. The sub tables are
-- indexed by value (lowercase hex string without leading zeros).
-- For flag fields the name is extended with `_flags`.
-- @usage -- lookup descriptive name for the myobj.Magic value
-- local desc = pe.const.Magic[myobj.Magic]
--
-- -- get list of flag names, indexed by flag values, for the Characteristics field
-- local flag_list = pe.const.Characteristics_flags
M.const = {
  Magic = {
    ["10b"] = "PE32",
    ["20b"] = "PE32+",
  },
  Machine = {
    ["0"] = "IMAGE_FILE_MACHINE_UNKNOWN",
    ["1d3"] = "IMAGE_FILE_MACHINE_AM33",
    ["8664"] = "IMAGE_FILE_MACHINE_AMD64",
    ["1c0"] = "IMAGE_FILE_MACHINE_ARM",
    ["1c4"] = "IMAGE_FILE_MACHINE_ARMNT",
    ["aa64"] = "IMAGE_FILE_MACHINE_ARM64",
    ["ebc"] = "IMAGE_FILE_MACHINE_EBC",
    ["14c"] = "IMAGE_FILE_MACHINE_I386",
    ["200"] = "IMAGE_FILE_MACHINE_IA64",
    ["9041"] = "IMAGE_FILE_MACHINE_M32R",
    ["266"] = "IMAGE_FILE_MACHINE_MIPS16",
    ["366"] = "IMAGE_FILE_MACHINE_MIPSFPU",
    ["466"] = "IMAGE_FILE_MACHINE_MIPSFPU16",
    ["1f0"] = "IMAGE_FILE_MACHINE_POWERPC",
    ["1f1"] = "IMAGE_FILE_MACHINE_POWERPCFP",
    ["166"] = "IMAGE_FILE_MACHINE_R4000",
    ["1a2"] = "IMAGE_FILE_MACHINE_SH3",
    ["1a3"] = "IMAGE_FILE_MACHINE_SH3DSP",
    ["1a6"] = "IMAGE_FILE_MACHINE_SH4",
    ["1a8"] = "IMAGE_FILE_MACHINE_SH5",
    ["1c2"] = "IMAGE_FILE_MACHINE_THUMB",
    ["169"] = "IMAGE_FILE_MACHINE_WCEMIPSV2",
  },
  Characteristics_flags = {
    ["1"] = "IMAGE_FILE_RELOCS_STRIPPED",
    ["2"] = "IMAGE_FILE_EXECUTABLE_IMAGE",
    ["4"] = "IMAGE_FILE_LINE_NUMS_STRIPPED",
    ["8"] = "IMAGE_FILE_LOCAL_SYMS_STRIPPED",
    ["10"] = "IMAGE_FILE_AGGRESSIVE_WS_TRIM",
    ["20"] = "IMAGE_FILE_LARGE_ADDRESS_AWARE",
    ["40"] = "Reserved for future use",
    ["80"] = "IMAGE_FILE_BYTES_REVERSED_LO",
    ["100"] = "IMAGE_FILE_32BIT_MACHINE",
    ["200"] = "IMAGE_FILE_DEBUG_STRIPPED",
    ["400"] = "IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP",
    ["800"] = "IMAGE_FILE_NET_RUN_FROM_SWAP",
    ["1000"] = "IMAGE_FILE_SYSTEM",
    ["2000"] = "IMAGE_FILE_DLL",
    ["4000"] = "IMAGE_FILE_UP_SYSTEM_ONLY",
    ["8000"] = "IMAGE_FILE_BYTES_REVERSED_HI",
  },
  Subsystem = {
    ["0"] = "IMAGE_SUBSYSTEM_UNKNOWN",
    ["1"] = "IMAGE_SUBSYSTEM_NATIVE",
    ["2"] = "IMAGE_SUBSYSTEM_WINDOWS_GUI",
    ["3"] = "IMAGE_SUBSYSTEM_WINDOWS_CUI",
    ["7"] = "IMAGE_SUBSYSTEM_POSIX_CUI",
    ["9"] = "IMAGE_SUBSYSTEM_WINDOWS_CE_GUI",
    ["a"] = "IMAGE_SUBSYSTEM_EFI_APPLICATION",
    ["b"] = "IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER",
    ["c"] = "IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER",
    ["d"] = "IMAGE_SUBSYSTEM_EFI_ROM",
    ["e"] = "IMAGE_SUBSYSTEM_XBOX",
  },
  DllCharacteristics_flags = {
    -- the first three names used to be spelled IMAGE_DLL_CHARACTERISTICS_*;
    -- they now use the same prefix as the remaining entries of this table
    ["40"] = "IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE",
    ["80"] = "IMAGE_DLLCHARACTERISTICS_FORCE_INTEGRITY",
    ["100"] = "IMAGE_DLLCHARACTERISTICS_NX_COMPAT",
    ["200"] = "IMAGE_DLLCHARACTERISTICS_NO_ISOLATION",
    ["400"] = "IMAGE_DLLCHARACTERISTICS_NO_SEH",
    ["800"] = "IMAGE_DLLCHARACTERISTICS_NO_BIND",
    ["2000"] = "IMAGE_DLLCHARACTERISTICS_WDM_DRIVER",
    ["8000"] = "IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE",
  },
  Sections = {
    Characteristics_flags = {
      ["8"] = "IMAGE_SCN_TYPE_NO_PAD",
      ["20"] = "IMAGE_SCN_CNT_CODE",
      ["40"] = "IMAGE_SCN_CNT_INITIALIZED_DATA",
      ["80"] = "IMAGE_SCN_CNT_UNINITIALIZED_DATA",
      ["100"] = "IMAGE_SCN_LNK_OTHER",
      ["200"] = "IMAGE_SCN_LNK_INFO",
      ["800"] = "IMAGE_SCN_LNK_REMOVE",
      ["1000"] = "IMAGE_SCN_LNK_COMDAT",
      ["8000"] = "IMAGE_SCN_GPREL",
      -- IMAGE_SCN_MEM_PURGEABLE was listed under this same key as well; a table
      -- constructor keeps only the last duplicate, so this is the name that was
      -- (and still is) returned for "20000"
      ["20000"] = "IMAGE_SCN_MEM_16BIT",
      ["40000"] = "IMAGE_SCN_MEM_LOCKED",
      ["80000"] = "IMAGE_SCN_MEM_PRELOAD",
      ["100000"] = "IMAGE_SCN_ALIGN_1BYTES",
      ["200000"] = "IMAGE_SCN_ALIGN_2BYTES",
      ["300000"] = "IMAGE_SCN_ALIGN_4BYTES",
      ["400000"] = "IMAGE_SCN_ALIGN_8BYTES",
      ["500000"] = "IMAGE_SCN_ALIGN_16BYTES",
      ["600000"] = "IMAGE_SCN_ALIGN_32BYTES",
      ["700000"] = "IMAGE_SCN_ALIGN_64BYTES",
      ["800000"] = "IMAGE_SCN_ALIGN_128BYTES",
      ["900000"] = "IMAGE_SCN_ALIGN_256BYTES",
      ["a00000"] = "IMAGE_SCN_ALIGN_512BYTES",
      ["b00000"] = "IMAGE_SCN_ALIGN_1024BYTES",
      ["c00000"] = "IMAGE_SCN_ALIGN_2048BYTES",
      ["d00000"] = "IMAGE_SCN_ALIGN_4096BYTES",
      ["e00000"] = "IMAGE_SCN_ALIGN_8192BYTES",
      ["1000000"] = "IMAGE_SCN_LNK_NRELOC_OVFL",
      ["2000000"] = "IMAGE_SCN_MEM_DISCARDABLE",
      ["4000000"] = "IMAGE_SCN_MEM_NOT_CACHED",
      ["8000000"] = "IMAGE_SCN_MEM_NOT_PAGED",
      ["10000000"] = "IMAGE_SCN_MEM_SHARED",
      ["20000000"] = "IMAGE_SCN_MEM_EXECUTE",
      ["40000000"] = "IMAGE_SCN_MEM_READ",
      ["80000000"] = "IMAGE_SCN_MEM_WRITE",
    },
  },

}
133
134
--- Converts an integer to its lowercase hexadecimal representation.
-- @param IN the number to convert to hex
-- @param len (optional) number of characters to return; shorter results are
-- left-padded with "0"s, longer results keep only their last `len` characters.
-- Values below 1 are treated as 1.
-- @return string containing hex representation ("0" when `IN` is zero)
function M.toHex(IN, len)
  local digits = "0123456789abcdef"
  local hex = ""
  -- peel off the least significant hex digit until nothing is left
  while IN > 0 do
    local idx = math.fmod(IN, 16) + 1
    IN = math.floor(IN / 16)
    hex = string.sub(digits, idx, idx) .. hex
  end
  len = len or string.len(hex)
  if len < 1 then
    len = 1
  end
  -- prefix enough zeros, then cut the result down to exactly `len` characters
  return (string.rep("0", len) .. hex):sub(-len, -1)
end
150
--- Converts a hexadecimal string to a number.
-- Upper- and lowercase digits are accepted; an empty string yields 0.
-- Raises an error when `IN` is not a string or contains a character that is
-- not a hex digit.
-- @param IN the string to convert to dec
-- @return number in dec format
function M.toDec(IN)
  assert(type(IN) == "string", "expected a hex string")
  local digits = "0123456789abcdef"
  local OUT = 0
  IN = IN:lower()
  for i = 1, #IN do
    local c = IN:sub(i, i)
    -- plain search: the character must never be interpreted as a Lua pattern
    -- (a "." would otherwise match as digit 0, a "%" would raise a pattern error)
    local pos = string.find(digits, c, 1, true)
    if not pos then
      error("invalid hex digit '" .. c .. "' in '" .. IN .. "'", 2)
    end
    OUT = OUT * 16 + (pos - 1)
  end
  return OUT
end
165
local function get_int(str)
  -- interpret a byte-sequence as a number; the first byte is the least
  -- significant one (little-endian)
  assert(str)
  local value = 0
  local idx = #str
  while idx >= 1 do
    value = value * 256 + string.byte(str, idx, idx)
    idx = idx - 1
  end
  return value
end
175
local function get_hex(str)
  -- convert a byte-sequence (least significant byte first) to a lowercase hex
  -- string without leading zeros; an all-zero sequence yields "0"
  assert(str)
  local parts = {}
  -- walk the bytes back to front so the most significant byte comes first
  for i = #str, 1, -1 do
    parts[#parts + 1] = string.format("%02x", string.byte(str, i))
  end
  local hex = table.concat(parts)
  -- strip leading zeros but always keep the last character; the extra
  -- parentheses drop gsub's second return value (the match count)
  return (hex:gsub("^0+(.)", "%1"))
end
188
local function get_list(list, f, add_to)
  -- Reads consecutive fields from a file into a table.
  -- list: list of tables with 'size' (bytes to read), 'name' (key to store
  --       the value under) and optionally 'is_str'
  -- f: file to read from
  -- add_to: table to add results to (optional)
  -- returns the table the results were stored in
  local result = add_to or {}
  for _, field in ipairs(list) do
    local name = field.name
    assert(result[name] == nil, "Value for '"..name.."' already set")
    -- a failed read (e.g. end of file) is treated as a single zero byte
    local raw = f:read(field.size) or "\0"
    if field.is_str then
      -- string value: keep everything in front of the first NUL byte,
      -- or the whole chunk when there is none
      local nul = raw:find("\0", 1, true)
      if nul then
        result[name] = raw:sub(1, nul - 1)
      else
        result[name] = raw
      end
    else
      -- everything else is stored as a hex string
      result[name] = get_hex(raw)
    end
  end
  return result
end
212
--- Calculates the fileoffset of a given RVA.
-- This function is also available as a method on the parsed output table.
-- The RVA is matched against the half-open range
-- `[VirtualAddress, VirtualAddress + VirtualSize)` of each section, in section order.
-- @param obj a parsed object (return value from `parse`)
-- @param RVA an RVA value to convert to a fileoffset (either number or hex-string)
-- @return fileoffset of the given RVA (number), or nil + error if no section contains the RVA
M.get_fileoffset = function(obj, RVA)
  if type(RVA) == "string" then RVA = M.toDec(RVA) end
  -- an object without a section table simply has no matching section
  for _, s in ipairs(obj.Sections or {}) do
    local first = M.toDec(s.VirtualAddress)
    local size = M.toDec(s.VirtualSize)
    if size == 0 and s.SizeOfRawData then
      -- NOTE(review): assumes an image that leaves VirtualSize at 0 means
      -- "use the raw data size" -- confirm against the PE specification
      size = M.toDec(s.SizeOfRawData)
    end
    -- the end is exclusive: an RVA equal to first + size belongs to whatever
    -- follows this section, not to this section
    if first <= RVA and RVA < first + size then
      return RVA - first + M.toDec(s.PointerToRawData)
    end
  end
  return nil, "No match RVA with Section list, RVA out of bounds"
end
232
local function readstring(f)
  -- reads a null-terminated string from the current file position;
  -- reading also stops at the end of the file (f:read returns nil there),
  -- in which case everything read so far is returned
  local chars = {}
  while true do
    local c = f:read(1)
    if c == nil or c == "\0" then break end
    chars[#chars + 1] = c
  end
  return table.concat(chars)
end
243
-- Parses the PE image readable through the already opened file handle `f`.
-- The handle is never closed here; that is left to the caller.
-- Returns the parsed table, or nil + error message for files that are not PE images.
local function parse_handle(f)
  if f:read(2) ~= "MZ" then
    return nil, "Not a valid image"
  end

  f:seek("set", 60) -- position of PE header position
  local raw_offset = f:read(4) -- read position of PE header
  if not raw_offset or #raw_offset < 4 then
    -- file ends before the field holding the PE header position
    return nil, "Not a valid image"
  end
  local peoffset = get_int(raw_offset)

  f:seek("set", peoffset) -- move to position of PE header
  local out = get_list({
    { size = 4, name = "PEheader", is_str = true },
    { size = 2, name = "Machine" },
    { size = 2, name = "NumberOfSections" },
    { size = 4, name = "TimeDateStamp" },
    { size = 4, name = "PointerToSymbolTable" },
    { size = 4, name = "NumberOfSymbols" },
    { size = 2, name = "SizeOfOptionalHeader" },
    { size = 2, name = "Characteristics" },
  }, f)

  if out.PEheader ~= "PE" then
    return nil, "Invalid PE header"
  end
  out.PEheader = nil -- remove it, has no value
  out.dump = M.dump -- export dump function as a method

  -- The section table starts right behind the optional header: 4 bytes of
  -- signature + 20 bytes of file header (the fields read above) + optional header.
  local optsize = M.toDec(out.SizeOfOptionalHeader)
  local section_table = peoffset + 24 + optsize

  -- always present, so consumers can index it without checking for nil
  out.DataDirectory = {}

  if optsize > 0 then
    -- parse optional header; standard
    get_list({
      { size = 2, name = "Magic" },
      { size = 1, name = "MajorLinkerVersion" },
      { size = 1, name = "MinorLinkerVersion" },
      { size = 4, name = "SizeOfCode" },
      { size = 4, name = "SizeOfInitializedData" },
      { size = 4, name = "SizeOfUninitializedData" },
      { size = 4, name = "AddressOfEntryPoint" },
      { size = 4, name = "BaseOfCode" },
    }, f, out)
    local plus = (out.Magic == "20b")
    if not plus then -- plain PE32, not PE32+
      get_list({
        { size = 4, name = "BaseOfData" },
      }, f, out)
    end
    -- parse optional header; windows-fields
    local plussize = 4
    if plus then plussize = 8 end
    get_list({
      { size = plussize, name = "ImageBase" },
      { size = 4, name = "SectionAlignment" },
      { size = 4, name = "FileAlignment" },
      { size = 2, name = "MajorOperatingSystemVersion" },
      { size = 2, name = "MinorOperatingSystemVersion" },
      { size = 2, name = "MajorImageVersion" },
      { size = 2, name = "MinorImageVersion" },
      { size = 2, name = "MajorSubsystemVersion" },
      { size = 2, name = "MinorSubsystemVersion" },
      { size = 4, name = "Win32VersionValue" },
      { size = 4, name = "SizeOfImage" },
      { size = 4, name = "SizeOfHeaders" },
      { size = 4, name = "CheckSum" },
      { size = 2, name = "Subsystem" },
      { size = 2, name = "DllCharacteristics" },
      { size = plussize, name = "SizeOfStackReserve" },
      { size = plussize, name = "SizeOfStackCommit" },
      { size = plussize, name = "SizeOfHeapReserve" },
      { size = plussize, name = "SizeOfHeapCommit" },
      { size = 4, name = "LoaderFlags" },
      { size = 4, name = "NumberOfRvaAndSizes" },
    }, f, out)
    -- Read data directory entries
    for i = 1, M.toDec(out.NumberOfRvaAndSizes) do
      -- never read past the end of the optional header; this also bounds the
      -- loop when a corrupt file announces a huge number of entries
      if f:seek() + 8 > section_table then break end
      out.DataDirectory[i] = get_list({
        { size = 4, name = "VirtualAddress" },
        { size = 4, name = "Size" },
      }, f)
    end
    -- make the entries that are present available by name as well
    for i, name in ipairs{"ExportTable", "ImportTable", "ResourceTable",
                          "ExceptionTable", "CertificateTable", "BaseRelocationTable",
                          "Debug", "Architecture", "GlobalPtr", "TLSTable",
                          "LoadConfigTable", "BoundImport", "IAT",
                          "DelayImportDescriptor", "CLRRuntimeHeader", "Reserved"} do
      out.DataDirectory[name] = out.DataDirectory[i]
      if out.DataDirectory[name] then out.DataDirectory[name].name = name end
    end
  end

  -- parse section table; position explicitly instead of relying on the reads
  -- above having consumed exactly SizeOfOptionalHeader bytes
  f:seek("set", section_table)
  out.Sections = {}
  for i = 1, M.toDec(out.NumberOfSections) do
    out.Sections[i] = get_list({
      { size = 8, name = "Name", is_str = true },
      { size = 4, name = "VirtualSize" },
      { size = 4, name = "VirtualAddress" },
      { size = 4, name = "SizeOfRawData" },
      { size = 4, name = "PointerToRawData" },
      { size = 4, name = "PointerToRelocations" },
      { size = 4, name = "PointerToLinenumbers" },
      { size = 2, name = "NumberOfRelocations" },
      { size = 2, name = "NumberOfLinenumbers" },
      { size = 4, name = "Characteristics" },
    }, f)
  end
  -- we now have section data, so add RVA conversion method
  out.get_fileoffset = M.get_fileoffset

  -- get the import table; images without one get an empty list so that
  -- `DataDirectory.ImportTable` can always be iterated
  local import_dir = out.DataDirectory.ImportTable
  if not import_dir then
    import_dir = {}
    out.DataDirectory.ImportTable = import_dir
  end
  local import_offset
  if import_dir.VirtualAddress and M.toDec(import_dir.VirtualAddress) ~= 0 then
    import_offset = out:get_fileoffset(import_dir.VirtualAddress)
  end
  if import_offset then
    f:seek("set", import_offset)
    local cnt = 1
    while true do
      local dll = get_list({
        { size = 4, name = "ImportLookupTableRVA" },
        { size = 4, name = "TimeDateStamp" },
        { size = 4, name = "ForwarderChain" },
        { size = 4, name = "NameRVA" },
        { size = 4, name = "ImportAddressTableRVA" },
      }, f)
      -- a zero NameRVA marks the final NULL entry (get_list also yields
      -- zeros at the end of the file), so we're done
      if M.toDec(dll.NameRVA) == 0 then break end
      -- store the import entry
      import_dir[cnt] = dll
      cnt = cnt + 1
    end
  end
  -- resolve imported DLL names
  for _, dll in ipairs(import_dir) do
    local name_offset = out:get_fileoffset(dll.NameRVA)
    if name_offset then
      f:seek("set", name_offset)
      dll.Name = readstring(f)
    else
      -- name lies outside every section; keep Name a string for consumers
      dll.Name = ""
    end
  end

  return out
end

--- Parses a file and extracts the information.
-- All numbers are delivered as "string" types containing hex values (to prevent numerical
-- overflows in case of 64bit sizes or bit-fields), see `toHex` and `toDec` conversion functions.
-- The returned table always has a `DataDirectory` table with an `ImportTable` entry (whose
-- array part lists the imported dlls, possibly none) and a `Sections` list.
-- @param target path of the file to parse
-- @return table with data, or nil + error
-- @usage local pe = require("pe-parser")
-- local obj = pe.parse("c:\\lua\\lua.exe")
-- obj:dump()
M.parse = function(target)
  local f, err = io.open(target, "rb")
  if not f then return nil, err end

  -- run the parser protected, so the file is closed no matter what happens
  local ok, out, parse_err = pcall(parse_handle, f)
  f:close()

  if not ok then
    -- an error was raised while parsing; `out` holds the error value
    return nil, "Error parsing '" .. tostring(target) .. "': " .. tostring(out)
  end
  if not out then
    return nil, parse_err
  end
  return out
end
457
-- left-pad `str` with `chr` (default: a space) up to a total length of `l`
-- (default: 0); strings that are already long enough are returned unchanged
local function pad(str, l, chr)
  local fill = chr or " "
  local width = l or 0
  local missing = width - #str
  -- string.rep yields "" for a non-positive count
  return string.rep(fill, missing) .. str
end
464
--- Dumps the output parsed.
-- Prints the header fields, the data directory, the section table and the
-- imported dll names to stdout.
-- This function is also available as a method on the parsed output table
-- @param obj a parsed object (return value from `parse`)
M.dump = function(obj)
  -- width of the longest key, used to line up the values
  local l = 0
  for k in pairs(obj) do if #k > l then l = #k end end

  for k, v in pairs(obj) do
    local label = k..string.rep(" ", l - #k + 1)..": "
    if (M.const[k] and type(v)=="string") then
      -- look up named value; values missing from the constants table are
      -- printed raw instead of raising an error
      print(label..(M.const[k][v] or (v.." (unknown value)")))
    elseif M.const[k.."_flags"] then
      -- flag field; only the raw value is printed
      print(label..v.." (flag field)")
    elseif type(v) == "number" then
      print(label..v.." (dec)")
    elseif (type(v)=="string") and (k ~= "DataDirectory") and (k ~= "Sections") then
      -- regular values
      print(label..v)
    end
  end

  if obj.DataDirectory then
    print("DataDirectory (RVA, size):")
    for i, v in ipairs(obj.DataDirectory) do
      -- only the first 16 entries get a name assigned by `parse`
      print(" Entry "..M.toHex(i-1).." "..pad(v.VirtualAddress,8,"0").." "..pad(v.Size,8,"0").." "..(v.name or "(unnamed)"))
    end
  end

  if obj.Sections then
    print("Sections:")
    print("idx name RVA VSize Offset RawSize")
    for i, v in ipairs(obj.Sections) do
      print(" "..i.." "..v.Name.. string.rep(" ",9-#v.Name)..pad(v.VirtualAddress,8,"0").." "..pad(v.VirtualSize,8,"0").." "..pad(v.PointerToRawData,8,"0").." "..pad(v.SizeOfRawData,8,"0"))
    end
  end

  print("Imports:")
  -- the data directory or its import entry may be missing
  local imports = obj.DataDirectory and obj.DataDirectory.ImportTable
  for _, dll in ipairs(imports or {}) do
    print(" "..tostring(dll.Name))
  end
end
510
-- Worker for `M.msvcrt`; `seen` is a set of the (uppercased) file names that
-- were already inspected, which stops dlls importing each other from causing
-- endless recursion.
local function find_msvcrt(infile, seen)
  -- split off the directory; both "\" and "/" are accepted as separator
  local path, sep, file = infile:match("^(.+)([\\/])([^\\/]+)$")
  if not path then
    path = ""
    file = infile
  else
    path = path .. sep
  end

  local key = (path..file):upper()
  if seen[key] then
    return nil, "No msvcrt found"
  end
  seen[key] = true

  local obj, err = M.parse(path..file)
  if not obj then return nil, err end

  local imports = (obj.DataDirectory and obj.DataDirectory.ImportTable) or {}

  -- first check the direct imports of this binary
  for _, dll in ipairs(imports) do
    local name = (dll.Name or ""):upper()
    local result = name:match('(MSVCR%d*D?)%.DLL')
    if not result then
      result = name:match('(MSVCRTD?)%.DLL')
    end
    if not result then
      result = name:match('(VCRUNTIME%d*D?)%.DLL')
    end
    -- success, found it return name + binary where it was found
    if result then return result, infile end
  end

  -- not found, so traverse all imported dll's (looked up next to this binary)
  for _, dll in ipairs(imports) do
    local rt, ref = find_msvcrt(path..(dll.Name or ""), seen)
    if rt then
      return rt, ref -- found it
    end
  end

  return nil, "No msvcrt found"
end

--- Checks the msvcrt dll the binary was linked against.
-- Mixing and matching dlls only works when they all are using the same runtime, if
-- not unexpected errors will probably occur.
-- Checks the binary provided and then traverses all imported dlls to find the msvcrt
-- used (it will only look for the dlls in the same directory). Every file is
-- inspected at most once.
-- @param infile binary file to check
-- @return msvcrt name (uppercase, without extension) + file where the reference was found, or nil + error
function M.msvcrt(infile)
  return find_msvcrt(infile, {})
end
552
--- Detects the processor architecture a binary was compiled for.
-- @param program path of the binary to inspect (e.g. the Lua interpreter)
-- @return "x86" or "x86_64", or nil + error when the file cannot be parsed or
-- was built for a machine type this function does not know how to map
function M.get_architecture(program)
  -- detect processor arch interpreter was compiled for
  local proc = (M.parse(program) or {}).Machine
  if not proc then
    return nil, "Could not detect processor architecture used in "..program
  end
  local machine = M.const.Machine[proc] -- collect name from constant value
  if machine == "IMAGE_FILE_MACHINE_I386" then
    return "x86"
  end
  if machine == "IMAGE_FILE_MACHINE_AMD64"
     -- NOTE(review): IA64 (Itanium) is not x86_64; it is kept mapped here
     -- only because it previously fell through to "x86_64" -- confirm whether
     -- that is still wanted
     or machine == "IMAGE_FILE_MACHINE_IA64" then
    return "x86_64"
  end
  -- anything else (ARM, unknown values, ...) used to be reported as "x86_64"
  return nil, "Unsupported processor architecture '"..(machine or proc).."' used in "..program
end
567
568return M