From b37835d9375d903d51fe8af67aa70031191675ff Mon Sep 17 00:00:00 2001 From: Hisham Muhammad Date: Tue, 30 Oct 2018 10:10:41 -0300 Subject: Unbundle pe-parser from LuaRocks sources --- install.bat | 2 +- src/luarocks/fs/win32/pe-parser.lua | 568 ------------------------------------ win32/pe-parser.lua | 568 ++++++++++++++++++++++++++++++++++++ 3 files changed, 569 insertions(+), 569 deletions(-) delete mode 100644 src/luarocks/fs/win32/pe-parser.lua create mode 100644 win32/pe-parser.lua diff --git a/install.bat b/install.bat index 4120db49..9aef7941 100644 --- a/install.bat +++ b/install.bat @@ -51,7 +51,7 @@ local lua_version_set = false -- Some helpers -- -local pe = assert(loadfile(".\\src\\luarocks\\fs\\win32\\pe-parser.lua"))() +local pe = assert(loadfile(".\\win32\\pe-parser.lua"))() local function die(message) if message then print(message) end diff --git a/src/luarocks/fs/win32/pe-parser.lua b/src/luarocks/fs/win32/pe-parser.lua deleted file mode 100644 index 34556812..00000000 --- a/src/luarocks/fs/win32/pe-parser.lua +++ /dev/null @@ -1,568 +0,0 @@ ---------------------------------------------------------------------------------------- --- Lua module to parse a Portable Executable (.exe , .dll, etc.) file and extract metadata. --- --- NOTE: numerical information is extracted as strings (hex) to prevent numerical overflows in --- case of 64 bit fields (bit/flag fields). Pointer arithmetic is still done numerically, so for --- very large files this could lead to undefined results. Use with care! --- --- Version 0.4, [copyright (c) 2013-2015 Thijs Schreijer](http://www.thijsschreijer.nl) --- @name pe-parser --- @class module - -local M = {} - ---- Table with named constants/flag-constants. --- Named elements can be looked up by their name in the `const` table. The sub tables are index by value. --- For flag fields the name is extended with `_flags`. --- @usage -- lookup descriptive name for the myobj.Magic value --- local desc = pe.const.Magic(myobj.Magic) --- --- -- get list of flag names, indexed by flag values, for the Characteristics field --- local flag_list = pe.const.Characteristics_flags -M.const = { - Magic = { - ["10b"] = "PE32", - ["20b"] = "PE32+", - }, - Machine = { - ["0"] = "IMAGE_FILE_MACHINE_UNKNOWN", - ["1d3"] = "IMAGE_FILE_MACHINE_AM33", - ["8664"] = "IMAGE_FILE_MACHINE_AMD64", - ["1c0"] = "IMAGE_FILE_MACHINE_ARM", - ["1c4"] = "IMAGE_FILE_MACHINE_ARMNT", - ["aa64"] = "IMAGE_FILE_MACHINE_ARM64", - ["ebc"] = "IMAGE_FILE_MACHINE_EBC", - ["14c"] = "IMAGE_FILE_MACHINE_I386", - ["200"] = "IMAGE_FILE_MACHINE_IA64", - ["9041"] = "IMAGE_FILE_MACHINE_M32R", - ["266"] = "IMAGE_FILE_MACHINE_MIPS16", - ["366"] = "IMAGE_FILE_MACHINE_MIPSFPU", - ["466"] = "IMAGE_FILE_MACHINE_MIPSFPU16", - ["1f0"] = "IMAGE_FILE_MACHINE_POWERPC", - ["1f1"] = "IMAGE_FILE_MACHINE_POWERPCFP", - ["166"] = "IMAGE_FILE_MACHINE_R4000", - ["1a2"] = "IMAGE_FILE_MACHINE_SH3", - ["1a3"] = "IMAGE_FILE_MACHINE_SH3DSP", - ["1a6"] = "IMAGE_FILE_MACHINE_SH4", - ["1a8"] = "IMAGE_FILE_MACHINE_SH5", - ["1c2"] = "IMAGE_FILE_MACHINE_THUMB", - ["169"] = "IMAGE_FILE_MACHINE_WCEMIPSV2", - }, - Characteristics_flags = { - ["1"] = "IMAGE_FILE_RELOCS_STRIPPED", - ["2"] = "IMAGE_FILE_EXECUTABLE_IMAGE", - ["4"] = "IMAGE_FILE_LINE_NUMS_STRIPPED", - ["8"] = "IMAGE_FILE_LOCAL_SYMS_STRIPPED", - ["10"] = "IMAGE_FILE_AGGRESSIVE_WS_TRIM", - ["20"] = "IMAGE_FILE_LARGE_ADDRESS_AWARE", - ["40"] = "Reserved for future use", - ["80"] = "IMAGE_FILE_BYTES_REVERSED_LO", - ["100"] = "IMAGE_FILE_32BIT_MACHINE", - ["200"] = "IMAGE_FILE_DEBUG_STRIPPED", - ["400"] = "IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP", - ["800"] = "IMAGE_FILE_NET_RUN_FROM_SWAP", - ["1000"] = "IMAGE_FILE_SYSTEM", - ["2000"] = "IMAGE_FILE_DLL", - ["4000"] = "IMAGE_FILE_UP_SYSTEM_ONLY", - ["8000"] = "IMAGE_FILE_BYTES_REVERSED_HI", - }, - Subsystem = { - ["0"] = "IMAGE_SUBSYSTEM_UNKNOWN", - ["1"] = "IMAGE_SUBSYSTEM_NATIVE", - ["2"] = "IMAGE_SUBSYSTEM_WINDOWS_GUI", - ["3"] = "IMAGE_SUBSYSTEM_WINDOWS_CUI", - ["7"] = "IMAGE_SUBSYSTEM_POSIX_CUI", - ["9"] = "IMAGE_SUBSYSTEM_WINDOWS_CE_GUI", - ["a"] = "IMAGE_SUBSYSTEM_EFI_APPLICATION", - ["b"] = "IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER", - ["c"] = "IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER", - ["d"] = "IMAGE_SUBSYSTEM_EFI_ROM", - ["e"] = "IMAGE_SUBSYSTEM_XBOX", - }, - DllCharacteristics_flags = { - ["40"] = "IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE", - ["80"] = "IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY", - ["100"] = "IMAGE_DLL_CHARACTERISTICS_NX_COMPAT", - ["200"] = "IMAGE_DLLCHARACTERISTICS_NO_ISOLATION", - ["400"] = "IMAGE_DLLCHARACTERISTICS_NO_SEH", - ["800"] = "IMAGE_DLLCHARACTERISTICS_NO_BIND", - ["2000"] = "IMAGE_DLLCHARACTERISTICS_WDM_DRIVER", - ["8000"] = "IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE", - }, - Sections = { - Characteristics_flags = { - ["8"] = "IMAGE_SCN_TYPE_NO_PAD", - ["20"] = "IMAGE_SCN_CNT_CODE", - ["40"] = "IMAGE_SCN_CNT_INITIALIZED_DATA", - ["80"] = "IMAGE_SCN_CNT_UNINITIALIZED_ DATA", - ["100"] = "IMAGE_SCN_LNK_OTHER", - ["200"] = "IMAGE_SCN_LNK_INFO", - ["800"] = "IMAGE_SCN_LNK_REMOVE", - ["1000"] = "IMAGE_SCN_LNK_COMDAT", - ["8000"] = "IMAGE_SCN_GPREL", - ["20000"] = "IMAGE_SCN_MEM_PURGEABLE", - ["20000"] = "IMAGE_SCN_MEM_16BIT", - ["40000"] = "IMAGE_SCN_MEM_LOCKED", - ["80000"] = "IMAGE_SCN_MEM_PRELOAD", - ["100000"] = "IMAGE_SCN_ALIGN_1BYTES", - ["200000"] = "IMAGE_SCN_ALIGN_2BYTES", - ["300000"] = "IMAGE_SCN_ALIGN_4BYTES", - ["400000"] = "IMAGE_SCN_ALIGN_8BYTES", - ["500000"] = "IMAGE_SCN_ALIGN_16BYTES", - ["600000"] = "IMAGE_SCN_ALIGN_32BYTES", - ["700000"] = "IMAGE_SCN_ALIGN_64BYTES", - ["800000"] = "IMAGE_SCN_ALIGN_128BYTES", - ["900000"] = "IMAGE_SCN_ALIGN_256BYTES", - ["a00000"] = "IMAGE_SCN_ALIGN_512BYTES", - ["b00000"] = "IMAGE_SCN_ALIGN_1024BYTES", - ["c00000"] = "IMAGE_SCN_ALIGN_2048BYTES", - ["d00000"] = "IMAGE_SCN_ALIGN_4096BYTES", - ["e00000"] = "IMAGE_SCN_ALIGN_8192BYTES", - ["1000000"] = "IMAGE_SCN_LNK_NRELOC_OVFL", - ["2000000"] = "IMAGE_SCN_MEM_DISCARDABLE", - ["4000000"] = "IMAGE_SCN_MEM_NOT_CACHED", - ["8000000"] = "IMAGE_SCN_MEM_NOT_PAGED", - ["10000000"] = "IMAGE_SCN_MEM_SHARED", - ["20000000"] = "IMAGE_SCN_MEM_EXECUTE", - ["40000000"] = "IMAGE_SCN_MEM_READ", - ["80000000"] = "IMAGE_SCN_MEM_WRITE", - }, - }, - -} - - ---- convert integer to HEX representation --- @param IN the number to convert to hex --- @param len the size to return, any result smaller will be prefixed by "0"s --- @return string containing hex representation -function M.toHex(IN, len) - local B,K,OUT,I,D=16,"0123456789abcdef","",0 - while IN>0 do - I=I+1 - IN,D=math.floor(IN/B),math.fmod(IN,B)+1 - OUT=string.sub(K,D,D)..OUT - end - len = len or string.len(OUT) - if len<1 then len = 1 end - return (string.rep("0",len) .. OUT):sub(-len,-1) -end - ---- convert HEX to integer --- @param IN the string to convert to dec --- @return number in dec format -function M.toDec(IN) - assert(type(IN)=="string") - local OUT = 0 - IN = IN:lower() - while #IN > 0 do - local b = string.find("0123456789abcdef",IN:sub(1,1)) - OUT = OUT * 16 + (b-1) - IN = IN:sub(2,-1) - end - return OUT -end - -local function get_int(str) - -- convert a byte-sequence to an integer - assert(str) - local r = 0 - for i = #str, 1, -1 do - r = r*256 + string.byte(str,i,i) - end - return r -end - -local function get_hex(str) - -- convert a byte-sequence to a hex string - assert(str) - local r = "" - for i = #str, 1, -1 do - r = r .. M.toHex(string.byte(str,i,i),2) - end - while (#r > 1) and (r:sub(1,1) == "0") do - r = r:sub(2, -1) - end - return r -end - -local function get_list(list, f, add_to) - -- list: list of tables with 'size' and 'name' and is_str - -- f: file to read from - -- add_to: table to add results to (optional) - local r = add_to or {} - for i, t in ipairs(list) do - assert(r[t.name] == nil, "Value for '"..t.name.."' already set") - local val,err = f:read(t.size) -- read specified size in bytes - val = val or "\0" - if t.is_str then -- entry is marked as a string value, read as such - for i = 1, #val do - if val:sub(i,i) == "\0" then - r[t.name] = val:sub(1,i-1) - break - end - end - r[t.name] = r[t.name] or val - else -- entry not marked, so always read as hex value - r[t.name] = get_hex(val) - end - end - return r -end - ---- Calculates the fileoffset of a given RVA. --- This function is also available as a method on the parsed output table --- @param obj a parsed object (return value from `parse`) --- @param RVA an RVA value to convert to a fileoffset (either number or hex-string) --- @return fileoffset of the given RVA (number) -M.get_fileoffset = function(obj, RVA) - -- given an object with a section table, and an RVA, it returns - -- the fileoffset for the data - if type(RVA)=="string" then RVA = M.toDec(RVA) end - local section - for i, s in ipairs(obj.Sections) do - if M.toDec(s.VirtualAddress) <= RVA and M.toDec(s.VirtualAddress) + M.toDec(s.VirtualSize) >= RVA then - section = s - break - end - end - if not section then return nil, "No match RVA with Section list, RVA out of bounds" end - return RVA - M.toDec(section.VirtualAddress) + M.toDec(section.PointerToRawData) -end - -local function readstring(f) - -- reads a null-terminated string from the current file posistion - local name = "" - while true do - local c = f:read(1) - if c == "\0" then break end - name = name .. c - end - return name -end - ---- Parses a file and extracts the information. --- All numbers are delivered as "string" types containing hex values (to prevent numerical overflows in case of 64bit sizes or bit-fields), see `toHex` and `toDec` conversion functions. --- @return table with data, or nil + error --- @usage local pe = require("pe-parser") --- local obj = pe.parse("c:\lua\lua.exe") --- obj:dump() -M.parse = function(target) - - local list = { -- list of known architectures - [332] = "x86", -- IMAGE_FILE_MACHINE_I386 - [512] = "x86_64", -- IMAGE_FILE_MACHINE_IA64 - [34404] = "x86_64", -- IMAGE_FILE_MACHINE_AMD64 - } - - local f, err = io.open(target, "rb") - if not f then return nil, err end - - local MZ = f:read(2) - if MZ ~= "MZ" then - f:close() - return nil, "Not a valid image" - end - - f:seek("set", 60) -- position of PE header position - local peoffset = get_int(f:read(4)) -- read position of PE header - - f:seek("set", peoffset) -- move to position of PE header - local out = get_list({ - { size = 4, - name = "PEheader", - is_str = true }, - { size = 2, - name = "Machine" }, - { size = 2, - name = "NumberOfSections"}, - { size = 4, - name = "TimeDateStamp" }, - { size = 4, - name = "PointerToSymbolTable"}, - { size = 4, - name = "NumberOfSymbols"}, - { size = 2, - name = "SizeOfOptionalHeader"}, - { size = 2, - name = "Characteristics"}, - }, f) - - if out.PEheader ~= "PE" then - f:close() - return nil, "Invalid PE header" - end - out.PEheader = nil -- remove it, has no value - out.dump = M.dump -- export dump function as a method - - if M.toDec(out.SizeOfOptionalHeader) > 0 then - -- parse optional header; standard - get_list({ - { size = 2, - name = "Magic" }, - { size = 1, - name = "MajorLinkerVersion"}, - { size = 1, - name = "MinorLinkerVersion"}, - { size = 4, - name = "SizeOfCode"}, - { size = 4, - name = "SizeOfInitializedData"}, - { size = 4, - name = "SizeOfUninitializedData"}, - { size = 4, - name = "AddressOfEntryPoint"}, - { size = 4, - name = "BaseOfCode"}, - }, f, out) - local plus = (out.Magic == "20b") - if not plus then -- plain PE32, not PE32+ - get_list({ - { size = 4, - name = "BaseOfData" }, - }, f, out) - end - -- parse optional header; windows-fields - local plussize = 4 - if plus then plussize = 8 end - get_list({ - { size = plussize, - name = "ImageBase"}, - { size = 4, - name = "SectionAlignment"}, - { size = 4, - name = "FileAlignment"}, - { size = 2, - name = "MajorOperatingSystemVersion"}, - { size = 2, - name = "MinorOperatingSystemVersion"}, - { size = 2, - name = "MajorImageVersion"}, - { size = 2, - name = "MinorImageVersion"}, - { size = 2, - name = "MajorSubsystemVersion"}, - { size = 2, - name = "MinorSubsystemVersion"}, - { size = 4, - name = "Win32VersionValue"}, - { size = 4, - name = "SizeOfImage"}, - { size = 4, - name = "SizeOfHeaders"}, - { size = 4, - name = "CheckSum"}, - { size = 2, - name = "Subsystem"}, - { size = 2, - name = "DllCharacteristics"}, - { size = plussize, - name = "SizeOfStackReserve"}, - { size = plussize, - name = "SizeOfStackCommit"}, - { size = plussize, - name = "SizeOfHeapReserve"}, - { size = plussize, - name = "SizeOfHeapCommit"}, - { size = 4, - name = "LoaderFlags"}, - { size = 4, - name = "NumberOfRvaAndSizes"}, - }, f, out) - -- Read data directory entries - for i = 1, M.toDec(out.NumberOfRvaAndSizes) do - out.DataDirectory = out.DataDirectory or {} - out.DataDirectory[i] = get_list({ - { size = 4, - name = "VirtualAddress"}, - { size = 4, - name = "Size"}, - }, f) - end - for i, name in ipairs{"ExportTable", "ImportTable", "ResourceTable", - "ExceptionTable", "CertificateTable", "BaseRelocationTable", - "Debug", "Architecture", "GlobalPtr", "TLSTable", - "LoadConfigTable", "BoundImport", "IAT", - "DelayImportDescriptor", "CLRRuntimeHeader", "Reserved"} do - out.DataDirectory[name] = out.DataDirectory[i] - if out.DataDirectory[name] then out.DataDirectory[name].name = name end - end - end - - -- parse section table - for i = 1, M.toDec(out.NumberOfSections) do - out.Sections = out.Sections or {} - out.Sections[i] = get_list({ - { size = 8, - name = "Name", - is_str = true}, - { size = 4, - name = "VirtualSize"}, - { size = 4, - name = "VirtualAddress"}, - { size = 4, - name = "SizeOfRawData"}, - { size = 4, - name = "PointerToRawData"}, - { size = 4, - name = "PointerToRelocations"}, - { size = 4, - name = "PointerToLinenumbers"}, - { size = 2, - name = "NumberOfRelocations"}, - { size = 2, - name = "NumberOfLinenumbers"}, - { size = 4, - name = "Characteristics"}, - }, f) - end - -- we now have section data, so add RVA convertion method - out.get_fileoffset = M.get_fileoffset - - -- get the import table - f:seek("set", out:get_fileoffset(out.DataDirectory.ImportTable.VirtualAddress)) - local done = false - local cnt = 1 - while not done do - local dll = get_list({ - { size = 4, - name = "ImportLookupTableRVA"}, - { size = 4, - name = "TimeDateStamp"}, - { size = 4, - name = "ForwarderChain"}, - { size = 4, - name = "NameRVA"}, - { size = 4, - name = "ImportAddressTableRVA"}, - }, f) - if M.toDec(dll.NameRVA) == 0 then - -- this is the final NULL entry, so we're done - done = true - else - -- store the import entry - out.DataDirectory.ImportTable[cnt] = dll - cnt = cnt + 1 - end - end - -- resolve imported DLL names - for i, dll in ipairs(out.DataDirectory.ImportTable) do - f:seek("set", out:get_fileoffset(dll.NameRVA)) - dll.Name = readstring(f) - end - - f:close() - return out -end - --- pad a string (prefix) to a specific length -local function pad(str, l, chr) - chr = chr or " " - l = l or 0 - return string.rep(chr,l-#str)..str -end - ---- Dumps the output parsed. --- This function is also available as a method on the parsed output table -M.dump = function(obj) - local l = 0 - for k,v in pairs(obj) do if #k > l then l = #k end end - - for k,v in pairs(obj) do - if (M.const[k] and type(v)=="string") then - -- look up named value - print(k..string.rep(" ", l - #k + 1)..": "..M.const[k][v]) - elseif M.const[k.."_flags"] then - -- flags should be listed - print(k..string.rep(" ", l - #k + 1)..": "..v.." (flag field)") - else - -- regular values - if type(v) == "number" then - print(k..string.rep(" ", l - #k + 1)..": "..v.." (dec)") - else - if (type(v)=="string") and (k ~= "DataDirectory") and (k ~= "Sections") then - print(k..string.rep(" ", l - #k + 1)..": "..v) - end - end - end - end - - if obj.DataDirectory then - print("DataDirectory (RVA, size):") - for i, v in ipairs(obj.DataDirectory) do - print(" Entry "..M.toHex(i-1).." "..pad(v.VirtualAddress,8,"0").." "..pad(v.Size,8,"0").." "..v.name) - end - end - - if obj.Sections then - print("Sections:") - print("idx name RVA VSize Offset RawSize") - for i, v in ipairs(obj.Sections) do - print(" "..i.." "..v.Name.. string.rep(" ",9-#v.Name)..pad(v.VirtualAddress,8,"0").." "..pad(v.VirtualSize,8,"0").." "..pad(v.PointerToRawData,8,"0").." "..pad(v.SizeOfRawData,8,"0")) - end - end - - print("Imports:") - for i, dll in ipairs(obj.DataDirectory.ImportTable) do - print(" "..dll.Name) - end -end - ---- Checks the msvcrt dll the binary was linked against. --- Mixing and matching dlls only works when they all are using the same runtime, if --- not unexpected errors will probably occur. --- Checks the binary provided and then traverses all imported dlls to find the msvcrt --- used (it will only look for the dlls in the same directory). --- @param infile binary file to check --- @return msvcrt name (uppercase, without extension) + file where the reference was found, or nil + error -function M.msvcrt(infile) - local path, file = infile:match("(.+)\\(.+)$") - if not path then - path = "" - file = infile - else - path=path .. "\\" - end - local obj, err = M.parse(path..file) - if not obj then return obj, err end - - for i, dll in ipairs(obj.DataDirectory.ImportTable) do - dll = dll.Name:upper() - local result = dll:match('(MSVCR%d*D?)%.DLL') - if not result then - result = dll:match('(MSVCRTD?)%.DLL') - end - if not result then - result = dll:match('(VCRUNTIME%d*D?)%.DLL') - end - -- success, found it return name + binary where it was found - if result then return result, infile end - end - - -- not found, so traverse all imported dll's - for i, dll in ipairs(obj.DataDirectory.ImportTable) do - local rt, ref = M.msvcrt(path..dll.Name) - if rt then - return rt, ref -- found it - end - end - - return nil, "No msvcrt found" -end - -function M.get_architecture(program) - -- detect processor arch interpreter was compiled for - local proc = (M.parse(program) or {}).Machine - if not proc then - return nil, "Could not detect processor architecture used in "..program - end - proc = M.const.Machine[proc] -- collect name from constant value - if proc == "IMAGE_FILE_MACHINE_I386" then - proc = "x86" - else - proc = "x86_64" - end - return proc -end - -return M diff --git a/win32/pe-parser.lua b/win32/pe-parser.lua new file mode 100644 index 00000000..34556812 --- /dev/null +++ b/win32/pe-parser.lua @@ -0,0 +1,568 @@ +--------------------------------------------------------------------------------------- +-- Lua module to parse a Portable Executable (.exe , .dll, etc.) file and extract metadata. +-- +-- NOTE: numerical information is extracted as strings (hex) to prevent numerical overflows in +-- case of 64 bit fields (bit/flag fields). Pointer arithmetic is still done numerically, so for +-- very large files this could lead to undefined results. Use with care! +-- +-- Version 0.4, [copyright (c) 2013-2015 Thijs Schreijer](http://www.thijsschreijer.nl) +-- @name pe-parser +-- @class module + +local M = {} + +--- Table with named constants/flag-constants. +-- Named elements can be looked up by their name in the `const` table. The sub tables are index by value. +-- For flag fields the name is extended with `_flags`. +-- @usage -- lookup descriptive name for the myobj.Magic value +-- local desc = pe.const.Magic(myobj.Magic) +-- +-- -- get list of flag names, indexed by flag values, for the Characteristics field +-- local flag_list = pe.const.Characteristics_flags +M.const = { + Magic = { + ["10b"] = "PE32", + ["20b"] = "PE32+", + }, + Machine = { + ["0"] = "IMAGE_FILE_MACHINE_UNKNOWN", + ["1d3"] = "IMAGE_FILE_MACHINE_AM33", + ["8664"] = "IMAGE_FILE_MACHINE_AMD64", + ["1c0"] = "IMAGE_FILE_MACHINE_ARM", + ["1c4"] = "IMAGE_FILE_MACHINE_ARMNT", + ["aa64"] = "IMAGE_FILE_MACHINE_ARM64", + ["ebc"] = "IMAGE_FILE_MACHINE_EBC", + ["14c"] = "IMAGE_FILE_MACHINE_I386", + ["200"] = "IMAGE_FILE_MACHINE_IA64", + ["9041"] = "IMAGE_FILE_MACHINE_M32R", + ["266"] = "IMAGE_FILE_MACHINE_MIPS16", + ["366"] = "IMAGE_FILE_MACHINE_MIPSFPU", + ["466"] = "IMAGE_FILE_MACHINE_MIPSFPU16", + ["1f0"] = "IMAGE_FILE_MACHINE_POWERPC", + ["1f1"] = "IMAGE_FILE_MACHINE_POWERPCFP", + ["166"] = "IMAGE_FILE_MACHINE_R4000", + ["1a2"] = "IMAGE_FILE_MACHINE_SH3", + ["1a3"] = "IMAGE_FILE_MACHINE_SH3DSP", + ["1a6"] = "IMAGE_FILE_MACHINE_SH4", + ["1a8"] = "IMAGE_FILE_MACHINE_SH5", + ["1c2"] = "IMAGE_FILE_MACHINE_THUMB", + ["169"] = "IMAGE_FILE_MACHINE_WCEMIPSV2", + }, + Characteristics_flags = { + ["1"] = "IMAGE_FILE_RELOCS_STRIPPED", + ["2"] = "IMAGE_FILE_EXECUTABLE_IMAGE", + ["4"] = "IMAGE_FILE_LINE_NUMS_STRIPPED", + ["8"] = "IMAGE_FILE_LOCAL_SYMS_STRIPPED", + ["10"] = "IMAGE_FILE_AGGRESSIVE_WS_TRIM", + ["20"] = "IMAGE_FILE_LARGE_ADDRESS_AWARE", + ["40"] = "Reserved for future use", + ["80"] = "IMAGE_FILE_BYTES_REVERSED_LO", + ["100"] = "IMAGE_FILE_32BIT_MACHINE", + ["200"] = "IMAGE_FILE_DEBUG_STRIPPED", + ["400"] = "IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP", + ["800"] = "IMAGE_FILE_NET_RUN_FROM_SWAP", + ["1000"] = "IMAGE_FILE_SYSTEM", + ["2000"] = "IMAGE_FILE_DLL", + ["4000"] = "IMAGE_FILE_UP_SYSTEM_ONLY", + ["8000"] = "IMAGE_FILE_BYTES_REVERSED_HI", + }, + Subsystem = { + ["0"] = "IMAGE_SUBSYSTEM_UNKNOWN", + ["1"] = "IMAGE_SUBSYSTEM_NATIVE", + ["2"] = "IMAGE_SUBSYSTEM_WINDOWS_GUI", + ["3"] = "IMAGE_SUBSYSTEM_WINDOWS_CUI", + ["7"] = "IMAGE_SUBSYSTEM_POSIX_CUI", + ["9"] = "IMAGE_SUBSYSTEM_WINDOWS_CE_GUI", + ["a"] = "IMAGE_SUBSYSTEM_EFI_APPLICATION", + ["b"] = "IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER", + ["c"] = "IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER", + ["d"] = "IMAGE_SUBSYSTEM_EFI_ROM", + ["e"] = "IMAGE_SUBSYSTEM_XBOX", + }, + DllCharacteristics_flags = { + ["40"] = "IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE", + ["80"] = "IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY", + ["100"] = "IMAGE_DLL_CHARACTERISTICS_NX_COMPAT", + ["200"] = "IMAGE_DLLCHARACTERISTICS_NO_ISOLATION", + ["400"] = "IMAGE_DLLCHARACTERISTICS_NO_SEH", + ["800"] = "IMAGE_DLLCHARACTERISTICS_NO_BIND", + ["2000"] = "IMAGE_DLLCHARACTERISTICS_WDM_DRIVER", + ["8000"] = "IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE", + }, + Sections = { + Characteristics_flags = { + ["8"] = "IMAGE_SCN_TYPE_NO_PAD", + ["20"] = "IMAGE_SCN_CNT_CODE", + ["40"] = "IMAGE_SCN_CNT_INITIALIZED_DATA", + ["80"] = "IMAGE_SCN_CNT_UNINITIALIZED_ DATA", + ["100"] = "IMAGE_SCN_LNK_OTHER", + ["200"] = "IMAGE_SCN_LNK_INFO", + ["800"] = "IMAGE_SCN_LNK_REMOVE", + ["1000"] = "IMAGE_SCN_LNK_COMDAT", + ["8000"] = "IMAGE_SCN_GPREL", + ["20000"] = "IMAGE_SCN_MEM_PURGEABLE", + ["20000"] = "IMAGE_SCN_MEM_16BIT", + ["40000"] = "IMAGE_SCN_MEM_LOCKED", + ["80000"] = "IMAGE_SCN_MEM_PRELOAD", + ["100000"] = "IMAGE_SCN_ALIGN_1BYTES", + ["200000"] = "IMAGE_SCN_ALIGN_2BYTES", + ["300000"] = "IMAGE_SCN_ALIGN_4BYTES", + ["400000"] = "IMAGE_SCN_ALIGN_8BYTES", + ["500000"] = "IMAGE_SCN_ALIGN_16BYTES", + ["600000"] = "IMAGE_SCN_ALIGN_32BYTES", + ["700000"] = "IMAGE_SCN_ALIGN_64BYTES", + ["800000"] = "IMAGE_SCN_ALIGN_128BYTES", + ["900000"] = "IMAGE_SCN_ALIGN_256BYTES", + ["a00000"] = "IMAGE_SCN_ALIGN_512BYTES", + ["b00000"] = "IMAGE_SCN_ALIGN_1024BYTES", + ["c00000"] = "IMAGE_SCN_ALIGN_2048BYTES", + ["d00000"] = "IMAGE_SCN_ALIGN_4096BYTES", + ["e00000"] = "IMAGE_SCN_ALIGN_8192BYTES", + ["1000000"] = "IMAGE_SCN_LNK_NRELOC_OVFL", + ["2000000"] = "IMAGE_SCN_MEM_DISCARDABLE", + ["4000000"] = "IMAGE_SCN_MEM_NOT_CACHED", + ["8000000"] = "IMAGE_SCN_MEM_NOT_PAGED", + ["10000000"] = "IMAGE_SCN_MEM_SHARED", + ["20000000"] = "IMAGE_SCN_MEM_EXECUTE", + ["40000000"] = "IMAGE_SCN_MEM_READ", + ["80000000"] = "IMAGE_SCN_MEM_WRITE", + }, + }, + +} + + +--- convert integer to HEX representation +-- @param IN the number to convert to hex +-- @param len the size to return, any result smaller will be prefixed by "0"s +-- @return string containing hex representation +function M.toHex(IN, len) + local B,K,OUT,I,D=16,"0123456789abcdef","",0 + while IN>0 do + I=I+1 + IN,D=math.floor(IN/B),math.fmod(IN,B)+1 + OUT=string.sub(K,D,D)..OUT + end + len = len or string.len(OUT) + if len<1 then len = 1 end + return (string.rep("0",len) .. OUT):sub(-len,-1) +end + +--- convert HEX to integer +-- @param IN the string to convert to dec +-- @return number in dec format +function M.toDec(IN) + assert(type(IN)=="string") + local OUT = 0 + IN = IN:lower() + while #IN > 0 do + local b = string.find("0123456789abcdef",IN:sub(1,1)) + OUT = OUT * 16 + (b-1) + IN = IN:sub(2,-1) + end + return OUT +end + +local function get_int(str) + -- convert a byte-sequence to an integer + assert(str) + local r = 0 + for i = #str, 1, -1 do + r = r*256 + string.byte(str,i,i) + end + return r +end + +local function get_hex(str) + -- convert a byte-sequence to a hex string + assert(str) + local r = "" + for i = #str, 1, -1 do + r = r .. M.toHex(string.byte(str,i,i),2) + end + while (#r > 1) and (r:sub(1,1) == "0") do + r = r:sub(2, -1) + end + return r +end + +local function get_list(list, f, add_to) + -- list: list of tables with 'size' and 'name' and is_str + -- f: file to read from + -- add_to: table to add results to (optional) + local r = add_to or {} + for i, t in ipairs(list) do + assert(r[t.name] == nil, "Value for '"..t.name.."' already set") + local val,err = f:read(t.size) -- read specified size in bytes + val = val or "\0" + if t.is_str then -- entry is marked as a string value, read as such + for i = 1, #val do + if val:sub(i,i) == "\0" then + r[t.name] = val:sub(1,i-1) + break + end + end + r[t.name] = r[t.name] or val + else -- entry not marked, so always read as hex value + r[t.name] = get_hex(val) + end + end + return r +end + +--- Calculates the fileoffset of a given RVA. +-- This function is also available as a method on the parsed output table +-- @param obj a parsed object (return value from `parse`) +-- @param RVA an RVA value to convert to a fileoffset (either number or hex-string) +-- @return fileoffset of the given RVA (number) +M.get_fileoffset = function(obj, RVA) + -- given an object with a section table, and an RVA, it returns + -- the fileoffset for the data + if type(RVA)=="string" then RVA = M.toDec(RVA) end + local section + for i, s in ipairs(obj.Sections) do + if M.toDec(s.VirtualAddress) <= RVA and M.toDec(s.VirtualAddress) + M.toDec(s.VirtualSize) >= RVA then + section = s + break + end + end + if not section then return nil, "No match RVA with Section list, RVA out of bounds" end + return RVA - M.toDec(section.VirtualAddress) + M.toDec(section.PointerToRawData) +end + +local function readstring(f) + -- reads a null-terminated string from the current file posistion + local name = "" + while true do + local c = f:read(1) + if c == "\0" then break end + name = name .. c + end + return name +end + +--- Parses a file and extracts the information. +-- All numbers are delivered as "string" types containing hex values (to prevent numerical overflows in case of 64bit sizes or bit-fields), see `toHex` and `toDec` conversion functions. +-- @return table with data, or nil + error +-- @usage local pe = require("pe-parser") +-- local obj = pe.parse("c:\lua\lua.exe") +-- obj:dump() +M.parse = function(target) + + local list = { -- list of known architectures + [332] = "x86", -- IMAGE_FILE_MACHINE_I386 + [512] = "x86_64", -- IMAGE_FILE_MACHINE_IA64 + [34404] = "x86_64", -- IMAGE_FILE_MACHINE_AMD64 + } + + local f, err = io.open(target, "rb") + if not f then return nil, err end + + local MZ = f:read(2) + if MZ ~= "MZ" then + f:close() + return nil, "Not a valid image" + end + + f:seek("set", 60) -- position of PE header position + local peoffset = get_int(f:read(4)) -- read position of PE header + + f:seek("set", peoffset) -- move to position of PE header + local out = get_list({ + { size = 4, + name = "PEheader", + is_str = true }, + { size = 2, + name = "Machine" }, + { size = 2, + name = "NumberOfSections"}, + { size = 4, + name = "TimeDateStamp" }, + { size = 4, + name = "PointerToSymbolTable"}, + { size = 4, + name = "NumberOfSymbols"}, + { size = 2, + name = "SizeOfOptionalHeader"}, + { size = 2, + name = "Characteristics"}, + }, f) + + if out.PEheader ~= "PE" then + f:close() + return nil, "Invalid PE header" + end + out.PEheader = nil -- remove it, has no value + out.dump = M.dump -- export dump function as a method + + if M.toDec(out.SizeOfOptionalHeader) > 0 then + -- parse optional header; standard + get_list({ + { size = 2, + name = "Magic" }, + { size = 1, + name = "MajorLinkerVersion"}, + { size = 1, + name = "MinorLinkerVersion"}, + { size = 4, + name = "SizeOfCode"}, + { size = 4, + name = "SizeOfInitializedData"}, + { size = 4, + name = "SizeOfUninitializedData"}, + { size = 4, + name = "AddressOfEntryPoint"}, + { size = 4, + name = "BaseOfCode"}, + }, f, out) + local plus = (out.Magic == "20b") + if not plus then -- plain PE32, not PE32+ + get_list({ + { size = 4, + name = "BaseOfData" }, + }, f, out) + end + -- parse optional header; windows-fields + local plussize = 4 + if plus then plussize = 8 end + get_list({ + { size = plussize, + name = "ImageBase"}, + { size = 4, + name = "SectionAlignment"}, + { size = 4, + name = "FileAlignment"}, + { size = 2, + name = "MajorOperatingSystemVersion"}, + { size = 2, + name = "MinorOperatingSystemVersion"}, + { size = 2, + name = "MajorImageVersion"}, + { size = 2, + name = "MinorImageVersion"}, + { size = 2, + name = "MajorSubsystemVersion"}, + { size = 2, + name = "MinorSubsystemVersion"}, + { size = 4, + name = "Win32VersionValue"}, + { size = 4, + name = "SizeOfImage"}, + { size = 4, + name = "SizeOfHeaders"}, + { size = 4, + name = "CheckSum"}, + { size = 2, + name = "Subsystem"}, + { size = 2, + name = "DllCharacteristics"}, + { size = plussize, + name = "SizeOfStackReserve"}, + { size = plussize, + name = "SizeOfStackCommit"}, + { size = plussize, + name = "SizeOfHeapReserve"}, + { size = plussize, + name = "SizeOfHeapCommit"}, + { size = 4, + name = "LoaderFlags"}, + { size = 4, + name = "NumberOfRvaAndSizes"}, + }, f, out) + -- Read data directory entries + for i = 1, M.toDec(out.NumberOfRvaAndSizes) do + out.DataDirectory = out.DataDirectory or {} + out.DataDirectory[i] = get_list({ + { size = 4, + name = "VirtualAddress"}, + { size = 4, + name = "Size"}, + }, f) + end + for i, name in ipairs{"ExportTable", "ImportTable", "ResourceTable", + "ExceptionTable", "CertificateTable", "BaseRelocationTable", + "Debug", "Architecture", "GlobalPtr", "TLSTable", + "LoadConfigTable", "BoundImport", "IAT", + "DelayImportDescriptor", "CLRRuntimeHeader", "Reserved"} do + out.DataDirectory[name] = out.DataDirectory[i] + if out.DataDirectory[name] then out.DataDirectory[name].name = name end + end + end + + -- parse section table + for i = 1, M.toDec(out.NumberOfSections) do + out.Sections = out.Sections or {} + out.Sections[i] = get_list({ + { size = 8, + name = "Name", + is_str = true}, + { size = 4, + name = "VirtualSize"}, + { size = 4, + name = "VirtualAddress"}, + { size = 4, + name = "SizeOfRawData"}, + { size = 4, + name = "PointerToRawData"}, + { size = 4, + name = "PointerToRelocations"}, + { size = 4, + name = "PointerToLinenumbers"}, + { size = 2, + name = "NumberOfRelocations"}, + { size = 2, + name = "NumberOfLinenumbers"}, + { size = 4, + name = "Characteristics"}, + }, f) + end + -- we now have section data, so add RVA convertion method + out.get_fileoffset = M.get_fileoffset + + -- get the import table + f:seek("set", out:get_fileoffset(out.DataDirectory.ImportTable.VirtualAddress)) + local done = false + local cnt = 1 + while not done do + local dll = get_list({ + { size = 4, + name = "ImportLookupTableRVA"}, + { size = 4, + name = "TimeDateStamp"}, + { size = 4, + name = "ForwarderChain"}, + { size = 4, + name = "NameRVA"}, + { size = 4, + name = "ImportAddressTableRVA"}, + }, f) + if M.toDec(dll.NameRVA) == 0 then + -- this is the final NULL entry, so we're done + done = true + else + -- store the import entry + out.DataDirectory.ImportTable[cnt] = dll + cnt = cnt + 1 + end + end + -- resolve imported DLL names + for i, dll in ipairs(out.DataDirectory.ImportTable) do + f:seek("set", out:get_fileoffset(dll.NameRVA)) + dll.Name = readstring(f) + end + + f:close() + return out +end + +-- pad a string (prefix) to a specific length +local function pad(str, l, chr) + chr = chr or " " + l = l or 0 + return string.rep(chr,l-#str)..str +end + +--- Dumps the output parsed. +-- This function is also available as a method on the parsed output table +M.dump = function(obj) + local l = 0 + for k,v in pairs(obj) do if #k > l then l = #k end end + + for k,v in pairs(obj) do + if (M.const[k] and type(v)=="string") then + -- look up named value + print(k..string.rep(" ", l - #k + 1)..": "..M.const[k][v]) + elseif M.const[k.."_flags"] then + -- flags should be listed + print(k..string.rep(" ", l - #k + 1)..": "..v.." (flag field)") + else + -- regular values + if type(v) == "number" then + print(k..string.rep(" ", l - #k + 1)..": "..v.." (dec)") + else + if (type(v)=="string") and (k ~= "DataDirectory") and (k ~= "Sections") then + print(k..string.rep(" ", l - #k + 1)..": "..v) + end + end + end + end + + if obj.DataDirectory then + print("DataDirectory (RVA, size):") + for i, v in ipairs(obj.DataDirectory) do + print(" Entry "..M.toHex(i-1).." "..pad(v.VirtualAddress,8,"0").." "..pad(v.Size,8,"0").." "..v.name) + end + end + + if obj.Sections then + print("Sections:") + print("idx name RVA VSize Offset RawSize") + for i, v in ipairs(obj.Sections) do + print(" "..i.." "..v.Name.. string.rep(" ",9-#v.Name)..pad(v.VirtualAddress,8,"0").." "..pad(v.VirtualSize,8,"0").." "..pad(v.PointerToRawData,8,"0").." "..pad(v.SizeOfRawData,8,"0")) + end + end + + print("Imports:") + for i, dll in ipairs(obj.DataDirectory.ImportTable) do + print(" "..dll.Name) + end +end + +--- Checks the msvcrt dll the binary was linked against. +-- Mixing and matching dlls only works when they all are using the same runtime, if +-- not unexpected errors will probably occur. +-- Checks the binary provided and then traverses all imported dlls to find the msvcrt +-- used (it will only look for the dlls in the same directory). +-- @param infile binary file to check +-- @return msvcrt name (uppercase, without extension) + file where the reference was found, or nil + error +function M.msvcrt(infile) + local path, file = infile:match("(.+)\\(.+)$") + if not path then + path = "" + file = infile + else + path=path .. "\\" + end + local obj, err = M.parse(path..file) + if not obj then return obj, err end + + for i, dll in ipairs(obj.DataDirectory.ImportTable) do + dll = dll.Name:upper() + local result = dll:match('(MSVCR%d*D?)%.DLL') + if not result then + result = dll:match('(MSVCRTD?)%.DLL') + end + if not result then + result = dll:match('(VCRUNTIME%d*D?)%.DLL') + end + -- success, found it return name + binary where it was found + if result then return result, infile end + end + + -- not found, so traverse all imported dll's + for i, dll in ipairs(obj.DataDirectory.ImportTable) do + local rt, ref = M.msvcrt(path..dll.Name) + if rt then + return rt, ref -- found it + end + end + + return nil, "No msvcrt found" +end + +function M.get_architecture(program) + -- detect processor arch interpreter was compiled for + local proc = (M.parse(program) or {}).Machine + if not proc then + return nil, "Could not detect processor architecture used in "..program + end + proc = M.const.Machine[proc] -- collect name from constant value + if proc == "IMAGE_FILE_MACHINE_I386" then + proc = "x86" + else + proc = "x86_64" + end + return proc +end + +return M -- cgit v1.2.3-55-g6feb