"""
Basic MS COFF object loader based on https://docs.microsoft.com/en-us/windows/win32/debug/pe-format
"""
import ctypes
import logging
import struct
from enum import IntEnum, IntFlag
import archinfo
from .backend import Backend, register_backend
from .region import Section, Segment
from .relocation import Relocation
from .symbol import Symbol, SymbolType
log = logging.getLogger(__name__)
[docs]
class IMAGE_FILE_MACHINE(IntEnum):
"""
Machine Types
"""
I386 = 0x14C
AMD64 = 0x8664
[docs]
class IMAGE_SCN(IntFlag):
"""
Section Flags (Characteristics field)
"""
MEM_EXECUTE = 0x20000000
MEM_READ = 0x40000000
MEM_WRITE = 0x80000000
CNT_UNINITIALIZED_DATA = 0x00000080
[docs]
class CoffSectionTableEntry(ctypes.LittleEndianStructure):
"""
COFF Section Header
"""
_pack_ = 1
_fields_ = [
("Name", ctypes.c_uint8 * 8),
("VirtualSize", ctypes.c_uint32),
("VirtualAddress", ctypes.c_uint32),
("SizeOfRawData", ctypes.c_uint32),
("PointerToRawData", ctypes.c_uint32),
("PointerToRelocations", ctypes.c_uint32),
("PointerToLinenumbers", ctypes.c_uint32),
("NumberOfRelocations", ctypes.c_uint16),
("NumberOfLinenumbers", ctypes.c_uint16),
("Characteristics", ctypes.c_uint32),
]
[docs]
class IMAGE_SYM_CLASS(IntEnum):
"""
Symbol Storage Class
"""
EXTERNAL = 2
STATIC = 3
LABEL = 6
FUNCTION = 101
[docs]
class CoffSymbolTableEntry(ctypes.LittleEndianStructure):
"""
COFF Symbol Table Entry
"""
_pack_ = 1
_fields_ = [
("Name", ctypes.c_uint8 * 8),
("Value", ctypes.c_uint32),
("SectionNumber", ctypes.c_int16),
("Type", ctypes.c_uint16),
("StorageClass", ctypes.c_uint8),
("NumberOfAuxSymbols", ctypes.c_uint8),
]
[docs]
class IMAGE_REL_I386(IntEnum):
"""
i386 Relocation Types
"""
DIR32 = 0x0006
DIR32NB = 0x0007
REL32 = 0x0014
SECTION = 0x000A
SECREL = 0x000B
[docs]
class IMAGE_REL_AMD64(IntEnum):
"""
AMD64 Relocation Types
"""
ADDR64 = 0x0001
ADDR32NB = 0x0003
REL32 = 0x0004
SECTION = 0x000A
SECREL = 0x000B
[docs]
class CoffRelocationTableEntry(ctypes.LittleEndianStructure):
"""
COFF Relocations
"""
_pack_ = 1
_fields_ = [
("VirtualAddress", ctypes.c_uint32),
("SymbolTableIndex", ctypes.c_uint32),
("Type", ctypes.c_uint16),
]
[docs]
class CoffParser:
"""
Parses COFF object files.
"""
data: bytes
header: CoffFileHeader
sections: list[CoffSectionTableEntry]
relocations: list[list[CoffRelocationTableEntry]]
symbols: list[CoffSymbolTableEntry]
# Note: Symbols are uniquely identified by their index. It is possible for multiple symbols to have the same name so
# in idx_to_symbol_name and symbol_name_to_idx, numeric suffixes are appended when necessary. To get the true name
# of a symbol at index `symbol_idx`, call get_symbol_name(symbol_idx, true_name=True).
idx_to_symbol_name: dict[int, str]
symbol_name_to_idx: dict[str, int]
[docs]
def __init__(self, data: bytes):
if data.startswith(b"\x00\x00\xff\xff"):
raise ValueError(
"This object file appears to have been compiled with whole program optimization (/GL flag)"
" and cannot be parsed by this library"
)
self.data: bytes = data
self._parse()
def _parse(self) -> None:
self.header = CoffFileHeader.from_buffer_copy(self.data)
if self.header.Machine not in {
IMAGE_FILE_MACHINE.I386,
IMAGE_FILE_MACHINE.AMD64,
}:
raise NotImplementedError("Unsupported machine type")
strings_offset = (
self.header.PointerToSymbolTable + ctypes.sizeof(CoffSymbolTableEntry) * self.header.NumberOfSymbols
)
strings_size = struct.unpack("<I", self.data[strings_offset : strings_offset + 4])[0]
self.strings: bytes = self.data[strings_offset : strings_offset + strings_size]
self.symbols = []
self.symbol_name_to_idx = {}
self.idx_to_symbol_name = {}
offset = self.header.PointerToSymbolTable
aux = 0
for i in range(self.header.NumberOfSymbols):
symbol = CoffSymbolTableEntry.from_buffer_copy(self.data, offset)
offset += ctypes.sizeof(CoffSymbolTableEntry)
self.symbols.append(symbol)
if aux:
aux -= 1
continue
idx = len(self.symbols) - 1
name = self.get_symbol_name(idx)
aux = symbol.NumberOfAuxSymbols
# Ensure unique symbol names
i = 1
base_name = name
while name in self.symbol_name_to_idx:
name = base_name + f"__{i}"
i += 1
self.symbol_name_to_idx[name] = idx
self.idx_to_symbol_name[idx] = name
self.sections = []
self.relocations = []
for i in range(self.header.NumberOfSections):
offset = ctypes.sizeof(self.header) + ctypes.sizeof(CoffSectionTableEntry) * i
section = CoffSectionTableEntry.from_buffer_copy(self.data, offset)
self.sections.append(section)
# Relocations
relocs = []
offset = section.PointerToRelocations
for i in range(section.NumberOfRelocations):
reloc = CoffRelocationTableEntry.from_buffer_copy(self.data, offset)
relocs.append(reloc)
offset += ctypes.sizeof(reloc)
self.relocations.append(relocs)
@staticmethod
def _decode_cstring(data: bytes, offset: int, encoding: str | None = None) -> str:
name = bytearray()
while True:
x = data[offset]
if x == 0:
break
name.append(x)
offset += 1
return str(name, encoding=(encoding or "ascii"))
[docs]
def get_symbol_name(self, symbol_idx: int, true_name: bool = False) -> str:
if symbol_idx in self.idx_to_symbol_name and not true_name:
return self.idx_to_symbol_name[symbol_idx]
name_encoded = bytes(self.symbols[symbol_idx].Name)
if name_encoded[0:4] == b"\x00\x00\x00\x00":
offset = struct.unpack("<II", name_encoded)[1]
return self._decode_cstring(self.strings, offset)
return name_encoded.rstrip(b"\x00").decode("ascii")
[docs]
def get_section_name(self, section_idx: int) -> str:
name = bytes(self.sections[section_idx].Name).rstrip(b"\x00").decode("ascii")
if name.startswith("/"):
return self.get_symbol_name(int(name[1:]))
return name
[docs]
class CoffSection(Section):
"""
Section of the COFF object.
"""
[docs]
def __init__(
self,
name: str,
file_offset: int,
file_size: int,
virtual_addr: int,
virtual_size: int,
coff_sec: CoffSectionTableEntry,
):
super().__init__(name, file_offset, virtual_addr, virtual_size)
self.filesize = file_size
self._coff_sec = coff_sec
@property
def is_readable(self):
return (self._coff_sec.Characteristics & IMAGE_SCN.MEM_READ) != 0
@property
def is_writable(self):
return (self._coff_sec.Characteristics & IMAGE_SCN.MEM_WRITE) != 0
@property
def is_executable(self):
return (self._coff_sec.Characteristics & IMAGE_SCN.MEM_EXECUTE) != 0
@property
def only_contains_uninitialized_data(self):
return (self._coff_sec.Characteristics & IMAGE_SCN.CNT_UNINITIALIZED_DATA) != 0
[docs]
class CoffRelocation(Relocation):
"""
Relocation for a COFF object.
"""
[docs]
def relocate(self):
value = self.value
if value is None:
log.debug("Unresolved relocation with no symbol.")
return
self.owner.memory.store(self.relative_addr, value)
[docs]
class CoffRelocationREL32(CoffRelocation):
"""
Relocation for IMAGE_REL_*_REL32
"""
@property
def value(self):
org_bytes = self.owner.memory.load(self.relative_addr, 4)
org_value = struct.unpack("<I", org_bytes)[0]
return struct.pack("<i", org_value + self.resolvedby.rebased_addr - (self.rebased_addr + 4))
[docs]
class CoffRelocationDIR32(CoffRelocation):
"""
Relocation for IMAGE_REL_*_DIR32
"""
@property
def value(self):
org_bytes = self.owner.memory.load(self.relative_addr, 4)
org_value = struct.unpack("<I", org_bytes)[0]
return struct.pack("<i", org_value + self.resolvedby.rebased_addr)
[docs]
class CoffRelocationDIR32NB(CoffRelocation):
"""
Relocation for IMAGE_REL_*_DIR32
"""
@property
def value(self):
org_bytes = self.owner.memory.load(self.relative_addr, 4)
org_value = struct.unpack("<I", org_bytes)[0]
return struct.pack("<i", org_value + self.resolvedby.relative_addr)
[docs]
class CoffRelocationADDR32NB(CoffRelocation):
"""
Relocation for IMAGE_REL_AMD64_ADDR32NB
"""
@property
def value(self):
return struct.pack("<I", self.resolvedby.relative_addr)
[docs]
class CoffRelocationADDR64(CoffRelocation):
"""
Relocation for IMAGE_REL_AMD64_ADDR64
"""
@property
def value(self):
return struct.pack("<Q", self.resolvedby.rebased_addr)
[docs]
class CoffRelocationSECTION(CoffRelocation):
"""
Relocation for IMAGE_REL_*_SECTION
"""
@property
def value(self):
assert isinstance(self.owner, Coff)
section_idx = 0 # FIXME
return struct.pack("<H", section_idx)
[docs]
class CoffRelocationSECREL(CoffRelocation):
"""
Relocation for IMAGE_REL_*_SECREL
"""
@property
def value(self):
assert isinstance(self.owner, Coff)
offset_to_symbol = 0 # FIXME
return struct.pack("<I", offset_to_symbol)
RELOC_CLASSES: dict[IntEnum, dict[IntEnum, type[Relocation]]] = {
IMAGE_FILE_MACHINE.I386: {
IMAGE_REL_I386.REL32: CoffRelocationREL32,
IMAGE_REL_I386.DIR32: CoffRelocationDIR32,
IMAGE_REL_I386.DIR32NB: CoffRelocationDIR32NB,
IMAGE_REL_I386.SECTION: CoffRelocationSECTION,
IMAGE_REL_I386.SECREL: CoffRelocationSECREL,
},
IMAGE_FILE_MACHINE.AMD64: {
IMAGE_REL_AMD64.ADDR64: CoffRelocationADDR64,
IMAGE_REL_AMD64.ADDR32NB: CoffRelocationADDR32NB,
IMAGE_REL_AMD64.REL32: CoffRelocationREL32,
IMAGE_REL_AMD64.SECTION: CoffRelocationSECTION,
IMAGE_REL_AMD64.SECREL: CoffRelocationSECREL,
},
}
COFF_MACHINE_TO_ARCH_NAME = {
IMAGE_FILE_MACHINE.I386: "x86",
IMAGE_FILE_MACHINE.AMD64: "AMD64",
}
[docs]
class Coff(Backend):
"""
COFF object loader.
"""
is_default = True
[docs]
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
if self.binary is None:
self._data = self._binary_stream.read()
else:
with open(self.binary, "rb") as f:
self._data = f.read()
self._coff = CoffParser(self._data)
arch = archinfo.arch_from_id(COFF_MACHINE_TO_ARCH_NAME[self._coff.header.Machine])
self.set_arch(arch)
# FIXME: Currently we just map the whole object file for convenience. Create a better memory map, discard object
# file structure data.
self._image_vmem = self._data
# Add each section
for section_idx, section in enumerate(self._coff.sections):
section_name = self._coff.get_section_name(section_idx)
vaddr = section.PointerToRawData
vsize = section.SizeOfRawData
self.segments.append(Segment(section.PointerToRawData, vaddr, section.SizeOfRawData, vsize))
self.sections.append(
CoffSection(
section_name,
section.PointerToRawData,
section.SizeOfRawData,
vaddr,
vsize,
section,
)
)
self.memory.add_backer(0, bytes(self._image_vmem))
self.mapped_base = self.linked_base = 0
self.pic = True
# assume windows, this can be wrong, but is more often right.
self.os = "windows"
self._add_defined_symbols()
self._add_relocs()
# FIXME: Expose __imp_* symbols through self.imports
def _add_defined_symbols(self) -> None:
for sym_name, sym_idx in self._coff.symbol_name_to_idx.items():
sym = self._coff.symbols[sym_idx]
if sym.SectionNumber > 0 and sym.StorageClass in {
IMAGE_SYM_CLASS.STATIC,
IMAGE_SYM_CLASS.LABEL,
IMAGE_SYM_CLASS.EXTERNAL,
}:
self.symbols.add(self.get_symbol(sym_name))
def _add_relocs(self) -> None:
for section_idx, section in enumerate(self._coff.sections):
for reloc in self._coff.relocations[section_idx]:
sym = self._coff.symbols[reloc.SymbolTableIndex]
sym_name = self._coff.get_symbol_name(reloc.SymbolTableIndex)
patch_offset = section.PointerToRawData + reloc.VirtualAddress
if sym.StorageClass in {
IMAGE_SYM_CLASS.STATIC,
IMAGE_SYM_CLASS.LABEL,
IMAGE_SYM_CLASS.EXTERNAL,
}:
reloc_class = RELOC_CLASSES[self._coff.header.Machine].get(reloc.Type, None)
if reloc_class is not None:
cle_symbol = self.get_symbol(sym_name, produce_extern_symbols=True)
self.relocs.append(reloc_class(self, cle_symbol, patch_offset))
continue
log.warning("Skipped relocation type %#x at %#x for symbol %s", reloc.Type, patch_offset, sym_name)
[docs]
@staticmethod
def is_compatible(stream):
stream.seek(0)
identstring = stream.read(2)
stream.seek(0)
return int.from_bytes(identstring, "little") in (IMAGE_FILE_MACHINE.I386, IMAGE_FILE_MACHINE.AMD64)
[docs]
def get_symbol(self, name: str, produce_extern_symbols: bool = False) -> Symbol | None:
if name not in self._coff.symbol_name_to_idx:
return None
if name == "__ImageBase":
return Symbol(self, name, 0, 0, SymbolType.TYPE_OTHER)
sym = self._coff.symbols[self._coff.symbol_name_to_idx[name]]
if sym.StorageClass in {
IMAGE_SYM_CLASS.STATIC,
IMAGE_SYM_CLASS.LABEL,
IMAGE_SYM_CLASS.EXTERNAL,
}:
symbol_type = SymbolType.TYPE_FUNCTION if sym.Type == 0x20 else SymbolType.TYPE_OTHER
if sym.SectionNumber > 0:
sym_addr = self._coff.sections[sym.SectionNumber - 1].PointerToRawData + sym.Value
return Symbol(self, name, sym_addr, 1, symbol_type)
elif sym.SectionNumber == 0:
if produce_extern_symbols:
return Symbol(self, name, 0, sym.Value, symbol_type)
return None
raise NotImplementedError("Unsupported symbol")
register_backend("COFF", Coff)