Source code for cle.backends.binja

import logging

import archinfo

from cle.address_translator import AT
from cle.errors import CLEError

from .backend import Backend, register_backend
from .relocation import Relocation
from .symbol import Symbol, SymbolType

log = logging.getLogger(name=__name__)

# Error text used when the backend is used without Binary Ninja available.
# Defined unconditionally (not only on the ImportError path) so the name always
# exists at module level, regardless of whether the import below succeeds.
BINJA_NOT_INSTALLED_STR = (
    "Binary Ninja does not appear to be installed. Please ensure Binary Ninja "
    "and its Python API are properly installed before using this backend."
)

# Binary Ninja is an optional dependency: ``bn`` is the imported module when it
# is available and ``None`` otherwise, so later code can simply test ``if bn``.
try:
    import binaryninja as bn
except ImportError:
    bn = None
    log.debug("Unable to import binaryninja module")


class BinjaSymbol(Symbol):
    """
    CLE symbol built from a Binary Ninja symbol.

    The Binary Ninja symbol type is translated to a CLE ``SymbolType`` and import-flavoured symbols are
    flagged through ``is_import``.
    """

    # Binary Ninja symbol types grouped by how this class treats them.
    # Every table is empty when the binaryninja module could not be imported.
    BINJA_FUNC_SYM_TYPES = (
        [
            bn.SymbolType.ImportedFunctionSymbol,
            bn.SymbolType.FunctionSymbol,
            bn.SymbolType.ImportAddressSymbol,
        ]
        if bn
        else []
    )
    BINJA_DATA_SYM_TYPES = (
        [
            bn.SymbolType.ImportedDataSymbol,
            bn.SymbolType.DataSymbol,
        ]
        if bn
        else []
    )
    BINJA_IMPORT_TYPES = (
        [
            bn.SymbolType.ImportedFunctionSymbol,
            bn.SymbolType.ImportAddressSymbol,
            bn.SymbolType.ImportedDataSymbol,
        ]
        if bn
        else []
    )

    def __init__(self, owner, sym):
        """
        :param owner: The backend owning this symbol; ``owner.bv.address_size`` is forwarded to the base
                      ``Symbol`` constructor.
        :param sym:   The Binary Ninja symbol; its ``type``, ``raw_name`` and ``address`` are read.
        :raises CLEError: If the binaryninja module is not available.
        """
        if not bn:
            raise CLEError(BINJA_NOT_INSTALLED_STR)

        # Anything that is neither function-like nor data-like falls back to TYPE_OTHER.
        kind = SymbolType.TYPE_OTHER
        if sym.type in self.BINJA_FUNC_SYM_TYPES:
            kind = SymbolType.TYPE_FUNCTION
        elif sym.type in self.BINJA_DATA_SYM_TYPES:
            kind = SymbolType.TYPE_OBJECT

        raw_name = sym.raw_name
        # NOTE(review): from_rva(...).to_rva() looks like a round trip - confirm whether sym.address really is
        # an RVA or whether a different AT constructor was intended.
        relative_addr = AT.from_rva(sym.address, owner).to_rva()
        super().__init__(owner, raw_name, relative_addr, owner.bv.address_size, kind)

        if sym.type in self.BINJA_IMPORT_TYPES:
            self.is_import = True
        # TODO: set is_weak appropriately
class BinjaReloc(Relocation):
    """
    Relocation used by the Binary Ninja backend.

    Its ``value`` is simply the relocation's own ``relative_addr``.
    """

    @property
    def value(self):
        """Return ``self.relative_addr`` unchanged."""
        relative_addr = self.relative_addr
        return relative_addr
class BinjaBin(Backend):
    """
    Get information from binaries using Binary Ninja. Basing this on idabin.py, but will try to be more complete.
    TODO: add more features as Binary Ninja's feature set improves
    """

    is_default = True  # Tell CLE to automatically consider using the BinjaBin backend

    # Maps Binary Ninja architecture names (``bv.arch.name``) to archinfo architecture instances.
    BINJA_ARCH_MAP = {
        "aarch64": archinfo.ArchAArch64(endness="Iend_LE"),
        "armv7": archinfo.ArchARMEL(endness="Iend_LE"),
        "thumb2": archinfo.ArchARMEL(endness="Iend_LE"),
        "armv7eb": archinfo.ArchARMEL(endness="Iend_BE"),
        "thumb2eb": archinfo.ArchARMEL(endness="Iend_BE"),
        "mipsel32": archinfo.ArchMIPS32(endness="Iend_LE"),
        "mips32": archinfo.ArchMIPS32(endness="Iend_BE"),
        "ppc": archinfo.ArchPPC32(endness="Iend_BE"),
        "ppc_le": archinfo.ArchPPC32(endness="Iend_LE"),
        "x86": archinfo.ArchX86(),
        "x86_64": archinfo.ArchAMD64(),
    }

    def __init__(self, binary, *args, **kwargs):
        """
        Open ``binary`` with Binary Ninja, wait for analysis, and populate memory, symbols and imports.

        :param binary: Path handed to ``BinaryViewType.get_view_of_file`` (a bndb or a binary).
        :raises CLEError: If the binaryninja module is unavailable or no view could be created.
        """
        super().__init__(binary, *args, **kwargs)
        if not bn:
            raise CLEError(BINJA_NOT_INSTALLED_STR)
        # get_view_of_file can take a bndb or binary - wait for autoanalysis to complete
        self.bv = bn.BinaryViewType.get_view_of_file(binary, False)
        if self.bv is None:
            # Fail with a CLE error instead of an AttributeError on the next line.
            raise CLEError("Binary Ninja was unable to create a view of %s" % binary)
        log.info("Analyzing %s, this may take some time...", binary)
        self.bv.update_analysis_and_wait()
        log.info("Analysis complete")
        # Note may want to add option to kick off linear sweep

        try:
            self.set_arch(self.BINJA_ARCH_MAP[self.bv.arch.name])
        except KeyError:
            # NOTE(review): loading continues after this error, and _find_got() below reads self.arch.
            # Confirm an architecture is always available at that point (e.g. supplied by the caller).
            log.error("Architecture %s is not supported.", self.bv.arch.name)

        # Copy the bytes of every segment into CLE's memory at the segment's start address.
        for seg in self.bv.segments:
            log.info("Adding memory for segment at %x.", seg.start)
            br = bn.BinaryReader(self.bv)
            br.seek(seg.start)
            data = br.read(len(seg))
            self.memory.add_backer(seg.start, data)

        self._find_got()
        self._symbol_cache = {}
        self._init_symbol_cache()
        # Note: this represents the plt stub. ImportAddressSymbol refers to .got entries
        # Since we're not trying to import and load dependencies directly, but want to run SimProcedures,
        # We should use the binaryninja.SymbolType.ImportedFunctionSymbol
        # Also this should be generalized to get data imports, too
        self.raw_imports = {
            i.name: i.address for i in self.bv.get_symbols_of_type(bn.SymbolType.ImportedFunctionSymbol)
        }
        self._process_imports()
        self.exports = {}
        self.linking = "dynamic" if self.raw_imports else "static"
        # We'll look for this attribute to see if we need to do SimProcedures for any imports in this binary
        # This is an ugly hack, but will have to use this for now until Binary Ninja exposes dependencies
        self.guess_simprocs = True
        self.guess_simprocs_hint = "nix" if self.bv.get_section_by_name(".plt") else "win"

        log.warning(
            "This backend is based on idabin.py.\n"
            "You may encounter unexpected behavior if:\n"
            "\tyour target depends on library data symbol imports, or\n"
            "\tlibrary imports that don't have a guess-able SimProcedure\n"
            "Good luck!"
        )

    def _process_imports(self):
        """Process self.raw_imports into list of Relocation objects"""
        if not self.raw_imports:
            log.warning("No imports found - if this is a dynamically-linked binary, something probably went wrong.")

        for name, addr in self.raw_imports.items():
            # raw_imports is keyed by ``name`` while the symbol cache is keyed by ``raw_name``; when the two
            # differ the lookup misses, so skip that import with a warning instead of aborting the load.
            symbol = self._symbol_cache.get(name)
            if symbol is None:
                log.warning("Import %s has no entry in the symbol cache; skipping its relocation.", name)
                continue
            # The object is not stored here; presumably Relocation.__init__ registers it - TODO confirm.
            BinjaReloc(self, symbol, addr)

    def _init_symbol_cache(self):
        """Wrap every Binary Ninja symbol in a BinjaSymbol, cache it by raw name and add it to self.symbols."""
        # Note that we could also access name, short_name, or full_name attributes
        for sym in self.bv.get_symbols():
            cle_sym = BinjaSymbol(self, sym)
            self._symbol_cache[sym.raw_name] = cle_sym
            self.symbols.add(cle_sym)

    def _find_got(self):
        """
        Locate the section (e.g. .got) that should be updated when relocating functions (that's where we want to
        write absolute addresses).

        Sets ``self.got_begin`` / ``self.got_end`` (both ``None`` when the section is missing).

        :returns: True if the section was found, False otherwise.
        """
        sec_name = self.arch.got_section_name
        self.got_begin = None
        self.got_end = None

        try:
            got_sec = self.bv.sections[sec_name]
            self.got_begin = got_sec.start
            self.got_end = got_sec.end
        except KeyError:
            log.warning("No got section mapping found!")

        # If we reach this point, we should have the addresses
        if self.got_begin is None or self.got_end is None:
            log.warning("No section %s, is this a static binary ? (or stripped)", sec_name)
            return False
        return True

    @staticmethod
    def is_compatible(stream):
        """
        Report whether ``stream`` looks like a Binary Ninja database (bndb).

        The stream is rewound to offset 0 after its header is read.

        :returns: True only if binaryninja is importable, the data starts with the SQLite 3 magic and the
                  stream has a string ``name`` ending in "bndb".
        """
        if not bn:
            return False
        magic = stream.read(100)
        stream.seek(0)
        # bndb files are SQlite 3
        if not magic.startswith(b"SQLite format 3"):
            return False
        # Not every stream has a usable name: in-memory streams have none and fd-opened files carry an int.
        name = getattr(stream, "name", None)
        return isinstance(name, str) and name.endswith("bndb")

    def in_which_segment(self, addr):
        """
        Return the name of the first section containing address `addr`, or "unknown" if there is none
        (or if that section's name is empty).
        """
        # WARNING: if there are overlapping sections, we choose the first name.
        # The only scenario I've seen here is a NOBITS section that "overlaps" with another one, but
        # I'm not sure if that's a heuristic that should be applied here.
        # https://stackoverflow.com/questions/25501044/gcc-ld-overlapping-sections-tbss-init-array-in-statically-linked-elf-bin#25771838
        sections = self.bv.get_sections_at(addr)
        if not sections:
            return "unknown"
        seg = sections[0].name
        return seg if seg else "unknown"

    def get_symbol_addr(self, sym):
        """
        Get the address of the symbol `sym` from Binary Ninja.

        :param sym: The raw name of the symbol.
        :returns:   An address, or None if Binary Ninja knows no symbol with that raw name.
        """
        # sym is assumed to be the raw_name of the symbol
        symbol = self.bv.get_symbol_by_raw_name(sym)
        if symbol is None:
            return None
        return symbol.address

    def function_name(self, addr):
        """
        Return the function name at address `addr`, or "UNKNOWN" if no function starts there.
        """
        func = self.bv.get_function_at(addr)
        if not func:
            return "UNKNOWN"
        return func.name

    @property
    def min_addr(self):
        """
        Get the min address of the binary. (note: this is probably not "right")
        """
        return self.bv.start

    @property
    def max_addr(self):
        """
        Get the max address of the binary.
        """
        return self.bv.end

    @property
    def entry(self):
        """
        Get the entry point: the custom entry point if one is set, otherwise Binary Ninja's, plus mapped_base.
        """
        if self._custom_entry_point is not None:
            return self._custom_entry_point + self.mapped_base
        return self.bv.entry_point + self.mapped_base

    def get_strings(self):
        """
        Extract strings from binary (Binary Ninja).

        :returns:   An array of strings.
        """
        return self.bv.get_strings()

    def set_got_entry(self, name, newaddr):
        """
        Resolve import `name` with address `newaddr`. That is, update the GOT entry for `name` with `newaddr`.
        """
        if name not in self.imports:
            log.warning("%s not in imports", name)
            return

        # NOTE(review): this passes self.imports[name] to pack_word as an address - confirm the values stored
        # in self.imports are addresses rather than relocation objects.
        addr = self.imports[name]
        self.memory.pack_word(addr, newaddr)

    def close(self):
        """
        Release the BinaryView we created in __init__
        :return: None
        """
        self.bv.file.close()
# Register the BinjaBin backend under the name "binja".
register_backend("binja", BinjaBin)