# Source code for cle.backends.macho.binding

# This file is part of Mach-O Loader for CLE.
# Contributed December 2016 by Fraunhofer SIT (https://www.sit.fraunhofer.de/en/) and updated in September 2019.

import logging
import struct
from collections.abc import Callable
from typing import TYPE_CHECKING

from cle.address_translator import AT
from cle.backends.relocation import Relocation
from cle.errors import CLEInvalidBinaryError

from .macho_enums import RebaseOpcode, RebaseType
from .symbol import AbstractMachOSymbol, BindingSymbol, DyldBoundSymbol, SymbolTableSymbol

if TYPE_CHECKING:
    from .macho import MachO

log = logging.getLogger(name=__name__)

OPCODE_MASK = 0xF0
IMM_MASK = 0x0F
BIND_TYPE_POINTER = 1
BIND_TYPE_TEXT_ABSOLUTE32 = 2
BIND_TYPE_TEXT_PCREL32 = 3
BIND_OPCODE_DONE = 0x00
BIND_OPCODE_SET_DYLIB_ORDINAL_IMM = 0x10
BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB = 0x20
BIND_OPCODE_SET_DYLIB_SPECIAL_IMM = 0x30
BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM = 0x40
BIND_OPCODE_SET_TYPE_IMM = 0x50
BIND_OPCODE_SET_ADDEND_SLEB = 0x60
BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB = 0x70
BIND_OPCODE_ADD_ADDR_ULEB = 0x80
BIND_OPCODE_DO_BIND = 0x90
BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB = 0xA0
BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED = 0xB0
BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB = 0xC0

# Byte accessor used by the LEB128 readers. Where ``bytes`` and ``str`` are the
# same type, indexing yields a one-character string that has to go through
# ``ord``; otherwise indexing already yields an int and is passed through as is.
if bytes is str:
    chh = ord
else:

    def chh(x):
        """Return ``x`` unchanged."""
        return x
def read_uleb(blob: bytes, offset: int) -> tuple[int, int]:
    """Reads a number encoded as uleb128

    :param blob: buffer containing the encoded number
    :param offset: position in ``blob`` of the first encoded byte
    :return: tuple ``(value, length)`` where ``length`` is the number of bytes consumed
    """
    result = 0
    shift = 0
    index = offset
    end = len(blob)
    while index < end:
        # Indexing bytes yields an int, so no per-byte conversion is required.
        byte = blob[index]
        index += 1
        # The low seven bits carry payload, least significant group first.
        result |= (byte & 0x7F) << shift
        shift += 7
        if not byte & 0x80:
            # A cleared high bit marks the last byte of the number.
            break
    # If the blob ends before a terminating byte was seen, the bits read so far are returned.
    return result, index - offset
def read_sleb(blob: bytes, offset: int) -> tuple[int, int]:
    """Reads a number encoded as sleb128

    :param blob: buffer containing the encoded number
    :param offset: position in ``blob`` of the first encoded byte
    :return: tuple ``(value, length)`` where ``length`` is the number of bytes consumed
    """
    result = 0
    shift = 0
    index = offset
    end = len(blob)
    while index < end:
        # Indexing bytes yields an int, so no per-byte conversion is required.
        byte = blob[index]
        index += 1
        result |= (byte & 0x7F) << shift
        shift += 7
        if not byte & 0x80:
            # Last byte: bit 6 is the sign bit of the whole number.
            if byte & 0x40:
                # two's complement
                result -= 1 << shift
            break
    # If the blob ends before a terminating byte was seen, no sign extension is applied.
    return result, index - offset
class BindingState:
    """State object

    A single instance is passed to every binding opcode handler while a binding blob
    is interpreted; the handlers read and update its attributes in place.
    """

    def __init__(self, is_64):
        """
        :param is_64: True for a 64 bit binary; selects the pointer size used to advance ``address``
        """
        self.index = 0  # read offset into the opcode blob
        self.done = False  # set by the DONE opcode handler, stops the interpreter loop
        self.lib_ord = 0
        self.sym_name = ""
        self.sym_flags = 0
        self.binding_type = 0
        self.addend = 0
        self.segment_index = 0
        self.address = 0
        self.seg_end_address = 0  # TODO: no rebasing support
        # address is expected to properly overflow and address is uintptr_t (unsigned long according to _uintptr_t.h)
        self.wraparound = 2**64
        self.sizeof_intptr_t = 8 if is_64 else 4  # experimentally determined
        self.bind_handler = None  # function(state,binary) => None

    def add_address_ov(self, address, addend):
        """Store ``address + addend`` in ``self.address`` with unsigned overflow semantics.

        This is a very ugly kludge. It is needed because dyld relies on overflow semantics and
        represents several negative offsets through BIG ulebs.

        :param address: base address
        :param addend: offset to add; a "negative" offset arrives as a huge unsigned value
        """
        # Reduce modulo the pointer range so a sum of exactly ``wraparound`` becomes 0
        # (a plain ``>`` comparison would leave it outside the valid range).
        self.address = (address + addend) % self.wraparound

    def check_address_bounds(self):
        """Ensure the current address lies before the end of the current segment.

        :raises CLEInvalidBinaryError: if ``address`` is at or beyond ``seg_end_address``
        """
        if self.address >= self.seg_end_address:
            log.error(
                "index %d: address >= seg_end_address (%#x >= %#x)", self.index, self.address, self.seg_end_address
            )
            raise CLEInvalidBinaryError()
class BindingHelper:
    """Factors out binding logic from MachO.

    Intended to work in close conjunction with MachO, not for standalone use.
    """

    binary: "MachO"

    def __init__(self, binary):
        """
        :param binary: the MachO object whose segments, symbols and relocations are processed
        """
        self.binary = binary

    def do_normal_bind(self, blob: bytes):
        """Performs non-lazy, non-weak bindings

        :param blob: Blob containing binding opcodes; nothing is done when it is ``None``
        """
        if blob is None:
            return  # skip

        log.debug("Binding non-lazy, non-weak symbols")
        s = BindingState(self.binary.arch.bits == 64)
        seg = self.binary.segments[0]
        s.seg_end_address = seg.vaddr + seg.memsize
        s.bind_handler = default_binding_handler
        self._do_bind_generic(
            blob,
            s,
            {
                BIND_OPCODE_DONE: n_opcode_done,
                BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: n_opcode_set_dylib_ordinal_imm,
                BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: n_opcode_set_dylib_ordinal_uleb,
                BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: n_opcode_set_dylib_special_imm,
                BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: n_opcode_set_trailing_flags_imm,
                BIND_OPCODE_SET_TYPE_IMM: n_opcode_set_type_imm,
                BIND_OPCODE_SET_ADDEND_SLEB: n_opcode_set_addend_sleb,
                BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: n_opcode_set_segment_and_offset_uleb,
                BIND_OPCODE_ADD_ADDR_ULEB: n_opcode_add_addr_uleb,
                BIND_OPCODE_DO_BIND: n_opcode_do_bind,
                BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB: n_opcode_do_bind_add_addr_uleb,
                BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED: n_opcode_do_bind_add_addr_imm_scaled,
                BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB: n_opcode_do_bind_uleb_times_skipping_uleb,
            },
        )

        log.debug("Done binding non-lazy, non-weak symbols ")

    def do_lazy_bind(self, blob):
        """
        Performs lazy binding

        :param blob: Blob containing lazy binding opcodes; nothing is done when it is ``None``
        """
        if blob is None:
            return  # skip

        log.debug("Binding lazy symbols")
        s = BindingState(self.binary.arch.bits == 64)
        s.bind_handler = default_binding_handler
        end = len(blob)

        # The handler table does not change between entries, so build it once.
        lazy_handlers = {
            BIND_OPCODE_DONE: n_opcode_done,
            BIND_OPCODE_SET_DYLIB_ORDINAL_IMM: n_opcode_set_dylib_ordinal_imm,
            BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB: n_opcode_set_dylib_ordinal_uleb,
            BIND_OPCODE_SET_DYLIB_SPECIAL_IMM: n_opcode_set_dylib_special_imm,
            BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM: n_opcode_set_trailing_flags_imm,
            BIND_OPCODE_SET_TYPE_IMM: n_opcode_set_type_imm,
            BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: l_opcode_set_segment_and_offset_uleb,
            BIND_OPCODE_DO_BIND: l_opcode_do_bind,
        }

        # Every lazy binding entry ends with BIND_OPCODE_DONE, so the interpreter has to be
        # restarted for each entry until the whole blob has been consumed.
        while s.index < end:
            # re-initialise state (except index)
            s.binding_type = 1
            s.address = 0
            s.sym_name = ""
            s.sym_flags = 0
            s.lib_ord = 0
            s.done = False
            s.addend = 0
            s.segment_index = 0
            s.seg_end_address = 0  # TODO: no rebasing support

            self._do_bind_generic(blob, s, lazy_handlers)

        log.debug("Done binding lazy symbols")

    def do_rebases(self, blob: bytes):
        """
        Handles the rebase blob
        Implementation based closely on ImageLoaderMachOCompressed::rebase from dyld
        https://github.com/apple-opensource/dyld/blob/e3f88907bebb8421f50f0943595f6874de70ebe0/src/ImageLoaderMachOCompressed.cpp#L382-L463

        :param blob: Blob containing rebase opcodes; nothing is done when it is ``None``
        :return:
        :raises CLEInvalidBinaryError: if a repeated rebase runs past the end of the current
            segment or the opcode is not handled
        """
        if blob is None:
            return

        # State variables
        reloc_type: RebaseType | None = None
        done = False
        segment = None
        address = None
        index = 0
        end = len(blob)
        pointer_size = self.binary.arch.bytes

        while not done and index < end:
            opcode, immediate = RebaseOpcode.parse_byte(blob[index])
            index += 1

            if opcode == RebaseOpcode.DONE:
                done = True

            elif opcode == RebaseOpcode.SET_TYPE_IMM:
                reloc_type = RebaseType(immediate)

            elif opcode == RebaseOpcode.SET_SEGMENT_AND_OFFSET_ULEB:
                segment = self.binary.segments[immediate]
                offset, index = self.read_uleb(blob, index)
                address = segment.vaddr + offset

            elif opcode == RebaseOpcode.ADD_ADDR_ULEB:
                uleb, index = self.read_uleb(blob, index)
                address += uleb

            elif opcode == RebaseOpcode.ADD_ADDR_IMM_SCALED:
                address += immediate * pointer_size

            elif opcode == RebaseOpcode.DO_REBASE_IMM_TIMES:
                for _ in range(immediate):
                    self.rebase_at(address, reloc_type)
                    address += pointer_size

            elif opcode == RebaseOpcode.DO_REBASE_ULEB_TIMES:
                count, index = self.read_uleb(blob, index)
                for _ in range(count):
                    if address >= segment.vaddr + segment.memsize:
                        raise CLEInvalidBinaryError()
                    self.rebase_at(address, reloc_type)
                    address += pointer_size

            elif opcode == RebaseOpcode.DO_REBASE_ADD_ADDR_ULEB:
                self.rebase_at(address, reloc_type)
                uleb, index = self.read_uleb(blob, index)
                address += uleb + pointer_size

            elif opcode == RebaseOpcode.DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
                count, index = self.read_uleb(blob, index)
                skip, index = self.read_uleb(blob, index)
                for _ in range(count):
                    if address >= segment.vaddr + segment.memsize:
                        raise CLEInvalidBinaryError()
                    self.rebase_at(address, reloc_type)
                    address += skip + pointer_size

            else:
                raise CLEInvalidBinaryError("Invalid opcode for current binding: %#x" % opcode)

    @staticmethod
    def read_uleb(blob, offset) -> tuple[int, int]:
        """
        little helper to read ulebs, that also returns the new index

        :param blob: buffer containing the encoded number
        :param offset: position of the first encoded byte
        :return: tuple ``(value, index of the first byte after the number)``
        """
        uleb, length = read_uleb(blob, offset)
        return uleb, offset + length

    def rebase_at(self, address: int, ty: RebaseType):
        """Record a pointer relocation for the word stored at ``address``.

        :param address: linked virtual address of the pointer that has to be rebased
        :param ty: rebase type that is in effect for this location
        :raises NotImplementedError: for ``RebaseType.TEXT_PCREL32``
        :raises ValueError: for any other unsupported rebase type (including ``None``)
        """
        relative_rebase_location = AT.from_lva(address, self.binary).to_rva()
        unslid_pointer = self.binary.memory.unpack_word(relative_rebase_location)
        relative_pointer = AT.from_lva(unslid_pointer, self.binary).to_rva()

        # POINTER and TEXT_ABSOLUTE32 are handled the same way here.
        if ty in (RebaseType.POINTER, RebaseType.TEXT_ABSOLUTE32):
            reloc = MachOPointerRelocation(self.binary, relative_rebase_location, relative_pointer)
        elif ty == RebaseType.TEXT_PCREL32:
            raise NotImplementedError()
        else:
            # %r instead of %#x: the type may be None when no SET_TYPE_IMM was seen, and
            # formatting None with %#x would raise TypeError instead of this ValueError.
            raise ValueError("Invalid rebase type: %r" % (ty,))
        self.binary.relocs.append(reloc)

    def _do_bind_generic(
        self,
        blob,
        init_state: BindingState,
        opcode_dict: dict[int, Callable[[BindingState, "MachO", int, bytes], BindingState]],
    ):
        """
        Does the actual binding work. Represents a generic framework for interpreting binding opcodes

        :param blob: blob of binding opcodes
        :param init_state: Initial BindingState
        :param opcode_dict: Dictionary opcode=> handler
        :return: resulting binding state
        """
        s = init_state
        seg = self.binary.segments[s.segment_index]
        s.seg_end_address = seg.vaddr + seg.memsize  # TODO: no rebasing support
        end = len(blob)
        while not s.done and s.index < end:
            log.debug("Current address: %#x, blob index (offset): %#x", s.address, s.index)
            raw_opcode = blob[s.index]
            opcode = raw_opcode & OPCODE_MASK
            immediate = raw_opcode & IMM_MASK
            s.index += 1

            # Only the table lookup decides whether the opcode is valid. The handler runs
            # outside of any KeyError handling so that a KeyError raised while binding is
            # not swallowed and misreported as an invalid opcode.
            handler = opcode_dict.get(opcode)
            if handler is None:
                log.error("Invalid opcode for current binding: %#x", opcode)
                continue
            s = handler(s, self.binary, immediate, blob)

        return s
# pylint: disable=unused-argument
# The following functions realize different variants of handling binding opcodes.
# The format is: def X(state, binary, immediate, blob) => state
def n_opcode_done(s: BindingState, _b: "MachO", _i: int, _blob: bytes) -> BindingState:
    """Handle BIND_OPCODE_DONE by marking the state as finished.

    :param s: current binding state
    :return: the same state with ``done`` set
    """
    log.debug("BIND_OPCODE_DONE @ %#x", s.index)
    s.done = True
    return s
def n_opcode_set_dylib_ordinal_imm(s: BindingState, _b: "MachO", i: int, _blob: bytes) -> BindingState:
    """Handle SET_DYLIB_ORDINAL_IMM: the immediate is the library ordinal.

    :param s: current binding state
    :param i: immediate value of the opcode byte
    :return: the updated state
    """
    log.debug("SET_DYLIB_ORDINAL_IMM @ %#x: %d", s.index, i)
    s.lib_ord = i
    return s
def n_opcode_set_dylib_ordinal_uleb(s: BindingState, _b: "MachO", _i: int, blob: bytes) -> BindingState:
    """Handle SET_DYLIB_ORDINAL_ULEB: the library ordinal follows as a uleb128.

    :param s: current binding state
    :param blob: binding opcode blob
    :return: the updated state, with ``index`` moved past the uleb
    """
    ordinal, consumed = read_uleb(blob, s.index)
    s.lib_ord = ordinal
    s.index += consumed
    # Note that the logged offset is the one after the operand has been consumed.
    log.debug("SET_DYLIB_ORDINAL_ULEB @ %#x: %d", s.index, s.lib_ord)
    return s
def n_opcode_set_dylib_special_imm(s: BindingState, _b: "MachO", i: int, _blob: bytes) -> BindingState:
    """Handle SET_DYLIB_SPECIAL_IMM: the immediate encodes a zero or negative library ordinal.

    :param s: current binding state
    :param i: immediate value of the opcode byte
    :return: the updated state
    """
    # A non-zero immediate is sign-extended: fill the upper nibble and
    # reinterpret the resulting byte as a negative number.
    s.lib_ord = (i | OPCODE_MASK) - 256 if i != 0 else 0
    log.debug("SET_DYLIB_SPECIAL_IMM @ %#x: %d", s.index, s.lib_ord)
    return s
def n_opcode_set_trailing_flags_imm(s: BindingState, _b: "MachO", i: int, blob: bytes) -> BindingState:
    """Handle SET_SYMBOL_TRAILING_FLAGS_IMM: read the symbol flags and the NUL-terminated symbol name.

    :param s: current binding state
    :param i: immediate value of the opcode byte, stored as the symbol flags
    :param blob: binding opcode blob; the symbol name starts at ``s.index``
    :return: the updated state, with ``index`` moved past the terminating 0 byte
    :raises CLEInvalidBinaryError: if the symbol name is not terminated inside the blob
    """
    start = s.index
    terminator = blob.find(b"\x00", start)
    if terminator < 0:
        log.error("SET_SYMBOL_TRAILING_FLAGS_IMM @ %#x: unterminated symbol name", start)
        raise CLEInvalidBinaryError()

    s.sym_flags = i
    # latin-1 maps every byte to the code point of the same value, i.e. it is the
    # bulk equivalent of calling chr() on each byte.
    s.sym_name = blob[start:terminator].decode("latin-1")
    s.index = terminator + 1  # move past 0 byte

    log.debug("SET_SYMBOL_TRAILING_FLAGS_IMM @ %#x: %r,%#x", start, s.sym_name, s.sym_flags)
    return s
def n_opcode_set_type_imm(s: BindingState, _b: "MachO", i: int, _blob: bytes) -> BindingState:
    """Handle SET_TYPE_IMM: the immediate is the binding type.

    :param s: current binding state
    :param i: immediate value of the opcode byte
    :return: the updated state
    """
    # pylint: disable=unused-argument
    s.binding_type = i
    log.debug("SET_TYPE_IMM @ %#x: %d", s.index, s.binding_type)
    return s
def n_opcode_set_addend_sleb(s: BindingState, _b: "MachO", _i: int, blob: bytes) -> BindingState:
    """Handle SET_ADDEND_SLEB: the addend follows as a sleb128.

    :param s: current binding state
    :param blob: binding opcode blob
    :return: the updated state, with ``index`` moved past the sleb
    """
    addend, consumed = read_sleb(blob, s.index)
    s.addend = addend
    log.debug("SET_ADDEND_SLEB @ %#x: %d", s.index, s.addend)
    s.index += consumed
    return s
def n_opcode_set_segment_and_offset_uleb(s: BindingState, b: "MachO", i: int, blob: bytes) -> BindingState:
    """Handle SET_SEGMENT_AND_OFFSET_ULEB for normal bindings.

    The immediate selects the segment and the following uleb128 is the offset into it.

    :param s: current binding state
    :param b: binary providing the segments
    :param i: immediate value of the opcode byte (segment index)
    :param blob: binding opcode blob
    :return: the updated state with new ``segment_index``, ``address`` and ``seg_end_address``
    """
    s.segment_index = i
    offset, consumed = read_uleb(blob, s.index)
    log.debug("(n)SET_SEGMENT_AND_OFFSET_ULEB @ %#x: %d, %d", s.index, s.segment_index, offset)
    s.index += consumed

    segment = b.segments[s.segment_index]
    s.add_address_ov(segment.vaddr, offset)
    s.seg_end_address = segment.vaddr + segment.memsize
    return s
def l_opcode_set_segment_and_offset_uleb(s: BindingState, b: "MachO", i: int, blob: bytes) -> BindingState:
    """Handle SET_SEGMENT_AND_OFFSET_ULEB for lazy bindings.

    Unlike the normal variant this only updates ``address``; ``segment_index`` and
    ``seg_end_address`` are left untouched.

    :param s: current binding state
    :param b: binary providing the segments
    :param i: immediate value of the opcode byte (segment index)
    :param blob: binding opcode blob
    :return: the updated state, with ``index`` moved past the uleb
    """
    offset, consumed = read_uleb(blob, s.index)
    log.debug("(l)SET_SEGMENT_AND_OFFSET_ULEB @ %#x: %d, %d", s.index, i, offset)
    segment = b.segments[i]
    s.add_address_ov(segment.vaddr, offset)
    s.index += consumed
    return s
def n_opcode_add_addr_uleb(s: BindingState, _b: "MachO", _i: int, blob: bytes) -> BindingState:
    """Handle ADD_ADDR_ULEB: advance the address by the following uleb128 (with overflow).

    :param s: current binding state
    :param blob: binding opcode blob
    :return: the updated state, with ``index`` moved past the uleb
    """
    delta, consumed = read_uleb(blob, s.index)
    s.add_address_ov(s.address, delta)
    log.debug("ADD_ADDR_ULEB @ %#x: %d", s.index, delta)
    s.index += consumed
    return s
def n_opcode_do_bind(s: BindingState, b: "MachO", _i: int, _blob: bytes) -> BindingState:
    """Handle DO_BIND for normal bindings: bind at the current address, then step one pointer ahead.

    :param s: current binding state
    :param b: binary that is being bound
    :return: the updated state
    :raises CLEInvalidBinaryError: if the current address is outside of the current segment
    """
    log.debug("(n)DO_BIND @ %#x", s.index)
    s.check_address_bounds()
    s.bind_handler(s, b)
    pointer_size = s.sizeof_intptr_t
    s.add_address_ov(s.address, pointer_size)
    return s
def l_opcode_do_bind(s: BindingState, b: "MachO", _i: int, _blob: bytes) -> BindingState:
    """Handle DO_BIND for lazy bindings: bind at the current address without advancing it.

    :param s: current binding state
    :param b: binary that is being bound
    :return: the state as left by the bind handler
    """
    log.debug("(l)DO_BIND @ %#x", s.index)
    bind = s.bind_handler
    bind(s, b)
    return s
def n_opcode_do_bind_add_addr_uleb(s: BindingState, b: "MachO", _i: int, blob: bytes) -> BindingState:
    """Handle DO_BIND_ADD_ADDR_ULEB: bind, then advance by one pointer plus the following uleb128.

    :param s: current binding state
    :param b: binary that is being bound
    :param blob: binding opcode blob
    :return: the updated state, with ``index`` moved past the uleb
    :raises CLEInvalidBinaryError: if the current address is outside of the current segment
    """
    extra, consumed = read_uleb(blob, s.index)
    log.debug("DO_BIND_ADD_ADDR_ULEB @ %#x: %d", s.index, extra)

    if s.address >= s.seg_end_address:
        log.error(
            "DO_BIND_ADD_ADDR_ULEB @ %#x: address >= seg_end_address (%#x>=%#x)", s.index, s.address, s.seg_end_address
        )
        raise CLEInvalidBinaryError()

    s.index += consumed
    s.bind_handler(s, b)

    # this is done AFTER binding in preparation for the NEXT step
    s.add_address_ov(s.address, extra + s.sizeof_intptr_t)
    return s
def n_opcode_do_bind_add_addr_imm_scaled(s: BindingState, b: "MachO", i: int, _blob: bytes) -> BindingState:
    """Handle DO_BIND_ADD_ADDR_IMM_SCALED: bind, then skip ``i`` additional pointer-sized slots.

    :param s: current binding state
    :param b: binary that is being bound
    :param i: immediate value of the opcode byte (number of extra pointers to skip)
    :return: the updated state
    :raises CLEInvalidBinaryError: if the current address is outside of the current segment
    """
    log.debug("DO_BIND_ADD_ADDR_IMM_SCALED @ %#x: %d", s.index, i)

    if s.address >= s.seg_end_address:
        log.error(
            "DO_BIND_ADD_ADDR_IMM_SCALED @ %#x: address >= seg_end_address (%#x>=%#x)",
            s.index,
            s.address,
            s.seg_end_address,
        )
        raise CLEInvalidBinaryError()

    s.bind_handler(s, b)

    # this is done AFTER binding in preparation for the NEXT step:
    # one pointer for the slot just bound plus ``i`` skipped pointers
    step = (i + 1) * s.sizeof_intptr_t
    s.add_address_ov(s.address, step)
    return s
def n_opcode_do_bind_uleb_times_skipping_uleb(s: BindingState, b: "MachO", _i: int, blob: bytes) -> BindingState:
    """Handle DO_BIND_ULEB_TIMES_SKIPPING_ULEB: bind ``count`` times, skipping ``skip`` bytes in between.

    Both ``count`` and ``skip`` follow the opcode as uleb128 numbers. After every bind the
    address is advanced by ``skip`` plus one pointer size.

    :param s: current binding state
    :param b: binary that is being bound
    :param blob: binding opcode blob
    :return: the updated state, with ``index`` moved past both ulebs
    :raises CLEInvalidBinaryError: if an address to bind is outside of the current segment
    """
    opcode_offset = s.index  # offset of the first operand, used for logging
    count, count_len = read_uleb(blob, s.index)
    s.index += count_len
    skip, skip_len = read_uleb(blob, s.index)
    s.index += skip_len
    log.debug("DO_BIND_ULEB_TIMES_SKIPPING_ULEB @ %#x: %d,%d", opcode_offset, count, skip)

    for _ in range(count):
        if s.address >= s.seg_end_address:
            # Report the opcode that is actually being handled here.
            log.error(
                "DO_BIND_ULEB_TIMES_SKIPPING_ULEB @ %#x: address >= seg_end_address (%#x >= %#x)",
                opcode_offset,
                s.address,
                s.seg_end_address,
            )
            raise CLEInvalidBinaryError()
        s.bind_handler(s, b)
        s.add_address_ov(s.address, skip + s.sizeof_intptr_t)
    return s
class MachOSymbolRelocation(Relocation):
    """
    Generic Relocation for MachO. It handles relocations that point to symbols
    """

    def __init__(self, owner: "MachO", symbol: AbstractMachOSymbol, relative_addr: int, data):
        """
        :param owner: binary this relocation belongs to
        :param symbol: symbol the relocation points to
        :param relative_addr: relative address of the relocation
        :param data: payload stored on the relocation as ``data``
        """
        super().__init__(owner, symbol, relative_addr)
        self.data = data

    def resolve_symbol(self, solist, thumb=False, extern_object=None, **kwargs):
        """Resolve the symbol against ``solist``, falling back to a newly created extern symbol.

        :param solist: objects that are searched for the library named by the symbol
        :param thumb: forwarded to ``make_extern`` when an extern symbol is created
        :param extern_object: object used to create extern symbols
        :raises NotImplementedError: if the symbol is of an unexpected type
        """
        supported = (SymbolTableSymbol, BindingSymbol, DyldBoundSymbol)
        if not isinstance(self.symbol, supported):
            raise NotImplementedError("Did not expect this to happen")

        for so in solist:
            if self.symbol.library_base_name != so.binary_basename:
                continue
            (symbol,) = so.get_symbol(self.symbol.name)
            assert symbol.is_export
            self.resolve(symbol, extern_object=extern_object)
            log.info("Resolved %s to %s", self.symbol.name, symbol)
            return

        # None of the available libraries contain it, so we create an extern symbol for it
        new_symbol = extern_object.make_extern(self.symbol.name, sym_type=self.symbol._type, thumb=thumb)
        self.resolve(new_symbol, extern_object=extern_object)

    @property
    def dest_addr(self):
        """The relative address of this relocation."""
        return self.relative_addr

    @property
    def value(self):
        """The rebased address of the symbol this relocation was resolved by."""
        resolver = self.resolvedby
        return resolver.rebased_addr

    def __repr__(self):
        return f"<MachO Reloc for {self.symbol} at {hex(self.relative_addr)}>"
class MachOPointerRelocation(Relocation):
    """
    A relocation for a pointer without any associated symbol

    These are either generated while handling the rebase blob, or while parsing chained fixups
    """

    def __init__(self, owner: "MachO", relative_addr: int, data):
        """
        :param owner: binary this relocation belongs to
        :param relative_addr: the relative addr where this relocation is located
        :param data: the rebase offset relative to the linked base
        """
        super().__init__(owner, None, relative_addr)
        self.data = data

    @property
    def value(self):
        """The owner's mapped base plus the stored rebase offset."""
        base = self.owner.mapped_base
        return base + self.data

    def resolve_symbol(self, solist, thumb=False, extern_object=None, **kwargs):
        """
        This relocation has no associated symbol, so there is nothing to look up.

        :param solist: unused
        :param thumb: unused
        :param extern_object: unused
        :param kwargs: unused
        :return:
        """
        # This needs to be set to true, so that the rebase will actually be applied later
        self.resolved = True

    def __repr__(self):
        return f"<MachO Ptr Fixup at {hex(self.relative_addr)} to {hex(self.data)}>"
# default binding handler
def default_binding_handler(state: BindingState, binary: "MachO"):
    """Binds location to the symbol with the given name and library ordinal

    The symbol is looked up by ``state.sym_name`` and ``state.lib_ord``; if it does not exist
    yet, a new BindingSymbol is created and registered with the binary. Pointer bindings are
    recorded as relocations, the 32 bit text bindings are written to memory directly.

    :param state: binding state describing what to bind and where
    :param binary: binary that is being bound
    :raises CLEInvalidBinaryError: if the symbol is ambiguous or the binding type is unknown
    """
    # locate the symbol:
    matches = binary.symbols.get_by_name_and_ordinal(state.sym_name, state.lib_ord)
    if len(matches) > 1:
        log.error("Cannot bind: More than one match for (%r,%d)", state.sym_name, state.lib_ord)
        raise CLEInvalidBinaryError()

    if matches:
        symbol = matches[0]
    else:
        log.info("No match for (%r,%d), generating BindingSymbol ...", state.sym_name, state.lib_ord)
        symbol = BindingSymbol(binary, state.sym_name, state.lib_ord)
        binary.symbols.add(symbol)
        binary._ordered_symbols.append(symbol)

    location = state.address

    # If the linked_addr is equal to zero, it's an imported symbol which is by that time unresolved.
    # Don't write addend's there
    value = symbol.linked_addr + state.addend if symbol.linked_addr != 0 else 0x0

    if state.binding_type == BIND_TYPE_POINTER:
        log.debug("Updating address %#x with symobl %r @ %#x", location, state.sym_name, value)
        addr = AT.from_lva(location, binary).to_rva()
        data = struct.pack(binary.struct_byteorder + ("Q" if binary.arch.bits == 64 else "I"), value)
        reloc = MachOSymbolRelocation(binary, symbol, addr, data)
        binary.relocs.append(reloc)
        symbol.bind_xrefs.append(location)

    elif state.binding_type in (BIND_TYPE_TEXT_ABSOLUTE32, BIND_TYPE_TEXT_PCREL32):
        location_32 = location % (2**32)
        if state.binding_type == BIND_TYPE_TEXT_ABSOLUTE32:
            value_32 = value % (2**32)
        else:
            # PC relative: the displacement is measured from the end of the 4 byte field
            value_32 = (value - (location + 4)) % (2**32)

        # The arguments follow the placeholders (address, symbol name, value); passing the
        # name to %#x would make the logging call itself fail.
        log.debug("Updating address %#x with symobl %r @ %#x", location_32, state.sym_name, value_32)
        binary.memory.store(
            AT.from_lva(location_32, binary).to_rva(), struct.pack(binary.struct_byteorder + "I", value_32)
        )
        symbol.bind_xrefs.append(location_32)

    else:
        log.error("Unknown BIND_TYPE: %d", state.binding_type)
        raise CLEInvalidBinaryError()