Source code for cle.backends.ihex

import binascii
import logging
import re
import struct
from typing import List, Optional, Tuple

import archinfo

from cle.errors import CLEError

from .backend import Backend, register_backend

# Module-level logger, named after this module.
log = logging.getLogger(name=__name__)

# Public names of this module: only the Hex backend class is exported.
__all__ = ("Hex",)

# Bytes pattern for a single Intel HEX record. Capture groups, in order:
#   1. byte count   (2 hex digits)
#   2. address      (4 hex digits)
#   3. record type  (2 hex digits)
#   4. data         (optional run of hex digits; None when absent)
#   5. checksum     (2 hex digits)
# The pattern has no end anchor, so text after the checksum is not rejected by the regex itself.
intel_hex_re = re.compile(
    b":([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])"
    b"([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F]+)*([0-9a-fA-F][0-9a-fA-F])"
)

# Record-type codes (third field of a record) handled by the Hex loader.
HEX_TYPE_DATA = 0x00  # data bytes, loaded at the record address plus the current base address
HEX_TYPE_EOF = 0x01  # end of file; the loader stops reading records here
HEX_TYPE_EXTSEGADDR = 0x02  # 16-bit segment value; the base address becomes value * 16
HEX_TYPE_STARTSEGADDR = 0x03  # initial CS:IP pair; the 4 payload bytes are also used as the entry point
HEX_TYPE_EXTLINEARADDR = 0x04  # 16-bit value; the base address becomes value << 16
HEX_TYPE_STARTLINEARADDR = 0x05  # 32-bit entry point (initial EIP)


class Hex(Backend):
    """
    A loader for Intel Hex Objects
    See https://en.wikipedia.org/wiki/Intel_HEX

    The whole file is parsed in the constructor: data records are collected, merged into contiguous regions and
    added to ``self.memory``, while the address and start records set the load base and the entry point.
    """

    is_default = True  # Tell CLE to automatically consider using the Hex backend

    @staticmethod
    def parse_record(line):
        """
        Parse and validate a single Intel HEX record.

        :param line:    One record as bytes, beginning with ``:``. Surrounding whitespace is ignored.
        :return:        A tuple ``(rectype, addr, data)``. ``data`` is the decoded payload as bytes, or None
                        when the record carries no data.
        :raises CLEError: If the record is malformed, its checksum is wrong, or the byte-count field does not
                          match the amount of data actually present.
        """
        record = line.strip()
        m = intel_hex_re.match(record)
        # The pattern has no end anchor, so make sure nothing is left over after the checksum.
        if not m or m.end() != len(record):
            raise CLEError(f"Invalid HEX record: {line!r}")
        count, addr, rectype, data, cksum = m.groups()
        cksum = int(cksum, 16)

        # The checksum covers every byte between the leading ':' and the checksum byte itself.
        try:
            covered = binascii.unhexlify(record[1:-2])
        except binascii.Error as err:
            # An odd number of hex digits cannot be decoded into whole bytes.
            raise CLEError(f"Invalid HEX record: {line!r}") from err
        my_cksum = (-sum(covered)) % 256  # two's complement of the low byte of the sum
        if my_cksum != cksum:
            raise CLEError(f"Invalid checksum: Computed {hex(my_cksum)}, found {hex(cksum)}")

        count = int(count, 16)
        addr = int(addr, 16)
        rectype = int(rectype, 16)
        if data:
            data = binascii.unhexlify(data)
        # Check the length even when the data field is absent, so that a non-zero count with no payload
        # is rejected here instead of failing later.
        actual_len = len(data) if data else 0
        if count != actual_len:
            raise CLEError(f"Data length field does not match length of actual data: {line!r}")
        return rectype, addr, data

    @staticmethod
    def coalesce_regions(regions):
        """
        Merge address-adjacent regions into larger ones.

        :param regions: An iterable of ``(addr, data)`` tuples, where ``data`` is bytes.
        :return:        A list of ``(addr, data)`` tuples sorted by address, in which every run of regions
                        that directly follow one another has been joined into a single entry.
        """
        # Lots of tiny memory regions is bad!
        # The greedy algorithm to smash them together:
        result = []
        run_addr: Optional[int] = None
        run_chunks: List[bytes] = []
        run_size = 0
        for addr, region in sorted(regions):
            if run_addr is not None and run_addr + run_size == addr:
                # This region starts exactly where the current run ends: extend the run.
                run_chunks.append(region)
                run_size += len(region)
            else:
                if run_addr is not None:
                    result.append((run_addr, b"".join(run_chunks)))
                run_addr, run_chunks, run_size = addr, [region], len(region)
        if run_addr is not None:
            result.append((run_addr, b"".join(run_chunks)))
        return result

    @staticmethod
    def _unpack_payload(fmt, data, rectype):
        """
        Unpack a fixed-size record payload, reporting a wrongly sized payload as a CLEError.

        :param fmt:     A ``struct`` format string.
        :param data:    The record payload as bytes, or None if the record had no data.
        :param rectype: The record type, used in the error message only.
        :return:        The tuple produced by ``struct.unpack``.
        :raises CLEError: If the payload size does not match ``fmt``.
        """
        try:
            return struct.unpack(fmt, data or b"")
        except struct.error as err:
            raise CLEError(f"Malformed payload in HEX record of type {hex(rectype)}: {data!r}") from err

    def __init__(self, *args, ignore_missing_arch: bool = False, **kwargs):
        """
        Load an Intel HEX object from the binary stream.

        :param ignore_missing_arch: When True and no architecture was specified, fall back to amd64 instead
                                    of raising. Used internally for testing.
        :raises CLEError: If no architecture is available, or if the file contains an invalid or
                          unsupported record.
        """
        super().__init__(*args, **kwargs)

        if self._arch is None:
            if ignore_missing_arch:
                # used internally for testing. we use a default architecture
                self.set_arch(archinfo.arch_from_id("amd64"))
            else:
                raise CLEError(
                    "To use the Hex binary backend, you need to specify an architecture in the loader options."
                )

        # Do the whole thing in one shot.
        self.os = "unknown"
        got_base = False
        got_entry = False
        self._binary_stream.seek(0)
        string = self._binary_stream.read()
        recs = string.splitlines()
        regions = []
        max_addr = 0
        min_addr = 0xFFFFFFFFFFFFFFFF
        self._base_address = 0
        for rec in recs:
            if not rec.strip():
                # Blank lines carry no record; skip them instead of rejecting the file.
                continue
            rectype, addr, data = Hex.parse_record(rec)
            if rectype == HEX_TYPE_DATA:
                if not data:
                    # A zero-length data record contributes no bytes.
                    continue
                addr += self._base_address
                # Raw data. Put the bytes
                regions.append((addr, data))
                # We have to be careful about the min and max addrs
                min_addr = min(min_addr, addr)
                max_addr = max(max_addr, addr + len(data) - 1)
            elif rectype == HEX_TYPE_EOF:
                # EOF
                log.debug("Got EOF record.")
                break
            elif rectype == HEX_TYPE_EXTSEGADDR:
                # "Extended Mode" Segment address, take this value, multiply by 16, make the base
                self._base_address = Hex._unpack_payload(">H", data, rectype)[0] * 16
                got_base = True
                log.debug("Loading a segment at %#x", self._base_address)
            elif rectype == HEX_TYPE_STARTSEGADDR:
                # Four bytes, the segment and the initial IP
                got_base = True
                got_entry = True
                self._initial_cs, self._initial_ip = Hex._unpack_payload(">HH", data, rectype)
                # The whole thing is the entry, as far as angr is concerned.
                self._entry = Hex._unpack_payload(">I", data, rectype)[0]
                log.debug("Got entry point at %#x", self._entry)
            elif rectype == HEX_TYPE_EXTLINEARADDR:
                got_base = True
                # Specifies the base for all future data bytes.
                self._base_address = Hex._unpack_payload(">H", data, rectype)[0] << 16
                log.debug("Loading a segment at %#x", self._base_address)
            elif rectype == HEX_TYPE_STARTLINEARADDR:
                got_entry = True
                # The 32-bit EIP, really the same as STARTSEGADDR, but some compilers pick one over the other.
                self._entry = Hex._unpack_payload(">I", data, rectype)[0]
                log.debug("Found entry point at %#x", self._entry)
                self._initial_eip = self._entry
            else:
                raise CLEError("This HEX Object type is not implemented: " + hex(rectype))
        if not got_base:
            log.warning("No base address was found in this HEX object file. It is assumed to be 0")
        if not got_entry:
            log.warning(
                "No entry point was found in this HEX object file, and it is assumed to be 0. "
                "Specify one with `entry_point` to override."
            )
        # HEX specifies a ton of tiny little memory regions.  We now smash them together to make things faster.
        new_regions = Hex.coalesce_regions(regions)
        self.regions: List[Tuple[int, int]] = []  # A list of (addr, size)
        for addr, data in new_regions:
            self.memory.add_backer(addr, data)
            self.regions.append((addr, len(data)))
        self._max_addr = max_addr
        self._min_addr = min_addr

    @staticmethod
    def is_compatible(stream):
        """
        Report whether ``stream`` looks like an Intel HEX file.

        :param stream:  A seekable binary stream; it is rewound to offset 0 before returning.
        :return:        True if the stream begins with ``:``, False otherwise.
        """
        stream.seek(0)
        s = stream.read(0x10)
        stream.seek(0)
        return s.startswith(b":")
# Register the Hex class under the name "hex" (presumably so the loader can select it by that name;
# see register_backend in .backend to confirm).
register_backend("hex", Hex)