Source code for angr.analyses.boyscout
import logging
import re
from collections import defaultdict
from archinfo import all_arches
from archinfo.arch_arm import is_arm_arch
from . import Analysis
# Module-level logger named after this module; BoyScout emits its debug output through it.
l = logging.getLogger(name=__name__)
class BoyScout(Analysis):
    """
    Try to determine the architecture and endianness of a binary blob.

    The guess is computed once, at construction time, and stored on the instance:

    - ``arch``: name of the architecture with the most votes, or ``"DATA"`` when there are too few hits for the
      blob to plausibly be code. Every architecture for which ``is_arm_arch()`` is true is reported as ``"ARM"``.
    - ``endianness``: the ``memory_endness`` of the winning architecture, or ``""`` when ``arch`` is ``"DATA"``.
    - ``votes``: mapping of ``(architecture name, memory endness)`` to the number of aligned matches, kept for
      debugging. Only keys that received at least one hit are present.
    """

    def __init__(self, cookiesize=1):
        """
        :param cookiesize: Scales the detection threshold: the winning architecture needs at least
                           ``cookiesize * 2`` hits, otherwise the blob is classified as ``"DATA"``.
        """
        self.arch = None
        self.endianness = None
        self.votes = None
        self.cookiesize = cookiesize

        self._reconnoiter()

    def _reconnoiter(self):
        """
        Match the function prolog/epilog patterns of every supported architecture against the memory backers of
        the main binary, count the matches that are properly aligned, and let the architectures vote.

        The outcome is written to ``self.arch``, ``self.endianness`` and ``self.votes``; nothing is returned.
        """
        votes = defaultdict(int)

        for arch in all_arches:
            if not arch.function_prologs:
                continue
            # TODO: BoyScout does not support Thumb-only / Cortex-M binaries yet.

            regexes = {
                re.compile(ins_regex) for ins_regex in set(arch.function_prologs).union(arch.function_epilogs)
            }

            # All ARM flavours share a single "ARM" bucket. The key is computed once so that counting and
            # logging always refer to the same entry.
            vote_key = ("ARM" if is_arm_arch(arch) else arch.name, arch.memory_endness)

            for start_, data in self.project.loader.main_object.memory.backers():
                for regex in regexes:
                    # Match them!
                    for mo in regex.finditer(data):
                        position = mo.start() + start_
                        # Only matches on an instruction boundary count as evidence.
                        if position % arch.instruction_alignment == 0:
                            votes[vote_key] += 1

            # .get() instead of indexing: reading a defaultdict by index would insert a zero-vote entry.
            # For ARM variants the figure is the running total of the shared "ARM" bucket.
            l.debug("%s %s hits %d times", arch.name, arch.memory_endness, votes.get(vote_key, 0))

        if votes:
            # max() returns the first maximal entry in insertion order, which is the same entry a stable
            # descending sort would put first.
            (arch_name, endianness), hits = max(votes.items(), key=lambda item: item[1])
        else:
            # Nothing matched at all, so there is no candidate architecture.
            arch_name, endianness, hits = "DATA", "", 0

        if hits < self.cookiesize * 2:
            # this cannot possibly be code
            arch_name = "DATA"
            endianness = ""

        self.arch = arch_name
        self.endianness = endianness
        # Save it as well for debugging
        self.votes = votes

        l.debug("The architecture should be %s with %s", self.arch, self.endianness)
# NOTE(review): AnalysesHub is imported here at the bottom rather than with the other imports at the top —
# presumably to avoid a circular import with angr.analyses; confirm before moving it.
from angr.analyses import AnalysesHub
# Register the BoyScout class with AnalysesHub under the name "BoyScout".
AnalysesHub.register_default("BoyScout", BoyScout)