Source code for angr.analyses.typehoon.typehoon

from typing import List, Set, Optional, Dict, Union, TYPE_CHECKING

from ...sim_type import SimStruct, SimTypePointer, SimTypeArray
from ..analysis import Analysis, AnalysesHub
from .simple_solver import SimpleSolver
from .translator import TypeTranslator
from .typeconsts import Struct, Pointer, TypeConstant, Array
from .typevars import Equivalence

if TYPE_CHECKING:
    from angr.sim_variable import SimVariable
    from angr.sim_type import SimType
    from .typevars import TypeVariable, TypeConstraint


class Typehoon(Analysis):
    """
    A spiritual tribute to the long-standing typehoon project that @jmg (John Grosen) worked on during his days in the
    angr team. Now I feel really bad about asking the poor guy to work directly on VEX IR without any fancy static
    analysis support as we have right now...

    Typehoon analysis implements a pushdown system that simplifies and solves type constraints. Our type constraints
    are largely an implementation of the paper Polymorphic Type Inference for Machine Code by Noonan, Loginov, and Cok
    from GrammaTech (with missing functionality support and bugs, of course). Type constraints are collected by running
    VariableRecoveryFast (maybe VariableRecovery later as well) on a function, and then solved using this analysis.

    User may specify ground truth, which will override all types at certain program points during constraint solving.
    """

    def __init__(
        self,
        constraints,
        ground_truth=None,
        var_mapping: Optional[Dict["SimVariable", Set["TypeVariable"]]] = None,
        must_struct: Optional[Set["TypeVariable"]] = None,
    ):
        """
        Store the inputs and immediately run the analysis.

        :param constraints:     The set of type constraints to solve. When ground truth is given, the equivalence
                                constraints derived from it are added to this very set (it is modified in place).
        :param ground_truth:    A set of SimType-style solutions for some or all type variables. They will be
                                respected during type solving.
        :param var_mapping:     A mapping from variables to their type variables. Only used for debugging purposes
                                (pp_constraints() and pp_solution()).
        :param must_struct:     Type variables whose solutions are never rewritten by the specialization heuristics.
        """

        self._constraints: Set["TypeConstraint"] = constraints
        self._ground_truth: Optional[Dict["TypeVariable", "SimType"]] = ground_truth
        self._var_mapping = var_mapping  # variable mapping is only used for debugging purposes
        self._must_struct = must_struct

        self.bits = self.project.arch.bits
        # results; all three are populated by _analyze()
        self.solution = None
        self.structs = None
        self.simtypes_solution = None

        # import pprint
        # pprint.pprint(self._var_mapping)
        # pprint.pprint(self._constraints)
        self._analyze()
        # pprint.pprint(self.solution)

    #
    # Public methods
    #

    def update_variable_types(self, func_addr: Union[int, str], var_to_typevars):
        """
        Write the solved types back to the variable manager of the knowledge base.

        :param func_addr:       Key into ``self.kb.variables``: a function address, or the string "global".
        :param var_to_typevars: A mapping from each variable to an iterable of its type variables. Every type variable
                                that has a solution triggers one ``set_variable_type()`` call for the variable.
        """

        for var, typevars in var_to_typevars.items():
            for typevar in typevars:
                type_ = self.simtypes_solution.get(typevar)
                if type_ is None:
                    # no solution for this type variable
                    continue

                # print("{} -> {}: {}".format(var, typevar, type_))
                # Hack: if a global address is of a pointer type and it is not an array, we unpack the type
                if (
                    func_addr == "global"
                    and isinstance(type_, SimTypePointer)
                    and not isinstance(type_.pts_to, SimTypeArray)
                ):
                    type_ = type_.pts_to

                # structs carry their own name; every other type is stored without one
                name = type_.name if isinstance(type_, SimStruct) else None

                self.kb.variables[func_addr].set_variable_type(var, type_, name=name)

    def pp_constraints(self) -> None:
        """
        Pretty-print constraints between *variables* using the variable mapping.

        :raises ValueError: If no variable mapping was provided.
        """

        if self._var_mapping is None:
            raise ValueError("Variable mapping does not exist.")

        # invert the mapping: type variable -> variable
        typevar_to_var = {}
        for k, typevars in self._var_mapping.items():
            for tv in typevars:
                typevar_to_var[tv] = k

        print(f"### {len(self._constraints)} constraints")
        for constraint in self._constraints:
            print(" " + constraint.pp_str(typevar_to_var))
        print("### end of constraints ###")

    def pp_solution(self) -> None:
        """
        Pretty-print solutions using the variable mapping.

        :raises ValueError:     If no variable mapping was provided.
        :raises RuntimeError:   If no solution is available.
        """

        if self._var_mapping is None:
            raise ValueError("Variable mapping does not exist.")
        if self.solution is None:
            raise RuntimeError("Please run type solver before calling pp_solution().")

        # invert the mapping: type variable -> variable
        typevar_to_var = {}
        for k, typevars in self._var_mapping.items():
            for tv in typevars:
                typevar_to_var[tv] = k

        print(f"### {len(self.solution)} solutions")
        # sort by the string form of the type variables to get a stable output order
        for typevar in sorted(self.solution.keys(), key=str):
            sol = self.solution[typevar]
            if typevar in typevar_to_var:
                var_and_typevar = f"{typevar_to_var[typevar]} ({typevar})"
            else:
                var_and_typevar = typevar
            print(f" {var_and_typevar} -> {sol}")
        print("### end of solutions ###")

    #
    # Private methods
    #

    def _analyze(self):
        """
        Run the whole pipeline: inject ground truth, solve, specialize, translate to SimTypes, and finally overwrite
        the translated solutions with the ground truth.
        """

        # convert ground truth into constraints
        if self._ground_truth:
            translator = TypeTranslator(arch=self.project.arch)
            for tv, sim_type in self._ground_truth.items():
                self._constraints.add(Equivalence(tv, translator.simtype2tc(sim_type)))

        self._solve()
        self._specialize()
        self._translate_to_simtypes()

        # apply ground truth
        if self._ground_truth and self.simtypes_solution is not None:
            self.simtypes_solution.update(self._ground_truth)

    def _solve(self):
        """
        Solve the constraints with SimpleSolver and store its result in ``self.solution``.
        """

        solver = SimpleSolver(self.bits, self._constraints)
        self.solution = solver.solution

    def _specialize(self):
        """
        Heuristics to make types more natural and more readable.

        - structs where every element is of the same type will be converted to an array of that element type.

        Solutions of type variables listed in ``must_struct`` are left untouched.
        """

        # iterate over a snapshot of the keys since entries are replaced inside the loop
        for tv in list(self.solution.keys()):
            if self._must_struct and tv in self._must_struct:
                continue
            specialized = self._specialize_struct(self.solution[tv])
            if specialized is not None:
                self.solution[tv] = specialized

    def _specialize_struct(self, tc, memo: Optional[Set] = None):
        """
        Try to replace a struct (or a pointer whose base type can be specialized) with a more natural type.

        :param tc:      The type constant to specialize.
        :param memo:    Pointers that have already been visited along the current recursion path. Used to stop
                        following pointer cycles.
        :return:        The specialized type constant, or None if no specialization applies.
        """

        if isinstance(tc, Pointer):
            if memo is not None and tc in memo:
                # we have been here before; do not follow the pointer again
                return None
            specialized = self._specialize_struct(tc.basetype, memo={tc} if memo is None else memo | {tc})
            if specialized is None:
                return None
            return tc.new(specialized)

        if isinstance(tc, Struct) and tc.fields:
            offsets: List[int] = sorted(tc.fields.keys())  # get a sorted list of offsets
            field0: TypeConstant = tc.fields[offsets[0]]

            # a struct with one single field at offset 0 is replaced by that field
            if len(tc.fields) == 1 and 0 in tc.fields:
                return field0

            # are all fields the same?
            if len(tc.fields) > 1 and all(tc.fields[off] == field0 for off in offsets):
                # Resolve the element size exactly once. Some type constants do not define a size; they are treated as
                # one byte wide. The value is used for both the alignment check and the element count below, so a
                # type without a size can no longer raise NotImplementedError after the fallback was taken.
                try:
                    elem_size = field0.size
                except NotImplementedError:
                    elem_size = 1

                # are all fields aligned properly?
                if all(off % elem_size == 0 for off in offsets):
                    # yeah!
                    max_offset = offsets[-1]
                    count = (max_offset + elem_size) // elem_size
                    return Array(field0, count=count)

        return None

    def _translate_to_simtypes(self):
        """
        Translate solutions in type variables to solutions in SimTypes.

        Populates ``self.simtypes_solution`` and ``self.structs``.
        """

        simtypes_solution = {}
        translator = TypeTranslator(arch=self.project.arch)
        needs_backpatch = set()

        for tv, sol in self.solution.items():
            simtypes_solution[tv], has_nonexistent_ref = translator.tc2simtype(sol)
            if has_nonexistent_ref:
                needs_backpatch.add(tv)

        # back patch
        for tv in needs_backpatch:
            translator.backpatch(simtypes_solution[tv], simtypes_solution)

        self.simtypes_solution = simtypes_solution
        self.structs = translator.structs
# Register the Typehoon class with the AnalysesHub under the name "Typehoon".
# NOTE(review): presumably this is what exposes it as `project.analyses.Typehoon` -- confirm against AnalysesHub.
AnalysesHub.register_default("Typehoon", Typehoon)