Source code for angr.analyses.vfg

from typing import TYPE_CHECKING, Any, Callable, DefaultDict, Dict, List, Generator, Optional, Set, Tuple, Union
import logging
from collections import defaultdict

import archinfo
from archinfo.arch_arm import is_arm_arch
import claripy
from claripy.vsa.strided_interval import StridedInterval
import networkx

from angr.utils.graph import GraphUtils
from angr.analyses import ForwardAnalysis
from .cfg.cfg_job_base import BlockID, FunctionKey, CFGJobBase
from .. import sim_options
from ..engines.procedure import ProcedureEngine
from ..engines import SimSuccessors
from ..errors import (
    AngrDelayJobNotice,
    AngrSkipJobNotice,
    AngrVFGError,
    AngrError,
    AngrVFGRestartAnalysisNotice,
    AngrJobMergingFailureNotice,
    SimValueError,
    SimIRSBError,
    SimError,
)
from ..procedures import SIM_PROCEDURES
from ..state_plugins.callstack import CallStack
from ..analyses import AnalysesHub
from ..sim_state import SimState
from . import Analysis, CFGEmulated

if TYPE_CHECKING:
    from ..knowledge_base import KnowledgeBase

l = logging.getLogger(name=__name__)


class VFGJob(CFGJobBase):
    """
    A job descriptor that contains local variables used during VFG analysis.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

        self.call_stack_suffix: List = []
        self.vfg_node: Optional[VFGNode] = None
        self.is_call_jump = None
        self.call_target = None
        self.dbg_exit_status = {}
        self.is_return_jump = None
        self.sim_successors: Optional[SimSuccessors] = None

        # if this job has a call successor, do we plan to skip the call successor or not
        self.call_skipped = False
        # if the call is skipped, the calling stack of the skipped function is saved in `call_function_key`
        self.call_function_key: Optional[FunctionKey] = None

        self.call_task: Optional[CallAnalysis] = None

    @property
    def block_id(self) -> Optional[BlockID]:
        return self._block_id

    def callstack_repr(self, kb: "KnowledgeBase"):
        s = []
        for i in range(0, len(self.call_stack_suffix), 2):
            call_site, func_addr = (
                self.call_stack_suffix[i],
                self.call_stack_suffix[i + 1],
            )  # pylint:disable=unsubscriptable-object
            if func_addr is None:
                continue
            call_site_str = "%#x" % call_site if call_site is not None else "None"
            if func_addr in kb.functions:
                s.append(f"{kb.functions[func_addr].name}[{call_site_str}]")
            else:
                s.append(f"{func_addr:#x}[{call_site_str}]")
        return "//".join(s)
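

# Illustrative note (not from the original source): callstack_repr() renders the
# call-stack suffix as "name[callsite]" entries joined by "//". For a suffix of
# (0x400100, 0x401000, 0x400200, 0x402000) -- (call site, function) pairs -- the
# output would look like "func_a[0x400100]//func_b[0x400200]", with names taken
# from the knowledge base, or raw hex addresses for unknown functions. The
# addresses and names in this example are hypothetical.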


class PendingJob:
    """
    Describes a pending job during VFG analysis.
    """

    __slots__ = (
        "block_id",
        "state",
        "call_stack",
        "src_block_id",
        "src_stmt_idx",
        "src_ins_addr",
    )

    def __init__(
        self,
        block_id: BlockID,
        state: SimState,
        call_stack: CallStack,
        src_block_id: BlockID,
        src_stmt_idx: int,
        src_ins_addr: int,
    ) -> None:
        self.block_id = block_id
        self.state = state
        self.call_stack = call_stack
        self.src_block_id = src_block_id
        self.src_stmt_idx = src_stmt_idx
        self.src_ins_addr = src_ins_addr


class AnalysisTask:
    """
    An analysis task describes a task that should be done before popping this task out of the task stack and
    discarding it.
    """

    def __init__(self) -> None:
        pass

    @property
    def done(self):
        raise NotImplementedError()


class FunctionAnalysis(AnalysisTask):
    """
    Analyze a function, generate fix-point states from all endpoints of that function, and then merge them into one
    state.
    """

    def __init__(self, function_address: int, return_address: Optional[int]) -> None:
        super().__init__()
        self.function_address = function_address
        self.return_address = return_address

        self.call_analysis: Optional[AnalysisTask] = None

        # tracks all jobs that are currently live
        self.jobs = []

    def __repr__(self):
        s = "<Function @ %#08x with %d jobs>" % (self.function_address, len(self.jobs))
        return s

    #
    # Properties
    #

    @property
    def done(self) -> bool:
        return not self.jobs


class CallAnalysis(AnalysisTask):
    """
    Analyze a call by analyzing all functions this call might be calling, collecting all final states generated by
    analyzing those functions, and merging them into one state.
    """

    def __init__(
        self,
        address: int,
        return_address: None,
        function_analysis_tasks: Optional[List[Any]] = None,
        mergeable_plugins: Optional[Tuple[str, str]] = None,
    ) -> None:
        super().__init__()
        self.address = address
        self.return_address = return_address
        self.function_analysis_tasks = function_analysis_tasks if function_analysis_tasks is not None else []
        self._mergeable_plugins = mergeable_plugins

        self.skipped = False
        self._final_jobs = []

    def __repr__(self):
        s = "<Call @ %#08x with %d function tasks>" % (self.address, len(self.function_analysis_tasks))
        return s

    #
    # Properties
    #

    @property
    def done(self) -> bool:
        for task in self.function_analysis_tasks:
            if not task.done:
                return False
        return True

    #
    # Public methods
    #

    def register_function_analysis(self, task: FunctionAnalysis) -> None:
        assert isinstance(task, FunctionAnalysis)

        self.function_analysis_tasks.append(task)
        task.call_analysis = self

    def add_final_job(self, job: VFGJob) -> None:
        self._final_jobs.append(job)

    def merge_jobs(self) -> VFGJob:
        assert self._final_jobs

        job = self._final_jobs[0]
        for other in self._final_jobs[1:]:
            job.state = job.state.merge(other.state, plugin_whitelist=self._mergeable_plugins)[0]
        return job
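

# Illustrative sketch (not part of the original module): how the task bookkeeping
# above composes. A CallAnalysis is only `done` once every FunctionAnalysis task
# registered on it has drained its job list; the plain object() below is a
# hypothetical stand-in for a real VFGJob.
def _demo_task_bookkeeping() -> None:
    call = CallAnalysis(address=0x400000, return_address=None)
    func_task = FunctionAnalysis(function_address=0x401000, return_address=0x400005)
    call.register_function_analysis(func_task)

    func_task.jobs.append(object())  # a live job keeps both tasks unfinished
    assert not func_task.done and not call.done

    func_task.jobs.pop()  # once the last job drains, doneness propagates upward
    assert func_task.done and call.done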


class VFGNode:
    """
    A descriptor of nodes in a Value-Flow Graph.
    """

    def __init__(self, addr: int, key: BlockID, state: Optional["SimState"] = None) -> None:
        """
        Constructor.

        :param int addr:
        :param BlockID key:
        :param SimState state:
        """
        self.key = key
        self.addr = addr
        self.state: Optional[SimState] = None
        self.widened_state: Optional[SimState] = None
        self.narrowing_times: int = 0
        self.all_states: List[SimState] = []
        self.events: List = []
        self.input_variables: List = []
        self.actions: List = []
        self.final_states: List[SimState] = []

        if state:
            self.all_states.append(state)
            self.state = state

    def __hash__(self) -> int:
        return hash(self.key)

    def __eq__(self, o):
        return (
            type(self) == type(o)  # pylint:disable=unidiomatic-typecheck
            and self.key == o.key
            and self.addr == o.addr
            and self.state == o.state
            and self.actions == o.actions
            and self.events == o.events
            and self.narrowing_times == o.narrowing_times
            and self.all_states == o.all_states
            and self.widened_state == o.widened_state
            and self.input_variables == o.input_variables
        )

    def __repr__(self):
        s = f"VFGNode[{self.addr:#x}] <{repr(self.key)}>"
        return s

    def append_state(self, s, is_widened_state=False):
        """
        Append a new state to this VFGNode.

        :param s:                The new state to append.
        :param is_widened_state: Whether it is a widened state or not.
        """

        if not is_widened_state:
            self.all_states.append(s)
            self.state = s
        else:
            self.widened_state = s
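

# Illustrative sketch (not part of the original module): append_state() keeps
# widened states out of the regular state history. Plain object() instances stand
# in for SimStates here, and the block ID argument is left to the caller.
def _demo_append_state(key: BlockID) -> None:
    node = VFGNode(0x401000, key)
    s1, s2 = object(), object()
    node.append_state(s1)                         # recorded in all_states; becomes .state
    node.append_state(s2, is_widened_state=True)  # stored separately as .widened_state
    assert node.state is s1 and node.widened_state is s2 and node.all_states == [s1]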


class VFG(ForwardAnalysis[SimState, VFGNode, VFGJob, BlockID], Analysis):  # pylint:disable=abstract-method
    """
    This class represents a value-flow graph (VFG): a control-flow graph annotated with the results of an abstract
    interpretation, performed starting from the given function address. The output is an invariant at the beginning
    (or the end) of each basic block.

    Steps:

    - Generate a CFG first if one is not provided.
    - Identify all merge points (denote the set of merge points as Pw) in the CFG.
    - Cut the loop back edges (which can be derived from Pw) so that we gain an acyclic CFG.
    - Identify all variables that are 1) from memory loading, 2) from initial values, or 3) phi functions. Denote
      the set of those variables as S_{var}.
    - Start the real AI analysis and try to compute a fix-point for each merge point. Perform widening/narrowing
      only on variables \\in S_{var}.
    """

    # TODO: right now the graph traversal method is not optimal. A new solution is needed to minimize the number of
    # TODO: times we visit each node in the graph

    def __init__(
        self,
        cfg: Optional[CFGEmulated] = None,
        context_sensitivity_level: int = 2,
        start: Optional[int] = None,
        function_start: Optional[int] = None,
        interfunction_level: int = 0,
        initial_state: Optional["SimState"] = None,
        avoid_runs: Optional[List[int]] = None,
        remove_options: Optional[Set[str]] = None,
        timeout: Optional[int] = None,
        max_iterations_before_widening: int = 8,
        max_iterations: int = 40,
        widening_interval: int = 3,
        final_state_callback: Optional[Callable[["SimState", CallStack], Any]] = None,
        status_callback: Optional[Callable[["VFG"], Any]] = None,
        record_function_final_states: bool = False,
    ) -> None:
        """
        :param cfg:                         The control-flow graph to base this analysis on. If none is provided, we
                                            will construct a CFGEmulated.
        :param context_sensitivity_level:   The level of context-sensitivity of this VFG. It ranges from 0 to
                                            infinity. Default 2.
        :param start:                       The address where the analysis starts. Defaults to the project entry
                                            point.
        :param function_start:              The address of the function to analyze.
        :param interfunction_level:         How many levels of function calls the analysis follows into.
        :param initial_state:               A state to use as the initial one.
        :param avoid_runs:                  A list of runs to avoid.
        :param remove_options:              State options to remove from the initial state. It only works when
                                            `initial_state` is None.
        :param int timeout:
        :param final_state_callback:        Callback function invoked when encountering a final state.
        :param status_callback:             Callback function used in _analysis_core_baremetal.
        """
        ForwardAnalysis.__init__(
            self, order_jobs=True, allow_merging=True, allow_widening=True, status_callback=status_callback
        )  # type:ignore

        # Related CFG.
        # We can still perform analysis if you don't specify a CFG. But providing a CFG may give you a better result.
        self._cfg = cfg

        # Where to start the analysis
        self._start: int = start if start is not None else self.project.entry
        self._function_start: int = function_start if function_start is not None else self._start

        # Other parameters
        self._avoid_runs: List[int] = [] if avoid_runs is None else avoid_runs
        self._context_sensitivity_level = context_sensitivity_level
        self._interfunction_level = interfunction_level
        self._state_options_to_remove = set() if remove_options is None else remove_options
        self._timeout = timeout
        self._start_at_function = self._start == self._function_start

        self._initial_state = initial_state

        self._max_iterations_before_widening = max_iterations_before_widening
        self._max_iterations = max_iterations
        self._widening_interval = widening_interval

        self._final_state_callback = final_state_callback
        self._record_function_final_states = record_function_final_states

        self._nodes: Dict[BlockID, VFGNode] = {}  # all the VFG nodes, keyed on block IDs
        # The last available state for each program point, without widening
        self._normal_states: Dict[BlockID, SimState] = {}
        # States on which widening has occurred
        self._widened_states: Dict[BlockID, SimState] = {}

        # Initial states of each function, which are context sensitive
        # It maps function key to its states
        self._function_initial_states: DefaultDict[int, Dict[int, SimState]] = defaultdict(dict)
        # Final states of each function, right after `ret` is called. Also context sensitive.
        # Even if a function may have multiple return sites, as long as they all return to the same place, there is
        # only one final state of that function.
        self._function_final_states: DefaultDict[int, Dict[int, SimState]] = defaultdict(dict)

        # All final states are put in this list
        self.final_states: List[SimState] = []

        self._state_initialization_map: DefaultDict[int, List[Tuple[int, int]]] = defaultdict(list)

        # A dict to log edges and the jumpkind between each basic block
        self._exit_targets: DefaultDict[Tuple[Union[int, None], ...], List[Tuple[BlockID, str]]] = defaultdict(list)

        # A dict to record all blocks that return to a specific address
        self._return_target_sources: DefaultDict[int, List[int]] = defaultdict(list)

        self._pending_returns: Dict[BlockID, PendingJob] = {}

        self._thumb_addrs: Set[int] = set()  # set of all addresses that are code in thumb mode

        # Address of the very last instruction. The analysis is terminated there.
        self._final_address: Optional[int] = None

        self._function_merge_points: Dict[int, List[int]] = {}
        self._function_widening_points: Dict[int, List[int]] = {}
        self._function_node_addrs: Dict[int, List[int]] = {}  # sorted in reverse post-order

        self._mergeable_plugins = ("memory", "registers")

        self._task_stack: List[FunctionAnalysis] = []

        self._tracing_times: DefaultDict[BlockID, int] = defaultdict(int)

        # counters for debugging
        self._execution_counter: DefaultDict[BlockID, int] = defaultdict(int)

        # Start analysis
        self._analyze()

    #
    # Internal properties
    #

    @property
    def _current_function_address(self):
        return self._task_stack[-1].function_address

    @property
    def _top_task(self) -> Optional[FunctionAnalysis]:
        """
        Get the task at the top of the stack.

        :return: The top task in the stack, or None if the stack is empty.
        :rtype: FunctionAnalysis
        """
        return self._task_stack[-1] if len(self._task_stack) != 0 else None

    @property
    def _top_function_analysis_task(self):
        """
        Get the topmost FunctionAnalysis task in the stack.

        :return: The top function analysis task in the stack, or None if there isn't any.
        :rtype: FunctionAnalysis
        """
        for r in reversed(self._task_stack):
            if isinstance(r, FunctionAnalysis):
                return r
        return None

    @property
    def function_initial_states(self):
        return self._function_initial_states

    @property
    def function_final_states(self):
        return self._function_final_states

    #
    # Public methods
    #

    def get_any_node(self, addr: int) -> Optional[VFGNode]:
        """
        Get any VFG node corresponding to the basic block at @addr.
        Note that depending on the context sensitivity level, there might be multiple nodes corresponding to
        different contexts. This function will return the first one it encounters, which might not be what you want.
        """
        for n in self.graph.nodes():
            if n.addr == addr:
                return n
        return None

    def get_all_nodes(self, addr) -> Generator[VFGNode, None, None]:
        for n in self.graph.nodes():
            if n.addr == addr:
                yield n

    def irsb_from_node(self, node):
        return self.project.factory.successors(node.state, addr=node.addr)

    #
    # Operations
    #

    def copy(self):
        new_vfg = VFG(self.project)  # type:ignore
        new_vfg._cfg = self._cfg
        new_vfg._graph = networkx.DiGraph(self.graph)
        new_vfg._nodes = self._nodes.copy()
        new_vfg._exit_targets = defaultdict(list, self._exit_targets)
        return new_vfg

    # Pickling helpers

    def __setstate__(self, s):
        self.__dict__.update(s)

    def __getstate__(self):
        return dict(self.__dict__)

    #
    # Main analysis routines, mostly overriding methods of ForwardAnalysis
    #

    def _pre_analysis(self) -> None:
        """
        Executed before analysis starts. Necessary initializations are performed here.

        :return: None
        """

        l.debug("Starting from %#x", self._start)

        # initialize the task stack
        self._task_stack = []

        # initialize the execution counter dict
        self._execution_counter = defaultdict(int)

        # Generate a CFG if no CFG is provided
        if not self._cfg:
            l.debug("Generating a CFG, since none was given...")
            # TODO: can we use a fast CFG instead? note that fast CFG does not care about context sensitivity at all,
            # TODO: but for state merging, we also don't really care about context sensitivity.
            self._cfg: CFGEmulated = self.project.analyses[CFGEmulated].prep()(
                context_sensitivity_level=self._context_sensitivity_level, starts=(self._start,)
            )

        if not self._cfg.normalized:
            l.warning(
                "The given CFG is not normalized, which might impact the performance/accuracy of the VFG analysis."
            )

        # Prepare the state
        initial_state = self._prepare_initial_state(self._start, self._initial_state)
        initial_state.ip = self._start

        if self.project.arch.name.startswith("MIPS"):
            initial_state.regs.t9 = self._start

        # clear the function merge points cache
        self._function_merge_points = {}

        # Create the initial state
        state = initial_state.copy()

        if self._start_at_function:
            # set the return address to an address so we can catch it and terminate the VSA analysis
            # TODO: Properly pick an address that will not conflict with any existing code and data in the program
            self._final_address = 0x4FFF0000
            self._set_return_address(state, self._final_address)

        call_stack = None
        if not self._start_at_function:
            # we should build a custom call stack
            call_stack = CallStack()
            call_stack = call_stack.call(None, self._function_start, retn_target=self._final_address)

        job = VFGJob(
            state.addr,
            state,
            self._context_sensitivity_level,
            jumpkind="Ijk_Boring",
            final_return_address=self._final_address,
            call_stack=call_stack,
        )
        block_id = BlockID.new(state.addr, job.get_call_stack_suffix(), job.jumpkind)
        job._block_id = block_id

        self._insert_job(job)

        # create the task
        function_analysis_task = FunctionAnalysis(self._function_start, self._final_address)
        function_analysis_task.jobs.append(job)
        self._task_stack.append(function_analysis_task)

    def _job_sorting_key(self, job):
        """
        Get the sorting key of a VFGJob instance.

        :param VFGJob job: the VFGJob object.
        :return: An integer that determines the order of this job in the queue.
        :rtype: int
        """

        MAX_BLOCKS_PER_FUNCTION = 1000000

        task_functions = list(
            reversed(list(task.function_address for task in self._task_stack if isinstance(task, FunctionAnalysis)))
        )
        try:
            function_pos = task_functions.index(job.func_addr)
        except ValueError:
            # not in the list
            # it might be because we followed the wrong path, or there is a bug in the traversal algorithm
            # anyways, do it first
            l.warning("Function address %#x is not found in task stack.", job.func_addr)
            return 0

        try:
            block_in_function_pos = self._ordered_node_addrs(job.func_addr).index(job.addr)
        except ValueError:
            # block not found. what?
            block_in_function_pos = min(job.addr - job.func_addr, MAX_BLOCKS_PER_FUNCTION - 1)

        return block_in_function_pos + MAX_BLOCKS_PER_FUNCTION * function_pos

        # return self._cfg.get_topological_order(self._cfg.get_node(job.block_id))

    def _job_key(self, job: VFGJob) -> Optional[BlockID]:
        """
        Return the block ID of the job. Two or more jobs owning the same block ID will be merged together.

        :param VFGJob job: The VFGJob instance.
        :return: The block ID of the job.
        :rtype: BlockID
        """

        return job.block_id

    def _pre_job_handling(self, job: VFGJob) -> None:
        """
        Some code executed before actually processing the job.

        :param VFGJob job: the VFGJob object.
        :return: None
        """

        # did we reach the final address?
        if self._final_address is not None and job.addr == self._final_address:
            # our analysis should be terminated here
            l.debug("%s is viewed as a final state. Skip.", job)
            raise AngrSkipJobNotice()

        l.debug("Handling VFGJob %s", job)

        if not self._top_task:
            l.debug("No more tasks available. Skip the job.")
            raise AngrSkipJobNotice()

        assert isinstance(self._top_task, FunctionAnalysis)

        if job not in self._top_task.jobs:
            # it seems that all jobs of the top task have been done. unwind the task stack
            # make sure this job is at least recorded somewhere
            unwind_count = None
            for i, task in enumerate(reversed(self._task_stack)):
                if isinstance(task, FunctionAnalysis):
                    if job in task.jobs:
                        # nice
                        unwind_count = i

            if unwind_count is None:
                l.debug("%s is not recorded. Skip the job.", job)
                raise AngrSkipJobNotice()
            else:
                # unwind the stack till the target, unless we see any pending jobs for each new top task
                for i in range(unwind_count):
                    if isinstance(self._top_task, FunctionAnalysis):
                        # are there any pending jobs belonging to the current function that we should handle first?
                        pending_job_key = self._get_pending_job(self._top_task.function_address)

                        if pending_job_key is not None:
                            # ah, there is one
                            # analyze it first
                            self._trace_pending_job(pending_job_key)
                            l.debug(
                                "A pending job is found for function %#x. Delay %s.",
                                self._top_task.function_address,
                                job,
                            )
                            raise AngrDelayJobNotice()

                    task = self._task_stack.pop()

                    if not task.done:
                        l.warning("Removing an unfinished task %s. Might be a bug.", task)

                assert job in self._top_task.jobs

        # check if this is considered to be a final state
        if self._final_state_callback is not None and self._final_state_callback(job.state, job.call_stack):
            l.debug("%s.state is considered as a final state. Skip the job.", job)
            self.final_states.append(job.state)
            raise AngrSkipJobNotice()

        # increment the execution counter
        self._execution_counter[job.addr] += 1

        self._top_task.jobs.remove(job)

        # set up some essential variables and parameters
        job.call_stack_suffix = job.get_call_stack_suffix()  # type:ignore
        job.jumpkind = "Ijk_Boring" if job.state.history.jumpkind is None else job.state.history.jumpkind

        src_block_id = job.src_block_id
        src_exit_stmt_idx = job.src_exit_stmt_idx

        addr = job.state.solver.eval(job.state.regs.ip)
        input_state = job.state
        block_id = BlockID.new(addr, job.call_stack_suffix, job.jumpkind)

        if self._tracing_times[block_id] > self._max_iterations:
            l.debug("%s has been traced too many times. Skip", job)
            raise AngrSkipJobNotice()

        self._tracing_times[block_id] += 1

        if block_id not in self._nodes:
            vfg_node = VFGNode(addr, block_id, state=input_state)
            self._nodes[block_id] = vfg_node
        else:
            vfg_node = self._nodes[block_id]

        job.vfg_node = vfg_node
        # log the current state
        vfg_node.state = input_state

        # Execute this basic block with input state, and get a new SimSuccessors instance
        # the unused result variable is `error_occured`
        job.sim_successors, _, restart_analysis = self._get_simsuccessors(input_state, addr)

        if restart_analysis:
            # We should restart the analysis because something must be changed in the very initial state
            raise AngrVFGRestartAnalysisNotice()

        if job.sim_successors is None:
            # Ouch, we cannot get the SimSuccessors for some reason
            # Skip this guy
            l.debug("Cannot create SimSuccessors for %s. Skip.", job)
            raise AngrSkipJobNotice()

        self._graph_add_edge(src_block_id, block_id, jumpkind=job.jumpkind, src_exit_stmt_idx=src_exit_stmt_idx)

    def _get_successors(self, job: VFGJob) -> List[SimState]:
        # Extract initial values
        state = job.state
        addr = job.addr

        # Obtain successors
        if addr not in self._avoid_runs:
            assert job.sim_successors is not None
            all_successors: List["SimState"] = (
                job.sim_successors.flat_successors + job.sim_successors.unconstrained_successors
            )
        else:
            all_successors = []

        assert job.vfg_node is not None
        # save those states
        if job.vfg_node.final_states is None:
            job.vfg_node.final_states = []
        job.vfg_node.final_states.extend(all_successors)

        # Update thumb_addrs
        assert job.sim_successors
        if job.sim_successors.sort == "IRSB" and state.thumb:
            self._thumb_addrs.update(job.sim_successors.artifacts["insn_addrs"])

        if not all_successors:
            if job.sim_successors.sort == "SimProcedure" and isinstance(
                job.sim_successors.artifacts["procedure"], SIM_PROCEDURES["stubs"]["PathTerminator"]
            ):
                # If there is no valid exit in this branch and it's not intentional (e.g. caused by a SimProcedure
                # that does not do_return), we should make it return to its call site. However, we don't want to
                # use its state as it might be corrupted. Just create a link in the exit_targets map.
                retn_target = job.call_stack.current_return_target
                if retn_target is not None:
                    new_call_stack = job.call_stack_copy()
                    exit_target_tpl = new_call_stack.stack_suffix(self._context_sensitivity_level) + (retn_target,)
                    self._exit_targets[job.call_stack_suffix + (addr,)].append((exit_target_tpl, "Ijk_Ret"))
            else:
                # This is intentional. We shall remove all the pending returns generated before along this path.
                self._remove_pending_return(job, self._pending_returns)

        # If this is a call exit, we shouldn't put the default exit (which is artificial) into the CFG. The exits
        # will be Ijk_Call and Ijk_FakeRet, and Ijk_Call always goes first.
        job.is_call_jump = any([self._is_call_jumpkind(i.history.jumpkind) for i in all_successors])
        call_targets = []
        for succ in all_successors:
            if self._is_call_jumpkind(succ.history.jumpkind):
                try:
                    call_targets.append(succ.solver.eval_one(succ._ip))
                    # finding one is enough!
                    break
                except SimValueError:
                    # this call has multiple possible targets. ignore it
                    pass

        job.call_target = None if not call_targets else call_targets[0]

        job.is_return_jump = len(all_successors) and all_successors[0].history.jumpkind == "Ijk_Ret"

        if job.is_call_jump:
            # create the call task

            # TODO: correctly fill the return address
            call_task = CallAnalysis(job.addr, None, [], mergeable_plugins=self._mergeable_plugins)
            self._task_stack.append(call_task)

            job.call_task = call_task

        return all_successors

    def _handle_successor(
        self, job: VFGJob, successor: SimState, all_successors: List[SimState]
    ) -> List[Union[VFGJob, Any]]:  # pylint:disable=arguments-renamed
        """
        Process each successor generated by the job, and return a new list of succeeding jobs.

        :param VFGJob job:          The VFGJob instance.
        :param SimState successor:  The succeeding state.
        :param list all_successors: A list of all successors.
        :return:                    A list of newly created jobs from the successor.
        :rtype: list
        """

        # Initialize parameters
        addr = job.addr
        jumpkind = successor.history.jumpkind

        #
        # Get instruction pointer
        #

        if job.is_return_jump:
            ret_target = job.call_stack.current_return_target
            if ret_target is None:
                # We have nowhere to go according to our call stack. However, the call stack might be corrupted.
                l.debug("According to the call stack, we have nowhere to return to.")
                return []

            successor.ip = ret_target

        # this try-except block is to handle cases where the instruction pointer is symbolic
        try:
            successor_addrs = successor.solver.eval_upto(successor.ip, 2)
        except SimValueError:
            # TODO: Should fall back to reading targets from CFG
            # It cannot be concretized currently. Maybe we could handle it later, maybe it just cannot be
            # concretized.
            return []

        if len(successor_addrs) > 1:
            # multiple concrete targets
            if job.is_return_jump:
                # It might be caused by state merging
                # We may retrieve the correct ip from the call stack
                successor.ip = job.call_stack.current_return_target
            else:
                return self._handle_successor_multitargets(job, successor, all_successors)

        # Now there should be one single target for the successor
        successor_addr = successor.solver.eval_one(successor.ip)

        # Get the fake ret successor
        fakeret_successor = None
        if self._is_call_jumpkind(jumpkind):
            fakeret_successor = all_successors[-1]

            # If the function we're calling into doesn't return, we should discard it
            if self._cfg is not None:
                func = self.kb.functions.function(addr=job.call_target)
                if func is not None and func.returning is False and len(all_successors) == 2:
                    del all_successors[-1]
                    fakeret_successor = None

        if self._is_call_jumpkind(jumpkind):
            # Create a new call stack for the successor
            new_call_stack = self._create_callstack(job, successor_addr, jumpkind, fakeret_successor)
            if new_call_stack is None:
                l.debug("Cannot create a new callstack for address %#x", successor_addr)
                job.dbg_exit_status[successor] = ""
                return []
            new_call_stack_suffix = new_call_stack.stack_suffix(self._context_sensitivity_level)

            new_function_key = FunctionKey.new(successor_addr, new_call_stack_suffix)
            # Save the initial state for the function
            self._save_function_initial_state(new_function_key, successor_addr, successor.copy())

            # bail out if we hit the interfunction_level cap
            if len(job.call_stack) >= self._interfunction_level:
                l.debug(
                    "We are not tracing into a new function %#08x as we hit the interfunction_level limit",
                    successor_addr,
                )
                # mark it as skipped
                job.dbg_exit_status[successor] = "Skipped"
                job.call_skipped = True
                job.call_function_key = new_function_key

                job.call_task.skipped = True

                return []

        elif jumpkind == "Ijk_Ret":
            # Pop the current function out of the call stack
            new_call_stack = self._create_callstack(job, successor_addr, jumpkind, fakeret_successor)
            if new_call_stack is None:
                l.debug("Cannot create a new callstack for address %#x", successor_addr)
                job.dbg_exit_status[successor] = ""
                return []
            new_call_stack_suffix = new_call_stack.stack_suffix(self._context_sensitivity_level)

        else:
            new_call_stack = job.call_stack
            new_call_stack_suffix = job.call_stack_suffix

        # Generate the new block ID
        new_block_id = BlockID.new(successor_addr, new_call_stack_suffix, jumpkind)

        #
        # Generate new VFG jobs
        #

        if jumpkind == "Ijk_Ret":
            assert not job.is_call_jump

            # Record this return
            self._return_target_sources[successor_addr].append(job.call_stack_suffix + (addr,))

            # Check if this return is inside our pending returns list
            if new_block_id in self._pending_returns:
                del self._pending_returns[new_block_id]

        # Check if we have reached a fix-point
        if jumpkind != "Ijk_FakeRet" and new_block_id in self._nodes:
            last_state = self._nodes[new_block_id].state

            _, _, merged = last_state.merge(successor, plugin_whitelist=self._mergeable_plugins)

            if merged:
                l.debug("%s didn't reach a fix-point", new_block_id)
            else:
                l.debug("%s reaches a fix-point.", new_block_id)
                job.dbg_exit_status[successor] = "Merged due to reaching a fix-point"
                return []

        new_jobs = self._create_new_jobs(job, successor, new_block_id, new_call_stack)

        return new_jobs

    def _handle_successor_multitargets(self, job, successor, all_successors):
        """
        Generate new jobs for all possible successor targets when there is more than one possible concrete value for
        successor.ip

        :param VFGJob job:          The VFGJob instance.
        :param SimState successor:  The succeeding state.
        :param list all_successors: All succeeding states from the same VFGJob.
        :return:                    A list of new succeeding jobs.
        :rtype: list
        """

        new_jobs = []

        # Currently we assume a legit jumping target cannot have more than 256 concrete values
        # TODO: make it a setting on VFG
        MAX_NUMBER_OF_CONCRETE_VALUES = 256

        all_possible_ips = successor.solver.eval_upto(successor.ip, MAX_NUMBER_OF_CONCRETE_VALUES + 1)

        if len(all_possible_ips) > MAX_NUMBER_OF_CONCRETE_VALUES:
            l.warning(
                "IP can be concretized to more than %d values, which means it might be corrupted.",
                MAX_NUMBER_OF_CONCRETE_VALUES,
            )
            return []

        # Call this function to generate a successor for each possible IP
        for ip in all_possible_ips:
            concrete_successor = successor.copy()
            concrete_successor.ip = ip

            concrete_jobs = self._handle_successor(job, concrete_successor, all_successors)

            if job.is_call_jump:  # TODO: take care of syscalls
                for new_job in concrete_jobs:
                    # TODO: correctly fill the return address. The return address can be found from the
                    # TODO: fakeret successor in the `successors` list
                    function_analysis_task = FunctionAnalysis(new_job.addr, None)
                    # log the new job
                    function_analysis_task.jobs.append(new_job)
                    # put it onto the stack
                    self._task_stack.append(function_analysis_task)
                    # log it in the call_task
                    job.call_task.register_function_analysis(function_analysis_task)

            new_jobs.extend(concrete_jobs)

        return new_jobs

    def _post_job_handling(
        self, job: VFGJob, new_jobs: List[Union[VFGJob, Any]], successors: List[SimState]
    ) -> None:  # pylint:disable=unused-argument
        # Debugging output
        if l.level == logging.DEBUG:
            self._post_job_handling_debug(job, successors)

        # pop all finished tasks from the task stack
        pending_task_func_addrs = {k.func_addr for k in self._pending_returns}
        while True:
            task = self._top_task

            if task is None:
                # the task stack is empty
                break

            if isinstance(task, CallAnalysis):
                # the call never returns
                if task.skipped:
                    l.debug("Calls from %s are skipped.", task)
                else:
                    l.debug("%s never returns.", task)
                self._task_stack.pop()

            else:
                if not task.done or task.function_address in pending_task_func_addrs:
                    break

                l.debug("%s is finished.", task)
                self._task_stack.pop()

                # the next guy *might be* a call analysis task
                task = self._top_task

                if isinstance(task, CallAnalysis):
                    if task.done:
                        # awesome!
                        # pop it from the task stack
                        self._task_stack.pop()

                        if task._final_jobs:
                            # merge all jobs, and create a new job
                            new_job = task.merge_jobs()

                            # register the job to the top task
                            self._top_task.jobs.append(new_job)

                            # insert the job
                            self._insert_job(new_job)

        # if not new_jobs:
        #     # task stack is empty
        #     self.final_states.append(job.state)

    def _intra_analysis(self) -> None:
        pass

    def _merge_jobs(self, *jobs: VFGJob):
        l.debug("Merging jobs %s", jobs)

        # there should not be more than two jobs being merged at the same time
        assert len(jobs) == 2

        addr = jobs[0].addr

        if self.project.is_hooked(addr) and self.project.hooked_by(addr).is_continuation:
            raise AngrJobMergingFailureNotice()

        # update jobs
        for job in jobs:
            if job in self._top_function_analysis_task.jobs:
                self._top_function_analysis_task.jobs.remove(job)

        state_0 = jobs[0].state
        state_1 = jobs[1].state

        merged_state, _ = self._merge_states(state_0, state_1)

        new_job = VFGJob(
            jobs[0].addr,
            merged_state,
            self._context_sensitivity_level,
            jumpkind=jobs[0].jumpkind,
            block_id=jobs[0].block_id,
            call_stack=jobs[0].call_stack,
            src_block_id=jobs[0].src_block_id,
            src_exit_stmt_idx=jobs[0].src_exit_stmt_idx,
            src_ins_addr=jobs[0].src_ins_addr,
        )
        self._top_function_analysis_task.jobs.append(new_job)

        return new_job

    def _should_widen_jobs(self, *jobs):
        """
        :param iterable jobs:
        :return: True if we should widen, False otherwise.
        :rtype: bool
        """

        job_0: VFGJob
        _: VFGJob
        job_0, _ = jobs[-2:]  # pylint:disable=unbalanced-tuple-unpacking

        addr = job_0.addr

        if addr not in self._widening_points(job_0.func_addr):
            return False

        tracing_times = self._tracing_times[job_0.block_id]
        if tracing_times > self._max_iterations_before_widening and tracing_times % self._widening_interval == 0:
            return True

        return False

    def _widen_jobs(self, *jobs: VFGJob):
        """
        :param iterable jobs:
        :return:
        """

        job_0: VFGJob
        job_1: VFGJob
        job_0, job_1 = jobs[-2:]  # pylint:disable=unbalanced-tuple-unpacking
        assert self._top_function_analysis_task is not None

        # update jobs
        for job in jobs:
            if job in self._top_function_analysis_task.jobs:
                self._top_function_analysis_task.jobs.remove(job)

        l.debug("Widening %s", job_1)

        new_state, _ = self._widen_states(job_0.state, job_1.state)

        new_job = VFGJob(
            jobs[0].addr,
            new_state,
            self._context_sensitivity_level,
            jumpkind=jobs[0].jumpkind,
            block_id=jobs[0].block_id,
            call_stack=jobs[0].call_stack,
            src_block_id=jobs[0].src_block_id,
            src_exit_stmt_idx=jobs[0].src_exit_stmt_idx,
            src_ins_addr=jobs[0].src_ins_addr,
        )
        self._top_function_analysis_task.jobs.append(new_job)

        return new_job

    def _job_queue_empty(self) -> None:
        if self._pending_returns:
            # We don't have any paths remaining. Let's pop a previously-missing return to process.
            top_task: FunctionAnalysis = self._top_task
            func_addr = top_task.function_address

            pending_ret_key = self._get_pending_job(func_addr)
            if pending_ret_key is None:
                # analysis of the current function is somehow terminated
                # we have to rewind the stack, and try the function that calls the current function
                l.debug("No pending return for the current function %#x. Unwind the stack.", func_addr)
                if not self._top_function_analysis_task.done:
                    l.warning(
                        "The top function analysis task is not done yet. This might be a bug. Please report to Fish."
                    )
                # stack unwinding
                while True:
                    s = self._task_stack.pop()
                    if isinstance(s, CallAnalysis):
                        break

                return self._job_queue_empty()

            self._trace_pending_job(pending_ret_key)
            l.debug("Tracing a missing return %s", repr(pending_ret_key))

        return None

    def _post_analysis(self) -> None:
        pass

    #
    # State widening, merging, and narrowing
    #

    def _merge_states(self, *states: SimState):
        """
        Merge the given states, and return a new one.

        :param states: All states to merge.
        :returns:      The merged state, and whether a merging has occurred.
        """

        merged = states[0]
        merging_occurred = False

        for state in states[1:]:
            merged, _, merging_occurred_ = merged.merge(state, plugin_whitelist=self._mergeable_plugins)
            merging_occurred |= merging_occurred_

        return merged, merging_occurred

    @staticmethod
    def _widen_states(old_state, new_state):
        """
        Perform a widening operation on the given states, and return a new one.

        :param old_state:
        :param new_state:
        :returns: The widened state, and whether widening has occurred.
        """

        l.debug("Widening state at IP %s", old_state.ip)

        widened_state, widening_occurred = old_state.widen(new_state)

        return widened_state, widening_occurred

    @staticmethod
    def _narrow_states(
        node, old_state, new_state, previously_widened_state
    ):  # pylint:disable=unused-argument,no-self-use
        """
        Try to narrow the state!

        :param old_state:
        :param new_state:
        :param previously_widened_state:
        :returns: The narrowed state, and whether a narrowing has occurred.
        """

        l.debug("Narrowing state at IP %s", previously_widened_state.ip)

        s = previously_widened_state.copy()

        narrowing_occurred = False

        # TODO: Finish the narrowing logic

        return s, narrowing_occurred

    #
    # Helper methods
    #

    def _prepare_initial_state(self, function_start: int, state: Optional[SimState]) -> SimState:
        """
        Get the state to start the analysis of a function with.

        :param int function_start: Address of the function.
        :param SimState state:     The program state to base on.
        """

        if state is None:
            state = self.project.factory.blank_state(mode="static", remove_options=self._state_options_to_remove)
        assert state is not None

        # make room for arguments passed to the function
        sp = state.regs.sp
        sp_val = state.solver.eval_one(sp)
        state.memory.set_stack_address_mapping(sp_val, state.memory.stack_id(function_start) + "_pre", 0)
        state.registers.store("sp", sp - 0x100)

        # Set the stack address mapping for the initial stack
        state.memory.set_stack_size(state.arch.stack_size)
        initial_sp = state.solver.eval(
            state.regs.sp
        )  # FIXME: This is bad, as it may lose track of multiple sp values
        initial_sp -= state.arch.bytes
        state.memory.set_stack_address_mapping(initial_sp, state.memory.stack_id(function_start), function_start)

        return state

    def _set_return_address(self, state: "SimState", ret_addr: int) -> None:
        """
        Set the return address of the current state to a specific address. We assume we are at the beginning of a
        function, or in other words, that we are about to execute the very first instruction of the function.

        :param SimState state: The program state.
        :param int ret_addr:   The return address.
        :return: None
        """

        # TODO: the following code is totally untested other than X86 and AMD64. Don't freak out if you find bugs :)
        # TODO: Test it

        ret_bvv = state.solver.BVV(ret_addr, self.project.arch.bits)

        if self.project.arch.name in ("X86", "AMD64"):
            state.stack_push(ret_bvv)
        elif is_arm_arch(self.project.arch):
            state.regs.lr = ret_bvv
        elif self.project.arch.name in ("MIPS32", "MIPS64"):
            state.regs.ra = ret_bvv
        elif self.project.arch.name in ("PPC32", "PPC64"):
            state.regs.lr = ret_bvv
        else:
            l.warning(
                "Return address cannot be set for architecture %s. Please add corresponding logic to "
                "VFG._set_return_address().",
                self.project.arch.name,
            )

    def _create_graph(self, return_target_sources=None):
        """
        Create a DiGraph out of the existing edge map.

        :param return_target_sources: Used for making up those missing returns.
        :returns: A networkx.DiGraph() object.
        """

        if return_target_sources is None:
            # We set it to a defaultdict in order to be consistent with the actual parameter.
            return_target_sources = defaultdict(list)

        cfg = networkx.DiGraph()
        # The corner case: add a node to the graph if there is only one block
        if len(self._nodes) == 1:
            cfg.add_node(self._nodes[next(iter(self._nodes.keys()))])

        # Adding edges
        for tpl, targets in self._exit_targets.items():
            basic_block = self._nodes[tpl]  # Cannot fail :)
            for ex, jumpkind in targets:
                if ex in self._nodes:
                    target_bbl = self._nodes[ex]
                    cfg.add_edge(basic_block, target_bbl, jumpkind=jumpkind)

                    # Add edges for possibly missing returns
                    if basic_block.addr in return_target_sources:
                        for src_irsb_key in return_target_sources[basic_block.addr]:
                            cfg.add_edge(self._nodes[src_irsb_key], basic_block, jumpkind="Ijk_Ret")
                else:
                    # Debugging output
                    def addr_formalize(addr):
                        if addr is None:
                            return "None"
                        else:
                            return "%#08x" % addr

                    s = "(["
                    for addr in ex[:-1]:
                        s += addr_formalize(addr) + ", "
                    s += "] %s)" % addr_formalize(ex[-1])
                    l.warning("Key %s does not exist.", s)

        return cfg

    #
    # DiGraph manipulation
    #

    def _graph_get_node(self, block_id: BlockID, terminator_for_nonexistent_node: bool = False) -> Optional[VFGNode]:
        """
        Get an existing VFGNode instance from the graph.

        :param BlockID block_id:                     The block ID for the node to get.
        :param bool terminator_for_nonexistent_node: True if a Terminator (which is a SimProcedure stub) should be
                                                     created when there is no existing node available for the given
                                                     block ID.
        :return: A node in the graph, or None.
        :rtype: VFGNode
        """

        if block_id not in self._nodes:
            l.error("Trying to look up a node that we don't have yet. Is this okay????")
            if not terminator_for_nonexistent_node:
                return None
            # Generate a PathTerminator node
            addr = block_id.addr
            func_addr = block_id.func_addr
            if func_addr is None:
                # We'll have to use the current block address instead
                # TODO: Is it really OK?
                func_addr = addr

            input_state = self.project.factory.entry_state()
            input_state.ip = addr
            pt = VFGNode(addr, block_id, input_state)
            self._nodes[block_id] = pt

            if isinstance(self.project.arch, archinfo.ArchARM) and addr % 2 == 1:
                self._thumb_addrs.add(addr)
                self._thumb_addrs.add(addr - 1)

            l.debug("Block ID %s does not exist. Create a PathTerminator instead.", repr(block_id))

        return self._nodes[block_id]

    def _graph_add_edge(self, src_block_id: Optional[BlockID], dst_block_id: BlockID, **kwargs) -> None:
        """
        Add an edge onto the graph.

        :param BlockID src_block_id: The block ID for the source node.
        :param BlockID dst_block_id: The block ID for the destination node.
        :param str jumpkind:         The jumpkind of the edge.
        :param exit_stmt_idx:        ID of the statement in the source IRSB where this edge is created from.
                                     'default' refers to the default exit.
        :return: None
        """

        dst_node = self._graph_get_node(dst_block_id, terminator_for_nonexistent_node=True)

        if src_block_id is None:
            self.graph.add_node(dst_node)
        else:
            src_node = self._graph_get_node(src_block_id, terminator_for_nonexistent_node=True)
            self.graph.add_edge(src_node, dst_node, **kwargs)

    #
    # Other methods
    #

    def _get_simsuccessors(self, state: SimState, addr: int) -> Tuple[Optional[SimSuccessors], bool, bool]:
        error_occured = False
        restart_analysis = False

        jumpkind = "Ijk_Boring"
        if state.history.jumpkind:
            jumpkind = state.history.jumpkind

        try:
            node = self._cfg.model.get_any_node(addr)
            num_inst = None if node is None else len(node.instruction_addrs)
            sim_successors = self.project.factory.successors(state, jumpkind=jumpkind, num_inst=num_inst)
        except SimIRSBError as ex:
            # It's a tragedy that we came across some instructions that VEX does not support. I'll create a
            # terminating stub there.
            l.error("SimIRSBError occurred(%s). Creating a PathTerminator.", ex)
            error_occured = True
            inst = SIM_PROCEDURES["stubs"]["PathTerminator"](state)
            sim_successors = ProcedureEngine().process(state, procedure=inst)
        except claripy.ClaripyError:
            l.error("ClaripyError: ", exc_info=True)
            error_occured = True
            # Generate a PathTerminator to terminate the current path
            inst = SIM_PROCEDURES["stubs"]["PathTerminator"](state)
            sim_successors = ProcedureEngine().process(state, procedure=inst)
        except SimError:
            l.error("SimError: ", exc_info=True)
            error_occured = True
            # Generate a PathTerminator to terminate the current path
            inst = SIM_PROCEDURES["stubs"]["PathTerminator"](state)
            sim_successors = ProcedureEngine().process(state, procedure=inst)
        except AngrError as ex:
            # segment = self.project.loader.main_object.in_which_segment(addr)
            l.error("AngrError %s when generating SimSuccessors at %#x", ex, addr, exc_info=True)
            # We might be on a wrong branch, and are likely to encounter the "No bytes in memory xxx" exception.
            # Just ignore it.
            error_occured = True
            sim_successors = None

        return sim_successors, error_occured, restart_analysis

    def _create_new_jobs(
        self, job: VFGJob, successor: "SimState", new_block_id: BlockID, new_call_stack: CallStack
    ) -> List[Union[VFGJob, Any]]:
        """
        Create a list of new VFG jobs for the successor state.

        :param VFGJob job:           The VFGJob instance.
        :param SimState successor:   The succeeding state.
        :param BlockID new_block_id: Block ID for the new VFGJob.
        :param new_call_stack:       The new callstack.
        :return:                     A list of newly created VFG jobs.
        :rtype: list
        """

        # TODO: basic block stack is probably useless

        jumpkind = successor.history.jumpkind
        stmt_idx = successor.scratch.stmt_idx
        ins_addr = successor.scratch.ins_addr
        # Make a copy of the state in case we use it later
        successor_state = successor.copy()
        successor_addr = successor_state.solver.eval(successor_state.ip)

        new_jobs = []

        if jumpkind == "Ijk_FakeRet":
            assert job.is_call_jump

            # This is the default "fake" return successor generated at each call, if and only if the target function
            # returns.
            # If the call is skipped (for whatever reason, like we reached the interfunction tracing limit), we use
            # this FakeRet successor as the final state of the function. Otherwise we save the FakeRet state in case
            # the callee does not return normally, but don't process it right away.

            # Clear the useless values (like return addresses, parameters) on stack if needed
            if self._cfg is not None:
                current_function = self.kb.functions.function(job.call_target)
                if current_function is not None:
                    sp_difference = current_function.sp_delta
                else:
                    sp_difference = 0
                arch = successor_state.arch
                reg_sp_offset = arch.sp_offset
                reg_sp_expr = (
                    successor_state.registers.load(reg_sp_offset, size=arch.bytes, endness=arch.register_endness)
                    + sp_difference
                )  # type: ignore
                successor_state.registers.store(arch.sp_offset, reg_sp_expr)

                # Clear the return value with a TOP
                top_si = successor_state.solver.TSI(arch.bits)
                successor_state.registers.store(arch.ret_offset, top_si)

            if job.call_skipped:
                # TODO: Make sure the return values make sense
                # if self.project.arch.name == "X86":
                #     successor_state.regs.eax = successor_state.solver.BVS(
                #         "ret_val", 32, min=0, max=0xFFFFFFFF, stride=1
                #     )

                new_job = VFGJob(
                    successor_addr,
                    successor_state,
                    self._context_sensitivity_level,
                    block_id=new_block_id,
                    jumpkind="Ijk_Ret",
                    call_stack=new_call_stack,
                    src_block_id=job.block_id,
                    src_exit_stmt_idx=stmt_idx,
                    src_ins_addr=ins_addr,
                )
                new_jobs.append(new_job)
                assert isinstance(self._task_stack[-2], FunctionAnalysis)
                self._task_stack[-2].jobs.append(new_job)
                job.dbg_exit_status[successor] = "Pending"
            else:
                self._pending_returns[new_block_id] = PendingJob(
                    new_block_id, successor_state, new_call_stack, job.block_id, stmt_idx, ins_addr
                )  # type: ignore
                job.dbg_exit_status[successor] = "Pending"

        else:
            if sim_options.ABSTRACT_MEMORY in successor.options:
                if self._is_call_jumpkind(successor.history.jumpkind):
                    # If this is a call, we create a new stack address mapping
                    reg_sp_si = self._create_stack_region(successor_state, successor_addr)

                    # Save the new sp register
                    new_reg_sp_expr = successor_state.solver.ValueSet(
                        successor_state.arch.bits, "global", 0, reg_sp_si
                    )
                    successor_state.regs.sp = new_reg_sp_expr

                elif successor.history.jumpkind == "Ijk_Ret":
                    # Remove the existing stack address mapping
                    # FIXME: Now we are assuming the sp is restored to its original value
                    reg_sp_expr = successor_state.regs.sp

                    if isinstance(reg_sp_expr._model_vsa, claripy.vsa.StridedInterval):  # type: ignore
                        reg_sp_si = reg_sp_expr._model_vsa
                        # reg_sp_val = reg_sp_si.min
                    elif isinstance(reg_sp_expr._model_vsa, claripy.vsa.ValueSet):  # type: ignore
                        reg_sp_si = next(iter(reg_sp_expr._model_vsa.items()))[1]  # type: ignore
                        # reg_sp_val = reg_sp_si.min

                    # TODO: Finish it!

            new_job = VFGJob(
                successor_addr,
                successor_state,
                self._context_sensitivity_level,
                block_id=new_block_id,
                jumpkind=successor_state.history.jumpkind,
                call_stack=new_call_stack,
                src_block_id=job.block_id,
                src_exit_stmt_idx=stmt_idx,
                src_ins_addr=ins_addr,
            )

            if successor.history.jumpkind == "Ijk_Ret":
                # it is returning to the return site

                # save the state as a final state of the function that we are returning from
                if self._record_function_final_states:
                    # key of the function that we are returning from
                    source_function_key = FunctionKey.new(job.func_addr, job.call_stack_suffix)
                    self._save_function_final_state(source_function_key, job.func_addr, successor_state)

                # TODO: add an assertion that requires the returning target to be the same as the return address we
                # TODO: stored before

                current_task = self._top_task
                if current_task.call_analysis is not None:
                    current_task.call_analysis.add_final_job(new_job)
                    job.dbg_exit_status[successor] = "Appended to the call analysis task"
                else:
                    job.dbg_exit_status[successor] = "Discarded (no call analysis task)"

            else:
                if self._is_call_jumpkind(successor.history.jumpkind):
                    # create a function analysis task
                    # TODO: the return address
                    task = FunctionAnalysis(new_job.addr, None)
                    self._task_stack.append(task)
                    # link it to the call analysis
                    job.call_task.register_function_analysis(task)
                else:
                    task = self._top_task

                # register the job to the function task
                task.jobs.append(new_job)

                # insert the new job into the new job list
                new_jobs.append(new_job)
                job.dbg_exit_status[successor] = "Appended"

        if not job.is_call_jump or jumpkind != "Ijk_FakeRet":
            new_target = (new_block_id, jumpkind)
        else:
            new_target = (new_block_id, "Ijk_FakeRet")  # This is the fake return!
        self._exit_targets[job.call_stack_suffix + (job.addr,)].append(new_target)

        return new_jobs

    def _remove_pending_return(self, job, pending_returns):
        """
        Remove all pending returns that are related to the current job.
        """

        # Build the tuples that we want to remove from the `pending_returns` dict
        tpls_to_remove = []
        call_stack_copy = job.call_stack_copy()
        while call_stack_copy.current_return_target is not None:
            ret_target = call_stack_copy.current_return_target
            # Remove the current call stack frame
            call_stack_copy = call_stack_copy.ret(ret_target)
            call_stack_suffix = call_stack_copy.stack_suffix(self._context_sensitivity_level)
            tpl = call_stack_suffix + (ret_target,)
            tpls_to_remove.append(tpl)

        # Remove those tuples from the dict
        for tpl in tpls_to_remove:
            if tpl in pending_returns:
                del pending_returns[tpl]
                l.debug(
                    "Removed (%s) from FakeExits dict.",
                    ",".join([hex(i) if i is not None else "None" for i in tpl]),
                )

    def _post_job_handling_debug(self, job: VFGJob, successors):
        """
        Print out debugging information after handling a VFGJob and generating the succeeding jobs.

        :param VFGJob job:      The VFGJob instance.
        :param list successors: A list of succeeding states.
        :return: None
        """

        func = self.project.loader.find_symbol(job.addr)
        function_name = func.name if func is not None else None
        module_name = self.project.loader.find_object_containing(job.addr).provides

        l.debug("VFGJob @ %#08x with callstack [ %s ]", job.addr, job.callstack_repr(self.kb))
        l.debug("(Function %s of %s)", function_name, module_name)
        l.debug("- is call jump: %s", job.is_call_jump)
        for suc in successors:
            if suc not in job.dbg_exit_status:
                l.warning("- %s is not found. FIND OUT WHY.", suc)
                continue

            try:
                l.debug(
                    "- successor: %#08x of %s [%s]",
                    suc.solver.eval_one(suc.ip),
                    suc.history.jumpkind,
                    job.dbg_exit_status[suc],
                )
            except SimValueError:
                l.debug("- target cannot be concretized. %s [%s]", job.dbg_exit_status[suc], suc.history.jumpkind)
        l.debug("Remaining/pending jobs: %d/%d", len(self._job_info_queue), len(self._pending_returns))
        l.debug("Remaining jobs: %s", ["%s %d" % (ent.job, id(ent.job)) for ent in self._job_info_queue])
        l.debug("Task stack: %s", self._task_stack)

    @staticmethod
    def _is_call_jumpkind(jumpkind: str) -> bool:
        if jumpkind == "Ijk_Call" or jumpkind.startswith("Ijk_Sys_"):
            return True
        return False

    @staticmethod
    def _is_return_jumpkind(jumpkind):
        return jumpkind in ("Ijk_Ret", "Ijk_FakeRet")

    @staticmethod
    def _create_stack_region(successor_state: SimState, successor_ip: int) -> StridedInterval:
        arch = successor_state.arch
        reg_sp_offset = arch.sp_offset
        reg_sp_expr = successor_state.registers.load(reg_sp_offset, size=arch.bytes, endness=arch.register_endness)

        if type(reg_sp_expr._model_vsa) is claripy.BVV:  # pylint:disable=unidiomatic-typecheck
            reg_sp_val = successor_state.solver.eval(reg_sp_expr)
            reg_sp_si = successor_state.solver.SI(to_conv=reg_sp_expr)
            reg_sp_si = reg_sp_si._model_vsa
        elif type(reg_sp_expr._model_vsa) is int:  # pylint:disable=unidiomatic-typecheck
            reg_sp_val = reg_sp_expr._model_vsa
            reg_sp_si = successor_state.solver.SI(bits=successor_state.arch.bits, to_conv=reg_sp_val)
            reg_sp_si = reg_sp_si._model_vsa
        elif type(reg_sp_expr._model_vsa) is claripy.vsa.StridedInterval:  # pylint:disable=unidiomatic-typecheck
            reg_sp_si = reg_sp_expr._model_vsa  # type: ignore
            reg_sp_val = reg_sp_si.min
        else:
            reg_sp_si = next(iter(reg_sp_expr._model_vsa.items()))[1]
            reg_sp_val = reg_sp_si.min

        reg_sp_val = reg_sp_val - arch.bytes  # TODO: Is it OK?
        new_stack_region_id = successor_state.memory.stack_id(successor_ip)
        successor_state.memory.set_stack_address_mapping(reg_sp_val, new_stack_region_id, successor_ip)

        return reg_sp_si

    def _create_callstack(
        self, job: VFGJob, successor_ip: int, jumpkind: str, fakeret_successor: Optional[SimState]
    ) -> CallStack:
        addr = job.addr

        if self._is_call_jumpkind(jumpkind):
            new_call_stack = job.call_stack_copy()
            # Notice that in ARM there are some freaking instructions like
            # BLEQ <address>
            # It should give us three exits: Ijk_Call, Ijk_Boring, and Ijk_Ret. The last exit is simulated.
            # Notice: We assume the last exit is the simulated one
            if fakeret_successor is None:
                retn_target_addr = None
            else:
                retn_target_addr = fakeret_successor.solver.eval_one(fakeret_successor.ip)

            # Create call stack
            new_call_stack = new_call_stack.call(addr, successor_ip, retn_target=retn_target_addr)
        elif jumpkind == "Ijk_Ret":
            new_call_stack = job.call_stack_copy()
            new_call_stack = new_call_stack.ret(successor_ip)
        else:
            # Normal control flow transition
            new_call_stack = job.call_stack

        return new_call_stack

    def _save_function_initial_state(self, function_key: FunctionKey, function_address: int, state: SimState) -> None:
        """
        Save the initial state of a function, and merge it with existing ones if there are any.

        :param FunctionKey function_key: The key to this function.
        :param int function_address:     Address of the function.
        :param SimState state:           Initial state of the function.
        :return: None
        """

        l.debug("Saving the initial state for function %#08x with function key %s", function_address, function_key)
        if function_key in self._function_initial_states[function_address]:
            existing_state = self._function_initial_states[function_address][function_key]
            merged_state, _, _ = existing_state.merge(state)
            self._function_initial_states[function_address][function_key] = merged_state
        else:
            self._function_initial_states[function_address][function_key] = state

    def _save_function_final_state(self, function_key, function_address, state):
        """
        Save the final state of a function, and merge it with existing ones if there are any.

        :param FunctionKey function_key: The key to this function.
        :param int function_address:     Address of the function.
        :param SimState state:           Final state of the function.
        :return: None
        """

        l.debug("Saving the final state for function %#08x with function key %s", function_address, function_key)

        if function_key in self._function_final_states[function_address]:
            existing_state = self._function_final_states[function_address][function_key]
            merged_state = existing_state.merge(state, plugin_whitelist=self._mergeable_plugins)[0]
            self._function_final_states[function_address][function_key] = merged_state
        else:
            self._function_final_states[function_address][function_key] = state

    def _trace_pending_job(self, job_key):
        pending_job: PendingJob = self._pending_returns.pop(job_key)
        addr = job_key.addr

        # Unlike CFG, we will still trace those blocks that have been traced before. In other words, we don't remove
        # fake returns even if they have been traced - otherwise we cannot come to a fix-point.
        block_id = BlockID.new(addr, pending_job.call_stack.stack_suffix(self._context_sensitivity_level), "Ijk_Ret")
        job = VFGJob(
            addr,
            pending_job.state,
            self._context_sensitivity_level,
            block_id=block_id,
            jumpkind=pending_job.state.history.jumpkind,
            call_stack=pending_job.call_stack,
            src_block_id=pending_job.src_block_id,
            src_exit_stmt_idx=pending_job.src_stmt_idx,
            src_ins_addr=pending_job.src_ins_addr,
        )
        self._insert_job(job)
        self._top_task.jobs.append(job)

    def _get_pending_job(self, func_addr):
        pending_ret_key = None
        k: BlockID
        for k in self._pending_returns:
            if k.func_addr == func_addr:
                pending_ret_key = k
                break

        return pending_ret_key

    def _get_block_addr(self, b):  # pylint:disable=R0201
        if isinstance(b, SimSuccessors):
            return b.addr
        else:
            raise TypeError("Unsupported block type %s" % type(b))

    def _get_nx_paths(self, begin, end):
        """
        Get the possible (networkx) simple paths between two nodes, or between the addresses corresponding to two
        nodes.
        Input: addresses or node instances.
        Return: a list of lists of nodes representing paths.
        """
        if type(begin) is int and type(end) is int:  # pylint:disable=unidiomatic-typecheck
            n_begin = self.get_any_node(begin)
            n_end = self.get_any_node(end)
        elif isinstance(begin, VFGNode) and isinstance(end, VFGNode):  # pylint:disable=unidiomatic-typecheck
            n_begin = begin
            n_end = end
        else:
            raise AngrVFGError("from and to should be of the same type")

        return networkx.all_simple_paths(self.graph, n_begin, n_end)  # type: ignore

    def _merge_points(self, function_address):
        """
        Return the ordered merge points for a specific function.

        :param int function_address: Address of the querying function.
        :return: A list of sorted merge points (addresses).
        :rtype: list
        """

        # we are entering a new function. now it's time to figure out how to optimally traverse the control flow
        # graph by generating the sorted merge points
        try:
            new_function = self.kb.functions[function_address]
        except KeyError:
            # the function does not exist
            return []

        if function_address not in self._function_merge_points:
            ordered_merge_points = GraphUtils.find_merge_points(
                function_address, new_function.endpoints, new_function.graph
            )
            self._function_merge_points[function_address] = ordered_merge_points

        return self._function_merge_points[function_address]

    def _widening_points(self, function_address):
        """
        Return the ordered widening points for a specific function.

        :param int function_address: Address of the querying function.
        :return: A list of sorted widening points (addresses).
        :rtype: list
        """

        # we are entering a new function. now it's time to figure out how to optimally traverse the control flow
        # graph by generating the sorted widening points
        try:
            new_function = self.kb.functions[function_address]
        except KeyError:
            # the function does not exist
            return []

        if function_address not in self._function_widening_points:
            if not new_function.normalized:
                new_function.normalize()
            widening_points = GraphUtils.find_widening_points(
                function_address, new_function.endpoints, new_function.graph
            )
            self._function_widening_points[function_address] = widening_points

        return self._function_widening_points[function_address]

    def _ordered_node_addrs(self, function_address):
        """
        For a given function, return all of its block addresses in an optimal traversal order. If the function does
        not exist, return an empty list.

        :param int function_address: Address of the function.
        :return: An ordered list of block addresses.
        :rtype: list
        """

        try:
            function = self.kb.functions[function_address]
        except KeyError:
            # the function does not exist
            return []

        if function_address not in self._function_node_addrs:
            sorted_nodes = GraphUtils.quasi_topological_sort_nodes(function.graph)
            self._function_node_addrs[function_address] = [n.addr for n in sorted_nodes]

        return self._function_node_addrs[function_address]

AnalysesHub.register_default("VFG", VFG)
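

if __name__ == "__main__":
    # A minimal end-to-end sketch (illustrative, not part of the original module).
    # The binary path "/bin/true" is an assumption; substitute any target binary.
    import angr

    proj = angr.Project("/bin/true", load_options={"auto_load_libs": False})
    # The VFG builds a CFGEmulated by itself when none is given; constructing one
    # up front allows it to be normalized explicitly and reused across analyses.
    cfg = proj.analyses.CFGEmulated(context_sensitivity_level=2, normalize=True)
    vfg = proj.analyses.VFG(
        cfg=cfg,
        start=proj.entry,
        function_start=proj.entry,
        interfunction_level=1,
        record_function_final_states=True,
    )
    node = vfg.get_any_node(proj.entry)
    if node is not None:
        print("Invariant state at %#x: %s" % (node.addr, node.state))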