from typing import TYPE_CHECKING, Any, Callable, DefaultDict, Dict, List, Generator, Optional, Set, Tuple, Union
import logging
from collections import defaultdict
import archinfo
from archinfo.arch_arm import is_arm_arch
import claripy
from claripy.vsa.strided_interval import StridedInterval
import networkx
from angr.utils.graph import GraphUtils
from angr.analyses import ForwardAnalysis
from .cfg.cfg_job_base import BlockID, FunctionKey, CFGJobBase
from .. import sim_options
from ..engines.procedure import ProcedureEngine
from ..engines import SimSuccessors
from ..errors import (
AngrDelayJobNotice,
AngrSkipJobNotice,
AngrVFGError,
AngrError,
AngrVFGRestartAnalysisNotice,
AngrJobMergingFailureNotice,
SimValueError,
SimIRSBError,
SimError,
)
from ..procedures import SIM_PROCEDURES
from ..state_plugins.callstack import CallStack
from ..analyses import AnalysesHub
from ..sim_state import SimState
from . import Analysis, CFGEmulated
if TYPE_CHECKING:
from ..knowledge_base import KnowledgeBase
l = logging.getLogger(name=__name__)
class VFGJob(CFGJobBase):
"""
A job descriptor that contains local variables used during VFG analysis.
"""
def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self.call_stack_suffix: List = []
self.vfg_node: Optional[VFGNode] = None
self.is_call_jump = None
self.call_target = None
self.dbg_exit_status = {}
self.is_return_jump = None
self.sim_successors: Optional[SimSuccessors] = None
# if this job has a call successor, do we plan to skip the call successor or not
self.call_skipped = False
# if the call is skipped, the function key of the skipped callee is saved in `call_function_key`
self.call_function_key: Optional[FunctionKey] = None
self.call_task: Optional[CallAnalysis] = None
@property
def block_id(self) -> Optional[BlockID]:
return self._block_id
def callstack_repr(self, kb: "KnowledgeBase"):
s = []
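# the call stack suffix is a flat tuple of alternating (call_site_addr, function_addr)
# pairs, which is why the loop below walks it with a stride of 2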
for i in range(0, len(self.call_stack_suffix), 2):
call_site, func_addr = (
self.call_stack_suffix[i],
self.call_stack_suffix[i + 1],
) # pylint:disable=unsubscriptable-object
if func_addr is None:
continue
call_site_str = "%#x" % call_site if call_site is not None else "None"
if func_addr in kb.functions:
s.append(f"{kb.functions[func_addr].name}[{call_site_str}]")
else:
s.append(f"{func_addr:#x}[{call_site_str}]")
return "//".join(s)
class PendingJob:
"""
Describes a pending job during VFG analysis.
"""
__slots__ = (
"block_id",
"state",
"call_stack",
"src_block_id",
"src_stmt_idx",
"src_ins_addr",
)
def __init__(
self,
block_id: BlockID,
state: SimState,
call_stack: CallStack,
src_block_id: BlockID,
src_stmt_idx: int,
src_ins_addr: int,
) -> None:
self.block_id = block_id
self.state = state
self.call_stack = call_stack
self.src_block_id = src_block_id
self.src_stmt_idx = src_stmt_idx
self.src_ins_addr = src_ins_addr
class AnalysisTask:
"""
An analysis task describes work that should be completed before this task is popped off the task stack and discarded.
"""
def __init__(self) -> None:
pass
@property
def done(self):
raise NotImplementedError()
class FunctionAnalysis(AnalysisTask):
"""
Analyze a function, generate fix-point states from all endpoints of that function, and then merge them into one state.
"""
def __init__(self, function_address: int, return_address: Optional[int]) -> None:
super().__init__()
self.function_address = function_address
self.return_address = return_address
self.call_analysis: Optional[AnalysisTask] = None
# tracks all jobs that are live currently
self.jobs = []
def __repr__(self):
s = "<Function @ %#08x with %d jobs>" % (self.function_address, len(self.jobs))
return s
#
# Properties
#
@property
def done(self) -> bool:
return not self.jobs
class CallAnalysis(AnalysisTask):
"""
Analyze a call by analyzing all functions this call might be calling, collecting all final states generated by analyzing
those functions, and merging them into one state.
"""
def __init__(
self,
address: int,
return_address: None,
function_analysis_tasks: Optional[List[Any]] = None,
mergeable_plugins: Optional[Tuple[str, str]] = None,
) -> None:
super().__init__()
self.address = address
self.return_address = return_address
self.function_analysis_tasks = function_analysis_tasks if function_analysis_tasks is not None else []
self._mergeable_plugins = mergeable_plugins
self.skipped = False
self._final_jobs = []
def __repr__(self):
s = "<Call @ %#08x with %d function tasks>" % (self.address, len(self.function_analysis_tasks))
return s
#
# Properties
#
@property
def done(self) -> bool:
for task in self.function_analysis_tasks:
if not task.done:
return False
return True
#
# Public methods
#
def register_function_analysis(self, task: FunctionAnalysis) -> None:
assert isinstance(task, FunctionAnalysis)
self.function_analysis_tasks.append(task)
task.call_analysis = self
def add_final_job(self, job: VFGJob) -> None:
self._final_jobs.append(job)
def merge_jobs(self) -> VFGJob:
assert self._final_jobs
job = self._final_jobs[0]
for other in self._final_jobs[1:]:
job.state = job.state.merge(other.state, plugin_whitelist=self._mergeable_plugins)[0]
return job
class VFGNode:
"""
A descriptor of nodes in a Value-Flow Graph
"""
def __init__(self, addr: int, key: BlockID, state: Optional["SimState"] = None) -> None:
"""
Constructor.
:param int addr:
:param BlockID key:
:param SimState state:
"""
self.key = key
self.addr = addr
self.state: Optional[SimState] = None
self.widened_state: Optional[SimState] = None
self.narrowing_times: int = 0
self.all_states: List[SimState] = []
self.events: List = []
self.input_variables: List = []
self.actions: List = []
self.final_states: List[SimState] = []
if state:
self.all_states.append(state)
self.state = state
def __hash__(self) -> int:
return hash(self.key)
def __eq__(self, o):
return (
type(self) == type(o)
and self.key == o.key # pylint:disable=unidiomatic-typecheck
and self.addr == o.addr
and self.state == o.state
and self.actions == o.actions
and self.events == o.events
and self.narrowing_times == o.narrowing_times
and self.all_states == o.all_states
and self.widened_state == o.widened_state
and self.input_variables == o.input_variables
)
def __repr__(self):
s = f"VFGNode[{self.addr:#x}] <{repr(self.key)}>"
return s
def append_state(self, s, is_widened_state=False):
"""
Append a new state to this VFGNode.
:param s: The new state to append
:param is_widened_state: Whether it is a widened state or not.
"""
if not is_widened_state:
self.all_states.append(s)
self.state = s
else:
self.widened_state = s
class VFG(ForwardAnalysis[SimState, VFGNode, VFGJob, BlockID], Analysis): # pylint:disable=abstract-method
"""
This class represents a control-flow graph with static analysis results.
Perform abstract interpretation analysis starting from the given function address. The output is an invariant at
the beginning (or the end) of each basic block.
Steps:
- Generate a CFG first if CFG is not provided.
- Identify all merge points (denote the set of merge points as Pw) in the CFG.
- Cut those loop back edges (can be derived from Pw) so that we gain an acyclic CFG.
- Identify all variables that are 1) from memory loading, 2) from initial values, or 3) phi functions. Denote
the set of those variables as S_{var}.
- Start real AI analysis and try to compute a fix point of each merge point. Perform widening/narrowing only on
variables \\in S_{var}.
"""
# TODO: right now the graph traversal method is not optimal. A new solution is needed to minimize the iteration we
# TODO: access each node in the graph
def __init__(
self,
cfg: Optional[CFGEmulated] = None,
context_sensitivity_level: int = 2,
start: Optional[int] = None,
function_start: Optional[int] = None,
interfunction_level: int = 0,
initial_state: Optional["SimState"] = None,
avoid_runs: Optional[List[int]] = None,
remove_options: Optional[Set[str]] = None,
timeout: Optional[int] = None,
max_iterations_before_widening: int = 8,
max_iterations: int = 40,
widening_interval: int = 3,
final_state_callback: Optional[Callable[["SimState", CallStack], Any]] = None,
status_callback: Optional[Callable[["VFG"], Any]] = None,
record_function_final_states: bool = False,
) -> None:
"""
:param cfg: The control-flow graph to base this analysis on. If none is provided, we will
construct a CFGEmulated.
:param context_sensitivity_level: The level of context-sensitivity of this VFG.
It ranges from 0 to infinity. Default 2.
:param function_start: The address of the function to analyze.
:param interfunction_level: The level of interfunction-ness, i.e., how many levels of calls into other functions will be followed.
:param initial_state: A state to use as the initial one
:param avoid_runs: A list of runs to avoid
:param remove_options: State options to remove from the initial state. It only works when `initial_state` is
None
:param int timeout:
:param final_state_callback: callback function invoked when encountering a final state
:param status_callback: callback function used in _analysis_core_baremetal
"""
ForwardAnalysis.__init__(
self, order_jobs=True, allow_merging=True, allow_widening=True, status_callback=status_callback
) # type:ignore
# Related CFG.
# Analysis can still be performed without a CFG, but providing one may yield better results.
self._cfg = cfg
# Where to start the analysis
self._start: int = start if start is not None else self.project.entry
self._function_start: int = function_start if function_start is not None else self._start
# Other parameters
self._avoid_runs: List[int] = [] if avoid_runs is None else avoid_runs
self._context_sensitivity_level = context_sensitivity_level
self._interfunction_level = interfunction_level
self._state_options_to_remove = set() if remove_options is None else remove_options
self._timeout = timeout
self._start_at_function = self._start == self._function_start
self._initial_state = initial_state
self._max_iterations_before_widening = max_iterations_before_widening
self._max_iterations = max_iterations
self._widening_interval = widening_interval
self._final_state_callback = final_state_callback
self._record_function_final_states = record_function_final_states
self._nodes: Dict[BlockID, VFGNode] = {} # all the vfg nodes, keyed on block IDs
self._normal_states: Dict[
BlockID, SimState
] = {} # Last available state for each program point without widening
self._widened_states: Dict[BlockID, SimState] = {} # States on which widening has occurred
# Initial states of each function, which is context sensitive
# It maps function key to its states
self._function_initial_states: DefaultDict[int, Dict[int, SimState]] = defaultdict(dict)
# Final states of each function, right after `ret` is called. Also context sensitive.
# Even if a function has multiple return sites, as long as they all return to the same place, there is
# only one final state for that function.
self._function_final_states: DefaultDict[int, Dict[int, SimState]] = defaultdict(dict)
# All final states are put in this list
self.final_states: List[SimState] = []
self._state_initialization_map: DefaultDict[int, List[Tuple[int, int]]] = defaultdict(list)
self._exit_targets: DefaultDict[Tuple[Union[int, None], ...], List[Tuple[BlockID, str]]] = defaultdict(
list
) # A dict to log edges and the jumpkind between each basic block
# A dict to record all blocks that return to a specific address
self._return_target_sources: DefaultDict[int, List[int]] = defaultdict(list)
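# Fake-return (Ijk_FakeRet) successors whose processing is deferred until the callee's
# analysis finishes; they are picked up again by _trace_pending_job() / _job_queue_empty()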
self._pending_returns: Dict[BlockID, PendingJob] = {}
self._thumb_addrs: Set[int] = set() # set of all addresses that are code in thumb mode
self._final_address: Optional[
int
] = None # Address of the very last instruction. The analysis is terminated there.
self._function_merge_points: Dict[int, List[int]] = {}
self._function_widening_points: Dict[int, List[int]] = {}
self._function_node_addrs: Dict[int, List[int]] = {} # sorted in reverse post-order
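# only these state plugins are merged when abstract states are combined (passed as
# plugin_whitelist to SimState.merge() throughout this analysis)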
self._mergeable_plugins = ("memory", "registers")
self._task_stack: List[FunctionAnalysis] = []
self._tracing_times: DefaultDict[BlockID, int] = defaultdict(int)
# counters for debugging
self._execution_counter: DefaultDict[BlockID, int] = defaultdict(int)
# Start analysis
self._analyze()
#
# Internal properties
#
@property
def _current_function_address(self):
return self._task_stack[-1].function_address
@property
def _top_task(self) -> Optional[FunctionAnalysis]:
"""
Get the task at the top of the stack.
:return: The top task in the stack, or None if the stack is empty.
:rtype: FunctionAnalysis
"""
return self._task_stack[-1] if len(self._task_stack) != 0 else None
@property
def _top_function_analysis_task(self):
"""
Get the topmost FunctionAnalysis task on the stack.
:return: The top function analysis task in the stack, or None if there isn't any.
:rtype: FunctionAnalysis
"""
for r in reversed(self._task_stack):
if isinstance(r, FunctionAnalysis):
return r
return None
@property
def function_initial_states(self):
return self._function_initial_states
@property
def function_final_states(self):
return self._function_final_states
#
# Public methods
#
def get_any_node(self, addr: int) -> Optional[VFGNode]:
"""
Get any VFG node corresponding to the basic block at @addr.
Note that depending on the context sensitivity level, there might be
multiple nodes corresponding to different contexts. This function will
return the first one it encounters, which might not be what you want.
"""
for n in self.graph.nodes():
if n.addr == addr:
return n
return None
def get_all_nodes(self, addr) -> Generator[VFGNode, None, None]:
for n in self.graph.nodes():
if n.addr == addr:
yield n
def irsb_from_node(self, node):
return self.project.factory.successors(node.state, addr=node.addr)
#
# Operations
#
def copy(self):
new_vfg = VFG(self.project) # type:ignore
new_vfg._cfg = self._cfg
new_vfg._graph = networkx.DiGraph(self.graph)
new_vfg._nodes = self._nodes.copy()
new_vfg._exit_targets = defaultdict(list, self._exit_targets)
return new_vfg
# Pickling helpers
def __setstate__(self, s):
self.__dict__.update(s)
def __getstate__(self):
return dict(self.__dict__)
#
# Main analysis routines, mostly overriding methods of ForwardAnalysis
#
def _pre_analysis(self) -> None:
"""
Executed before analysis starts. Necessary initializations are performed here.
:return: None
"""
l.debug("Starting from %#x", self._start)
# initialize the task stack
self._task_stack = []
# initialize the execution counter dict
self._execution_counter = defaultdict(int)
# Generate a CFG if no CFG is provided
if not self._cfg:
l.debug("Generating a CFG, since none was given...")
# TODO: can we use a fast CFG instead? note that a fast CFG does not care about context sensitivity at all, but
# TODO: for state merging, we also don't really care about context sensitivity.
self._cfg: CFGEmulated = self.project.analyses[CFGEmulated].prep()(
context_sensitivity_level=self._context_sensitivity_level, starts=(self._start,)
)
if not self._cfg.normalized:
l.warning(
"The given CFG is not normalized, which might impact the performance/accuracy of the VFG analysis."
)
# Prepare the state
initial_state = self._prepare_initial_state(self._start, self._initial_state)
initial_state.ip = self._start
if self.project.arch.name.startswith("MIPS"):
initial_state.regs.t9 = self._start
# clear function merge points cache
self._function_merge_points = {}
# Create the initial state
state = initial_state.copy()
if self._start_at_function:
# set the return address to an address so we can catch it and terminate the VSA analysis
# TODO: Properly pick an address that will not conflict with any existing code and data in the program
self._final_address = 0x4FFF0000
self._set_return_address(state, self._final_address)
call_stack = None
if not self._start_at_function:
# we should build a custom call stack
call_stack = CallStack()
call_stack = call_stack.call(None, self._function_start, retn_target=self._final_address)
job = VFGJob(
state.addr,
state,
self._context_sensitivity_level,
jumpkind="Ijk_Boring",
final_return_address=self._final_address,
call_stack=call_stack,
)
block_id = BlockID.new(state.addr, job.get_call_stack_suffix(), job.jumpkind)
job._block_id = block_id
self._insert_job(job)
# create the task
function_analysis_task = FunctionAnalysis(self._function_start, self._final_address)
function_analysis_task.jobs.append(job)
self._task_stack.append(function_analysis_task)
def _job_sorting_key(self, job):
"""
Get the sorting key of a VFGJob instance.
:param VFGJob job: the VFGJob object.
:return: An integer that determines the order of this job in the queue.
:rtype: int
"""
MAX_BLOCKS_PER_FUNCTION = 1000000
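# Composite sort key: jobs for the function on top of the task stack get the smallest keys
# (function_pos == 0 after the reversal below); within a function, blocks are keyed by their
# position in the function's quasi-topological node ordering.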
task_functions = list(
reversed(list(task.function_address for task in self._task_stack if isinstance(task, FunctionAnalysis)))
)
try:
function_pos = task_functions.index(job.func_addr)
except ValueError:
# not in the list
# it might be because we followed the wrong path, or there is a bug in the traversal algorithm
# anyways, do it first
l.warning("Function address %#x is not found in task stack.", job.func_addr)
return 0
try:
block_in_function_pos = self._ordered_node_addrs(job.func_addr).index(job.addr)
except ValueError:
# block not found. what?
block_in_function_pos = min(job.addr - job.func_addr, MAX_BLOCKS_PER_FUNCTION - 1)
return block_in_function_pos + MAX_BLOCKS_PER_FUNCTION * function_pos
# return self._cfg.get_topological_order(self._cfg.get_node(job.block_id))
def _job_key(self, job: VFGJob) -> Optional[BlockID]:
"""
Return the block ID of the job. Two or more jobs owning the same block ID will be merged together.
:param VFGJob job: The VFGJob instance.
:return: The block ID of the job
:rtype: BlockID
"""
return job.block_id
def _pre_job_handling(self, job: VFGJob) -> None:
"""
Some code executed before actually processing the job.
:param VFGJob job: the VFGJob object.
:return: None
"""
# did we reach the final address?
if self._final_address is not None and job.addr == self._final_address:
# our analysis should be terminated here
l.debug("%s is viewed as a final state. Skip.", job)
raise AngrSkipJobNotice()
l.debug("Handling VFGJob %s", job)
if not self._top_task:
l.debug("No more tasks available. Skip the job.")
raise AngrSkipJobNotice()
assert isinstance(self._top_task, FunctionAnalysis)
if job not in self._top_task.jobs:
# it seems that all jobs of the top task have been done. unwind the task stack
# make sure this job is at least recorded somewhere
unwind_count = None
for i, task in enumerate(reversed(self._task_stack)):
if isinstance(task, FunctionAnalysis):
if job in task.jobs:
# nice
unwind_count = i
if unwind_count is None:
l.debug("%s is not recorded. Skip the job.", job)
raise AngrSkipJobNotice()
else:
# unwind the stack to the target, stopping early if a new top task still has pending jobs
for i in range(unwind_count):
if isinstance(self._top_task, FunctionAnalysis):
# are there any pending jobs belonging to the current function that we should handle first?
pending_job_key = self._get_pending_job(self._top_task.function_address)
if pending_job_key is not None:
# ah there is
# analyze it first
self._trace_pending_job(pending_job_key)
l.debug(
"A pending job is found for function %#x. Delay %s.",
self._top_task.function_address,
job,
)
raise AngrDelayJobNotice()
task = self._task_stack.pop()
if not task.done:
l.warning("Removing an unfinished task %s. Might be a bug.", task)
assert job in self._top_task.jobs
# check if this is considered to be a final state
if self._final_state_callback is not None and self._final_state_callback(job.state, job.call_stack):
l.debug("%s.state is considered as a final state. Skip the job.", job)
self.final_states.append(job.state)
raise AngrSkipJobNotice()
# increment the execution counter
self._execution_counter[job.addr] += 1
self._top_task.jobs.remove(job)
# set up some essential variables and parameters
job.call_stack_suffix = job.get_call_stack_suffix() # type:ignore
job.jumpkind = "Ijk_Boring" if job.state.history.jumpkind is None else job.state.history.jumpkind
src_block_id = job.src_block_id
src_exit_stmt_idx = job.src_exit_stmt_idx
addr = job.state.solver.eval(job.state.regs.ip)
input_state = job.state
block_id = BlockID.new(addr, job.call_stack_suffix, job.jumpkind)
if self._tracing_times[block_id] > self._max_iterations:
l.debug("%s has been traced too many times. Skip", job)
raise AngrSkipJobNotice()
self._tracing_times[block_id] += 1
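# (besides enforcing the _max_iterations cap above, the tracing count also drives the
# widening trigger in _should_widen_jobs())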
if block_id not in self._nodes:
vfg_node = VFGNode(addr, block_id, state=input_state)
self._nodes[block_id] = vfg_node
else:
vfg_node = self._nodes[block_id]
job.vfg_node = vfg_node
# log the current state
vfg_node.state = input_state
# Execute this basic block with input state, and get a new SimSuccessors instance
# unused result var is `error_occured`
job.sim_successors, _, restart_analysis = self._get_simsuccessors(input_state, addr)
if restart_analysis:
# We should restart the analysis because something must be changed in the very initial state
raise AngrVFGRestartAnalysisNotice()
if job.sim_successors is None:
# Ouch, we cannot get the SimSuccessors for some reason
# Skip this guy
l.debug("Cannot create SimSuccessors for %s. Skip.", job)
raise AngrSkipJobNotice()
self._graph_add_edge(src_block_id, block_id, jumpkind=job.jumpkind, src_exit_stmt_idx=src_exit_stmt_idx)
def _get_successors(self, job: VFGJob) -> List[SimState]:
# Extract initial values
state = job.state
addr = job.addr
# Obtain successors
if addr not in self._avoid_runs:
assert job.sim_successors is not None
all_successors: List["SimState"] = (
job.sim_successors.flat_successors + job.sim_successors.unconstrained_successors
)
else:
all_successors = []
assert job.vfg_node is not None
# save those states
if job.vfg_node.final_states is None:
job.vfg_node.final_states = []
job.vfg_node.final_states.extend(all_successors)
# Update thumb_addrs
assert job.sim_successors
if job.sim_successors.sort == "IRSB" and state.thumb:
self._thumb_addrs.update(job.sim_successors.artifacts["insn_addrs"])
if not all_successors:
if job.sim_successors.sort == "SimProcedure" and isinstance(
job.sim_successors.artifacts["procedure"], SIM_PROCEDURES["stubs"]["PathTerminator"]
):
# If there is no valid exit in this branch and it's not
# intentional (e.g. caused by a SimProcedure that does not
# do_return), we should make it return to its callsite.
# However, we don't want to use its state as it might be
# corrupted. Just create a link in the exit_targets map.
retn_target = job.call_stack.current_return_target
if retn_target is not None:
new_call_stack = job.call_stack_copy()
exit_target_tpl = new_call_stack.stack_suffix(self._context_sensitivity_level) + (retn_target,)
self._exit_targets[job.call_stack_suffix + (addr,)].append((exit_target_tpl, "Ijk_Ret"))
else:
# This is intentional. We shall remove all the pending returns generated before along this path.
self._remove_pending_return(job, self._pending_returns)
# If this is a call exit, we shouldn't put the default exit (which
# is artificial) into the CFG. The exits will be Ijk_Call and
# Ijk_FakeRet, and Ijk_Call always goes first
job.is_call_jump = any([self._is_call_jumpkind(i.history.jumpkind) for i in all_successors])
call_targets = []
for succ in all_successors:
if self._is_call_jumpkind(succ.history.jumpkind):
try:
call_targets.append(succ.solver.eval_one(succ._ip))
# finding one is enough!
break
except SimValueError:
# this call has multiple possible targets. ignore it
pass
job.call_target = None if not call_targets else call_targets[0]
job.is_return_jump = len(all_successors) and all_successors[0].history.jumpkind == "Ijk_Ret"
if job.is_call_jump:
# create the call task
# TODO: correctly fill the return address
call_task = CallAnalysis(job.addr, None, [], mergeable_plugins=self._mergeable_plugins)
self._task_stack.append(call_task)
job.call_task = call_task
return all_successors
def _handle_successor(
self, job: VFGJob, successor: SimState, all_successors: List[SimState]
) -> List[Union[VFGJob, Any]]: # pylint:disable=arguments-renamed
"""
Process each successor generated by the job, and return a new list of succeeding jobs.
:param VFGJob job: The VFGJob instance.
:param SimState successor: The succeeding state.
:param list all_successors: A list of all successors.
:return: A list of newly created jobs from the successor.
:rtype: list
"""
# Initialize parameters
addr = job.addr
jumpkind = successor.history.jumpkind
#
# Get instruction pointer
#
if job.is_return_jump:
ret_target = job.call_stack.current_return_target
if ret_target is None:
# We have nowhere to go according to our call stack. However, the call stack might be corrupted
l.debug("According to the call stack, we have nowhere to return to.")
return []
successor.ip = ret_target
# this try-except block is to handle cases where the instruction pointer is symbolic
try:
successor_addrs = successor.solver.eval_upto(successor.ip, 2)
except SimValueError:
# TODO: Should fall back to reading targets from CFG
# It cannot be concretized currently. Maybe we could handle
# it later, maybe it just cannot be concretized
return []
if len(successor_addrs) > 1:
# multiple concrete targets
if job.is_return_jump:
# It might be caused by state merging
# We may retrieve the correct ip from call stack
successor.ip = job.call_stack.current_return_target
else:
return self._handle_successor_multitargets(job, successor, all_successors)
# Now there should be one single target for the successor
successor_addr = successor.solver.eval_one(successor.ip)
# Get the fake ret successor
fakeret_successor = None
if self._is_call_jumpkind(jumpkind):
fakeret_successor = all_successors[-1]
# If the function we're calling into doesn't return, we should discard it
if self._cfg is not None:
func = self.kb.functions.function(addr=job.call_target)
if func is not None and func.returning is False and len(all_successors) == 2:
del all_successors[-1]
fakeret_successor = None
if self._is_call_jumpkind(jumpkind):
# Create a new call stack for the successor
new_call_stack = self._create_callstack(job, successor_addr, jumpkind, fakeret_successor)
if new_call_stack is None:
l.debug("Cannot create a new callstack for address %#x", successor_addr)
job.dbg_exit_status[successor] = ""
return []
new_call_stack_suffix = new_call_stack.stack_suffix(self._context_sensitivity_level)
new_function_key = FunctionKey.new(successor_addr, new_call_stack_suffix)
# Save the initial state for the function
self._save_function_initial_state(new_function_key, successor_addr, successor.copy())
# bail out if we hit the interfunction_level cap
if len(job.call_stack) >= self._interfunction_level:
l.debug(
"We are not tracing into a new function %#08x as we hit interfunction_level limit", successor_addr
)
# mark it as skipped
job.dbg_exit_status[successor] = "Skipped"
job.call_skipped = True
job.call_function_key = new_function_key
job.call_task.skipped = True
return []
elif jumpkind == "Ijk_Ret":
# Pop the current function out from the call stack
new_call_stack = self._create_callstack(job, successor_addr, jumpkind, fakeret_successor)
if new_call_stack is None:
l.debug("Cannot create a new callstack for address %#x", successor_addr)
job.dbg_exit_status[successor] = ""
return []
new_call_stack_suffix = new_call_stack.stack_suffix(self._context_sensitivity_level)
else:
new_call_stack = job.call_stack
new_call_stack_suffix = job.call_stack_suffix
# Generate the new block ID
new_block_id = BlockID.new(successor_addr, new_call_stack_suffix, jumpkind)
#
# Generate new VFG jobs
#
if jumpkind == "Ijk_Ret":
assert not job.is_call_jump
# Record this return
self._return_target_sources[successor_addr].append(job.call_stack_suffix + (addr,))
# Check if this return is inside our pending returns list
if new_block_id in self._pending_returns:
del self._pending_returns[new_block_id]
# Check if we have reached a fix-point
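# (merge() reports whether merging changed anything; if nothing changed, the successor
# state is already subsumed by the recorded state and the job can be dropped)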
if jumpkind != "Ijk_FakeRet" and new_block_id in self._nodes:
last_state = self._nodes[new_block_id].state
_, _, merged = last_state.merge(successor, plugin_whitelist=self._mergeable_plugins)
if merged:
l.debug("%s didn't reach a fix-point", new_block_id)
else:
l.debug("%s reaches a fix-point.", new_block_id)
job.dbg_exit_status[successor] = "Merged due to reaching a fix-point"
return []
new_jobs = self._create_new_jobs(job, successor, new_block_id, new_call_stack)
return new_jobs
def _handle_successor_multitargets(self, job, successor, all_successors):
"""
Generate new jobs for all possible successor targets when there are more than one possible concrete value for
successor.ip
:param VFGJob job: The VFGJob instance.
:param SimState successor: The succeeding state.
:param list all_successors: All succeeding states from the same VFGJob.
:return: A list of new succeeding jobs
:rtype: list
"""
new_jobs = []
# Currently we assume a legit jumping target cannot have more than 256 concrete values
# TODO: make it a setting on VFG
MAX_NUMBER_OF_CONCRETE_VALUES = 256
all_possible_ips = successor.solver.eval_upto(successor.ip, MAX_NUMBER_OF_CONCRETE_VALUES + 1)
if len(all_possible_ips) > MAX_NUMBER_OF_CONCRETE_VALUES:
l.warning(
"IP can be concretized to more than %d values, which means it might be corrupted.",
MAX_NUMBER_OF_CONCRETE_VALUES,
)
return []
# Call this function to generate a successor for each possible IP
for ip in all_possible_ips:
concrete_successor = successor.copy()
concrete_successor.ip = ip
concrete_jobs = self._handle_successor(job, concrete_successor, all_successors)
if job.is_call_jump: # TODO: take care of syscalls
for new_job in concrete_jobs:
# TODO: correctly fill the return address. The return address can be found from the
# TODO: fakeret successor in the `successors` list
function_analysis_task = FunctionAnalysis(new_job.addr, None)
# log the new job
function_analysis_task.jobs.append(new_job)
# put it onto the stack
self._task_stack.append(function_analysis_task)
# log it in the call_task
job.call_task.register_function_analysis(function_analysis_task)
new_jobs.extend(concrete_jobs)
return new_jobs
def _post_job_handling(
self, job: VFGJob, new_jobs: List[Union[VFGJob, Any]], successors: List[SimState]
) -> None: # pylint:disable=unused-argument
# Debugging output
if l.level == logging.DEBUG:
self._post_job_handling_debug(job, successors)
# pop all finished tasks from the task stack
pending_task_func_addrs = {k.func_addr for k in self._pending_returns}
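# a FunctionAnalysis task is kept on the stack while its function still has fake returns
# waiting to be traced, even if all of its live jobs are done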
while True:
task = self._top_task
if task is None:
# the task stack is empty
break
if isinstance(task, CallAnalysis):
# the call never returns
if task.skipped:
l.debug("Calls from %s are skipped.", task)
else:
l.debug("%s never returns.", task)
self._task_stack.pop()
else:
if not task.done or task.function_address in pending_task_func_addrs:
break
l.debug("%s is finished.", task)
self._task_stack.pop()
# the next guy *might be* a call analysis task
task = self._top_task
if isinstance(task, CallAnalysis):
if task.done:
# awesome!
# pop it from the task stack
self._task_stack.pop()
if task._final_jobs:
# merge all jobs, and create a new job
new_job = task.merge_jobs()
# register the job to the top task
self._top_task.jobs.append(new_job)
# insert the job
self._insert_job(new_job)
# if not new_jobs:
# # task stack is empty
# self.final_states.append(job.state)
def _intra_analysis(self) -> None:
pass
def _merge_jobs(self, *jobs: VFGJob):
l.debug("Merging jobs %s", jobs)
# we expect exactly two jobs to be merged at a time
assert len(jobs) == 2
addr = jobs[0].addr
if self.project.is_hooked(addr) and self.project.hooked_by(addr).is_continuation:
raise AngrJobMergingFailureNotice()
# update jobs
for job in jobs:
if job in self._top_function_analysis_task.jobs:
self._top_function_analysis_task.jobs.remove(job)
state_0 = jobs[0].state
state_1 = jobs[1].state
merged_state, _ = self._merge_states(state_0, state_1)
new_job = VFGJob(
jobs[0].addr,
merged_state,
self._context_sensitivity_level,
jumpkind=jobs[0].jumpkind,
block_id=jobs[0].block_id,
call_stack=jobs[0].call_stack,
src_block_id=jobs[0].src_block_id,
src_exit_stmt_idx=jobs[0].src_exit_stmt_idx,
src_ins_addr=jobs[0].src_ins_addr,
)
self._top_function_analysis_task.jobs.append(new_job)
return new_job
def _should_widen_jobs(self, *jobs):
"""
:param iterable jobs:
:return: True if should widen, False otherwise
:rtype: bool
"""
job_0: VFGJob
_: VFGJob
job_0, _ = jobs[-2:] # pylint:disable=unbalanced-tuple-unpacking
addr = job_0.addr
if addr not in self._widening_points(job_0.func_addr):
return False
tracing_times = self._tracing_times[job_0.block_id]
if tracing_times > self._max_iterations_before_widening and tracing_times % self._widening_interval == 0:
return True
return False
def _widen_jobs(self, *jobs: VFGJob):
"""
:param iterable jobs:
:return:
"""
job_0: VFGJob
job_1: VFGJob
job_0, job_1 = jobs[-2:] # pylint:disable=unbalanced-tuple-unpacking
assert self._top_function_analysis_task is not None
# update jobs
for job in jobs:
if job in self._top_function_analysis_task.jobs:
self._top_function_analysis_task.jobs.remove(job)
l.debug("Widening %s", job_1)
new_state, _ = self._widen_states(job_0.state, job_1.state)
new_job = VFGJob(
jobs[0].addr,
new_state,
self._context_sensitivity_level,
jumpkind=jobs[0].jumpkind,
block_id=jobs[0].block_id,
call_stack=jobs[0].call_stack,
src_block_id=jobs[0].src_block_id,
src_exit_stmt_idx=jobs[0].src_exit_stmt_idx,
src_ins_addr=jobs[0].src_ins_addr,
)
self._top_function_analysis_task.jobs.append(new_job)
return new_job
def _job_queue_empty(self) -> None:
if self._pending_returns:
# We don't have any paths remaining. Let's pop a previously-missing return to
# process
top_task: FunctionAnalysis = self._top_task
func_addr = top_task.function_address
pending_ret_key = self._get_pending_job(func_addr)
if pending_ret_key is None:
# analysis of the current function is somehow terminated
# we have to rewind the stack, and try the function that calls the current function
l.debug("No pending return for the current function %#x. Unwind the stack.", func_addr)
if not self._top_function_analysis_task.done:
l.warning(
"The top function analysis task is not done yet. This might be a bug. Please report to Fish."
)
# stack unwinding
while True:
s = self._task_stack.pop()
if isinstance(s, CallAnalysis):
break
return self._job_queue_empty()
self._trace_pending_job(pending_ret_key)
l.debug("Tracing a missing return %s", repr(pending_ret_key))
return None
def _post_analysis(self) -> None:
pass
#
# State widening, merging, and narrowing
#
def _merge_states(self, *states: SimState):
"""
Merge two given states, and return a new one.
:param states: All states to merge.
:returns: The merged state, and whether a merging has occurred
"""
# print old_state.dbg_print_stack()
# print new_state.dbg_print_stack()
merged = states[0]
merging_occurred = False
for state in states[1:]:
merged, _, merging_occurred_ = merged.merge(state, plugin_whitelist=self._mergeable_plugins)
merging_occurred |= merging_occurred_
# print "Merged: "
# print merged_state.dbg_print_stack()
return merged, merging_occurred
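# Widening note (illustrative): under VSA, widening pushes an unstable interval bound toward
# the type's extreme instead of stepping it incrementally, which is what allows loops to reach
# a fix-point in a bounded number of iterations.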
@staticmethod
def _widen_states(old_state, new_state):
"""
Perform widen operation on the given states, and return a new one.
:param old_state:
:param new_state:
:returns: The widened state, and whether widening has occurred
"""
# print old_state.dbg_print_stack()
# print new_state.dbg_print_stack()
l.debug("Widening state at IP %s", old_state.ip)
widened_state, widening_occurred = old_state.widen(new_state)
# print "Widened: "
# print widened_state.dbg_print_stack()
return widened_state, widening_occurred
@staticmethod
def _narrow_states(
node, old_state, new_state, previously_widened_state
): # pylint:disable=unused-argument,no-self-use
"""
Try to narrow the state!
:param old_state:
:param new_state:
:param previously_widened_state:
:returns: The narrowed state, and whether a narrowing has occurred
"""
l.debug("Narrowing state at IP %s", previously_widened_state.ip)
s = previously_widened_state.copy()
narrowing_occurred = False
# TODO: Finish the narrowing logic
return s, narrowing_occurred
#
# Helper methods
#
def _prepare_initial_state(self, function_start: int, state: Optional[SimState]) -> SimState:
"""
Get the state to start the analysis for a function.
:param int function_start: Address of the function
:param SimState state: The program state to base on.
"""
if state is None:
state = self.project.factory.blank_state(mode="static", remove_options=self._state_options_to_remove)
assert state is not None
# make room for arguments passed to the function
sp = state.regs.sp
sp_val = state.solver.eval_one(sp)
state.memory.set_stack_address_mapping(sp_val, state.memory.stack_id(function_start) + "_pre", 0)
state.registers.store("sp", sp - 0x100)
# Set the stack address mapping for the initial stack
state.memory.set_stack_size(state.arch.stack_size)
initial_sp = state.solver.eval(
state.regs.sp
) # FIXME: This is bad, as it may lose tracking of multiple sp values
initial_sp -= state.arch.bytes
state.memory.set_stack_address_mapping(initial_sp, state.memory.stack_id(function_start), function_start)
return state
def _set_return_address(self, state: "SimState", ret_addr: int) -> None:
"""
Set the return address of the current state to a specific address. We assume we are at the beginning of a
function, or in other words, we are about to execute the very first instruction of the function.
:param SimState state: The program state
:param int ret_addr: The return address
:return: None
"""
# TODO: the following code is totally untested other than X86 and AMD64. Don't freak out if you find bugs :)
# TODO: Test it
ret_bvv = state.solver.BVV(ret_addr, self.project.arch.bits)
if self.project.arch.name in ("X86", "AMD64"):
state.stack_push(ret_bvv)
elif is_arm_arch(self.project.arch):
state.regs.lr = ret_bvv
elif self.project.arch.name in ("MIPS32", "MIPS64"):
state.regs.ra = ret_bvv
elif self.project.arch.name in ("PPC32", "PPC64"):
state.regs.lr = ret_bvv
else:
l.warning(
"Return address cannot be set for architecture %s. Please add corresponding logic to "
"VFG._set_return_address().",
self.project.arch.name,
)
def _create_graph(self, return_target_sources=None):
"""
Create a DiGraph out of the existing edge map.
:param return_target_sources: Used for making up those missing returns
:returns: A networkx.DiGraph() object
"""
if return_target_sources is None:
# We set it to a defaultdict in order to be consistent with the
# actual parameter.
return_target_sources = defaultdict(list)
cfg = networkx.DiGraph()
# The corner case: add a node to the graph if there is only one block
if len(self._nodes) == 1:
cfg.add_node(self._nodes[next(iter(self._nodes.keys()))])
# Adding edges
for tpl, targets in self._exit_targets.items():
basic_block = self._nodes[tpl] # Cannot fail :)
for ex, jumpkind in targets:
if ex in self._nodes:
target_bbl = self._nodes[ex]
cfg.add_edge(basic_block, target_bbl, jumpkind=jumpkind)
# Add edges for possibly missing returns
if basic_block.addr in return_target_sources:
for src_irsb_key in return_target_sources[basic_block.addr]:
cfg.add_edge(self._nodes[src_irsb_key], basic_block, jumpkind="Ijk_Ret")
else:
# Debugging output
def addr_formalize(addr):
if addr is None:
return "None"
else:
return "%#08x" % addr
s = "(["
for addr in ex[:-1]:
s += addr_formalize(addr) + ", "
s += "] %s)" % addr_formalize(ex[-1])
l.warning("Key %s does not exist.", s)
return cfg
#
# DiGraph manipulation
#
def _graph_get_node(self, block_id: BlockID, terminator_for_nonexistent_node: bool = False) -> Optional[VFGNode]:
"""
Get an existing VFGNode instance from the graph.
:param BlockID block_id: The block ID for the node to get.
:param bool terminator_for_nonexistent_node: True if a Terminator (which is a SimProcedure stub) should be
created when there is no existing node available for the given
block ID.
:return: A node in the graph, or None.
:rtype: VFGNode
"""
if block_id not in self._nodes:
l.error("Trying to look up a node that we don't have yet. Is this okay????")
if not terminator_for_nonexistent_node:
return None
# Generate a PathTerminator node
addr = block_id.addr
func_addr = block_id.func_addr
if func_addr is None:
# We'll have to use the current block address instead
# TODO: Is it really OK?
func_addr = addr
input_state = self.project.factory.entry_state()
input_state.ip = addr
pt = VFGNode(addr, block_id, input_state)
self._nodes[block_id] = pt
if isinstance(self.project.arch, archinfo.ArchARM) and addr % 2 == 1:
self._thumb_addrs.add(addr)
self._thumb_addrs.add(addr - 1)
l.debug("Block ID %s does not exist. Create a PathTerminator instead.", repr(block_id))
return self._nodes[block_id]
def _graph_add_edge(self, src_block_id: Optional[BlockID], dst_block_id: BlockID, **kwargs) -> None:
"""
Add an edge onto the graph.
:param BlockID src_block_id: The block ID for source node.
:param BlockID dst_block_id: The block Id for destination node.
:param str jumpkind: The jumpkind of the edge.
:param exit_stmt_idx: ID of the statement in the source IRSB where this edge is created from. 'default'
refers to the default exit.
:return: None
"""
dst_node = self._graph_get_node(dst_block_id, terminator_for_nonexistent_node=True)
if src_block_id is None:
self.graph.add_node(dst_node)
else:
src_node = self._graph_get_node(src_block_id, terminator_for_nonexistent_node=True)
self.graph.add_edge(src_node, dst_node, **kwargs)
#
# Other methods
#
def _get_simsuccessors(self, state: SimState, addr: int) -> Tuple[Optional[SimSuccessors], bool, bool]:
error_occured = False
restart_analysis = False
jumpkind = "Ijk_Boring"
if state.history.jumpkind:
jumpkind = state.history.jumpkind
try:
node = self._cfg.model.get_any_node(addr)
num_inst = None if node is None else len(node.instruction_addrs)
sim_successors = self.project.factory.successors(state, jumpkind=jumpkind, num_inst=num_inst)
except SimIRSBError as ex:
# It's a tragedy that we came across some instructions that VEX
# does not support. I'll create a terminating stub there
l.error("SimIRSBError occurred(%s). Creating a PathTerminator.", ex)
error_occured = True
inst = SIM_PROCEDURES["stubs"]["PathTerminator"](state)
sim_successors = ProcedureEngine().process(state, procedure=inst)
except claripy.ClaripyError:
l.error("ClaripyError: ", exc_info=True)
error_occured = True
# Generate a PathTerminator to terminate the current path
inst = SIM_PROCEDURES["stubs"]["PathTerminator"](state)
sim_successors = ProcedureEngine().process(state, procedure=inst)
except SimError:
l.error("SimError: ", exc_info=True)
error_occured = True
# Generate a PathTerminator to terminate the current path
inst = SIM_PROCEDURES["stubs"]["PathTerminator"](state)
sim_successors = ProcedureEngine().process(state, procedure=inst)
except AngrError as ex:
# segment = self.project.loader.main_object.in_which_segment(addr)
l.error("AngrError %s when generating SimSuccessors at %#x", ex, addr, exc_info=True)
# We might be on a wrong branch, and are likely to encounter the
# "No bytes in memory xxx" exception
# Just ignore it
error_occured = True
sim_successors = None
return sim_successors, error_occured, restart_analysis
def _create_new_jobs(
self, job: VFGJob, successor: "SimState", new_block_id: BlockID, new_call_stack: CallStack
) -> List[Union[VFGJob, Any]]:
"""
Create a list of new VFG jobs for the successor state.
:param VFGJob job: The VFGJob instance.
:param SimState successor: The succeeding state.
:param BlockID new_block_id: Block ID for the new VFGJob
:param new_call_stack: The new callstack.
:return: A list of newly created VFG jobs.
:rtype: list
"""
# TODO: basic block stack is probably useless
jumpkind = successor.history.jumpkind
stmt_idx = successor.scratch.stmt_idx
ins_addr = successor.scratch.ins_addr
# Make a copy of the state in case we use it later
successor_state = successor.copy()
successor_addr = successor_state.solver.eval(successor_state.ip)
new_jobs = []
if jumpkind == "Ijk_FakeRet":
assert job.is_call_jump
# This is the default "fake" return successor generated at each call, if and only if the target function
# returns.
# if the call is skipped (for whatever reason, like we reached the interfunction tracing limit), we use
# this FakeRet successor as the final state of the function. Otherwise we save the FakeRet state in case the
# callee does not return normally, but don't process them right away.
# Clear the useless values (like return addresses, parameters) on stack if needed
if self._cfg is not None:
current_function = self.kb.functions.function(job.call_target)
if current_function is not None:
sp_difference = current_function.sp_delta
else:
sp_difference = 0
arch = successor_state.arch
reg_sp_offset = arch.sp_offset
reg_sp_expr = (
successor_state.registers.load(reg_sp_offset, size=arch.bytes, endness=arch.register_endness)
+ sp_difference
) # type: ignore
successor_state.registers.store(arch.sp_offset, reg_sp_expr)
# Clear the return value with a TOP
top_si = successor_state.solver.TSI(arch.bits)
successor_state.registers.store(arch.ret_offset, top_si)
if job.call_skipped:
# TODO: Make sure the return values make sense
# if self.project.arch.name == "X86":
# successor_state.regs.eax = successor_state.solver.BVS(
# "ret_val", 32, min=0, max=0xFFFFFFFF, stride=1
# )
new_job = VFGJob(
successor_addr,
successor_state,
self._context_sensitivity_level,
block_id=new_block_id,
jumpkind="Ijk_Ret",
call_stack=new_call_stack,
src_block_id=job.block_id,
src_exit_stmt_idx=stmt_idx,
src_ins_addr=ins_addr,
)
new_jobs.append(new_job)
assert isinstance(self._task_stack[-2], FunctionAnalysis)
self._task_stack[-2].jobs.append(new_job)
job.dbg_exit_status[successor] = "Pending"
else:
self._pending_returns[new_block_id] = PendingJob(
new_block_id, successor_state, new_call_stack, job.block_id, stmt_idx, ins_addr
) # type: ignore
job.dbg_exit_status[successor] = "Pending"
else:
if sim_options.ABSTRACT_MEMORY in successor.options:
if self._is_call_jumpkind(successor.history.jumpkind):
# If this is a call, we create a new stack address mapping
reg_sp_si = self._create_stack_region(successor_state, successor_addr)
# Save the new sp register
new_reg_sp_expr = successor_state.solver.ValueSet(successor_state.arch.bits, "global", 0, reg_sp_si)
successor_state.regs.sp = new_reg_sp_expr
elif successor.history.jumpkind == "Ijk_Ret":
# Remove the existing stack address mapping
# FIXME: Now we are assuming the sp is restored to its original value
reg_sp_expr = successor_state.regs.sp
if isinstance(reg_sp_expr._model_vsa, claripy.vsa.StridedInterval): # type: ignore
reg_sp_si = reg_sp_expr._model_vsa
# reg_sp_si.min # reg_sp_val
elif isinstance(reg_sp_expr._model_vsa, claripy.vsa.ValueSet): # type: ignore
reg_sp_si = next(iter(reg_sp_expr._model_vsa.items()))[1] # type: ignore
# reg_sp_si.min # reg_sp_val
# TODO: Finish it!
new_job = VFGJob(
successor_addr,
successor_state,
self._context_sensitivity_level,
block_id=new_block_id,
jumpkind=successor_state.history.jumpkind,
call_stack=new_call_stack,
src_block_id=job.block_id,
src_exit_stmt_idx=stmt_idx,
src_ins_addr=ins_addr,
)
if successor.history.jumpkind == "Ijk_Ret":
# it's returning to the return site
# save the state as a final state of the function that we are returning from
if self._record_function_final_states:
# key of the function that we are returning from
source_function_key = FunctionKey.new(job.func_addr, job.call_stack_suffix)
self._save_function_final_state(source_function_key, job.func_addr, successor_state)
# TODO: add an assertion that requires the returning target being the same as the return address we
# TODO: stored before
current_task = self._top_task
if current_task.call_analysis is not None:
current_task.call_analysis.add_final_job(new_job)
job.dbg_exit_status[successor] = "Appended to the call analysis task"
else:
job.dbg_exit_status[successor] = "Discarded (no call analysis task)"
else:
if self._is_call_jumpkind(successor.history.jumpkind):
# create a function analysis task
# TODO: the return address
task = FunctionAnalysis(new_job.addr, None)
self._task_stack.append(task)
# link it to the call analysis
job.call_task.register_function_analysis(task)
else:
task = self._top_task
# register the job to the function task
task.jobs.append(new_job)
# insert the new job into the new job array
new_jobs.append(new_job)
job.dbg_exit_status[successor] = "Appended"
if not job.is_call_jump or jumpkind != "Ijk_FakeRet":
new_target = (new_block_id, jumpkind)
else:
new_target = (new_block_id, "Ijk_FakeRet") # This is the fake return!
self._exit_targets[job.call_stack_suffix + (job.addr,)].append(new_target)
return new_jobs
def _remove_pending_return(self, job, pending_returns):
"""
Remove all pending returns that are related to the current job.
"""
# Build the tuples that we want to remove from the dict fake_func_retn_exits
tpls_to_remove = []
call_stack_copy = job.call_stack_copy()
while call_stack_copy.current_return_target is not None:
ret_target = call_stack_copy.current_return_target
# Remove the current call stack frame
call_stack_copy = call_stack_copy.ret(ret_target)
call_stack_suffix = call_stack_copy.stack_suffix(self._context_sensitivity_level)
tpl = call_stack_suffix + (ret_target,)
tpls_to_remove.append(tpl)
# Remove those tuples from the dict
for tpl in tpls_to_remove:
if tpl in pending_returns:
del pending_returns[tpl]
l.debug(
"Removed (%s) from FakeExits dict.", ",".join([hex(i) if i is not None else "None" for i in tpl])
)
def _post_job_handling_debug(self, job: VFGJob, successors):
"""
Print out debugging information after handling a VFGJob and generating the succeeding jobs.
:param VFGJob job: The VFGJob instance.
:param list successors: A list of succeeding states.
:return: None
"""
func = self.project.loader.find_symbol(job.addr)
function_name = func.name if func is not None else None
module_name = self.project.loader.find_object_containing(job.addr).provides
l.debug(
"VFGJob @ %#08x with callstack [ %s ]",
job.addr,
job.callstack_repr(self.kb),
)
l.debug("(Function %s of %s)", function_name, module_name)
l.debug("- is call jump: %s", job.is_call_jump)
for suc in successors:
if suc not in job.dbg_exit_status:
l.warning("- %s is not found. FIND OUT WHY.", suc)
continue
try:
l.debug(
"- successor: %#08x of %s [%s]",
suc.solver.eval_one(suc.ip),
suc.history.jumpkind,
job.dbg_exit_status[suc],
)
except SimValueError:
l.debug("- target cannot be concretized. %s [%s]", job.dbg_exit_status[suc], suc.history.jumpkind)
l.debug("Remaining/pending jobs: %d/%d", len(self._job_info_queue), len(self._pending_returns))
l.debug("Remaining jobs: %s", ["%s %d" % (ent.job, id(ent.job)) for ent in self._job_info_queue])
l.debug("Task stack: %s", self._task_stack)
@staticmethod
def _is_call_jumpkind(jumpkind: str) -> bool:
if jumpkind == "Ijk_Call" or jumpkind.startswith("Ijk_Sys_"):
return True
return False
@staticmethod
def _is_return_jumpkind(jumpkind):
return jumpkind in ("Ijk_Ret", "Ijk_FakeRet")
@staticmethod
def _create_stack_region(successor_state: SimState, successor_ip: int) -> StridedInterval:
arch = successor_state.arch
reg_sp_offset = arch.sp_offset
reg_sp_expr = successor_state.registers.load(reg_sp_offset, size=arch.bytes, endness=arch.register_endness)
if type(reg_sp_expr._model_vsa) is claripy.BVV: # pylint:disable=unidiomatic-typecheck
reg_sp_val = successor_state.solver.eval(reg_sp_expr)
reg_sp_si = successor_state.solver.SI(to_conv=reg_sp_expr)
reg_sp_si = reg_sp_si._model_vsa
elif type(reg_sp_expr._model_vsa) is int: # pylint:disable=unidiomatic-typecheck
reg_sp_val = reg_sp_expr._model_vsa
reg_sp_si = successor_state.solver.SI(bits=successor_state.arch.bits, to_conv=reg_sp_val)
reg_sp_si = reg_sp_si._model_vsa
elif (
type(reg_sp_expr._model_vsa) is claripy.vsa.StridedInterval
): # pylint:disable=unidiomatic-typecheck # type: ignore
reg_sp_si = reg_sp_expr._model_vsa
reg_sp_val = reg_sp_si.min
else:
reg_sp_si = next(iter(reg_sp_expr._model_vsa.items()))[1]
reg_sp_val = reg_sp_si.min
reg_sp_val = reg_sp_val - arch.bytes # TODO: Is it OK?
new_stack_region_id = successor_state.memory.stack_id(successor_ip)
successor_state.memory.set_stack_address_mapping(reg_sp_val, new_stack_region_id, successor_ip)
return reg_sp_si
def _create_callstack(
self, job: VFGJob, successor_ip: int, jumpkind: str, fakeret_successor: Optional[SimState]
) -> CallStack:
addr = job.addr
if self._is_call_jumpkind(jumpkind):
new_call_stack = job.call_stack_copy()
# Notice that in ARM, there are some freaking instructions
# like
# BLEQ <address>
# It should give us three exits: Ijk_Call, Ijk_Boring, and
# Ijk_Ret. The last exit is simulated.
# Notice: We assume the last exit is the simulated one
if fakeret_successor is None:
retn_target_addr = None
else:
retn_target_addr = fakeret_successor.solver.eval_one(fakeret_successor.ip)
# Create call stack
new_call_stack = new_call_stack.call(addr, successor_ip, retn_target=retn_target_addr)
elif jumpkind == "Ijk_Ret":
new_call_stack = job.call_stack_copy()
new_call_stack = new_call_stack.ret(successor_ip)
else:
# Normal control flow transition
new_call_stack = job.call_stack
return new_call_stack
def _save_function_initial_state(self, function_key: FunctionKey, function_address: int, state: SimState) -> None:
"""
Save the initial state of a function, and merge it with existing ones if there are any.
:param FunctionKey function_key: The key to this function.
:param int function_address: Address of the function.
:param SimState state: Initial state of the function.
:return: None
"""
l.debug("Saving the initial state for function %#08x with function key %s", function_address, function_key)
if function_key in self._function_initial_states[function_address]:
existing_state = self._function_initial_states[function_address][function_key]
merged_state, _, _ = existing_state.merge(state)
self._function_initial_states[function_address][function_key] = merged_state
else:
self._function_initial_states[function_address][function_key] = state
def _save_function_final_state(self, function_key, function_address, state):
"""
Save the final state of a function, and merge it with existing ones if there are any.
:param FunctionKey function_key: The key to this function.
:param int function_address: Address of the function.
:param SimState state: Final state of the function.
:return: None
"""
l.debug("Saving the final state for function %#08x with function key %s", function_address, function_key)
if function_key in self._function_final_states[function_address]:
existing_state = self._function_final_states[function_address][function_key]
merged_state = existing_state.merge(state, plugin_whitelist=self._mergeable_plugins)[0]
self._function_final_states[function_address][function_key] = merged_state
else:
self._function_final_states[function_address][function_key] = state
def _trace_pending_job(self, job_key):
pending_job: PendingJob = self._pending_returns.pop(job_key)
addr = job_key.addr
# Unlike CFG, we will still trace those blocks that have been traced before. In other words, we don't
# remove fake returns even if they have been traced - otherwise we cannot come to a fix-point.
block_id = BlockID.new(addr, pending_job.call_stack.stack_suffix(self._context_sensitivity_level), "Ijk_Ret")
job = VFGJob(
addr,
pending_job.state,
self._context_sensitivity_level,
block_id=block_id,
jumpkind=pending_job.state.history.jumpkind,
call_stack=pending_job.call_stack,
src_block_id=pending_job.src_block_id,
src_exit_stmt_idx=pending_job.src_stmt_idx,
src_ins_addr=pending_job.src_ins_addr,
)
self._insert_job(job)
self._top_task.jobs.append(job)
def _get_pending_job(self, func_addr):
pending_ret_key = None
k: BlockID
for k in self._pending_returns:
if k.func_addr == func_addr:
pending_ret_key = k
break
return pending_ret_key
def _get_block_addr(self, b): # pylint:disable=R0201
if isinstance(b, SimSuccessors):
return b.addr
else:
raise TypeError("Unsupported block type %s" % type(b))
def _get_nx_paths(self, begin, end):
"""
Get the possible (networkx) simple paths between two nodes or addresses
corresponding to nodes.
Input: addresses or node instances
Return: a generator of lists of nodes representing paths.
"""
if type(begin) is int and type(end) is int: # pylint:disable=unidiomatic-typecheck
n_begin = self.get_any_node(begin)
n_end = self.get_any_node(end)
elif isinstance(begin, VFGNode) and isinstance(end, VFGNode): # pylint:disable=unidiomatic-typecheck
n_begin = begin
n_end = end
else:
raise AngrVFGError("from and to should be of the same type")
return networkx.all_simple_paths(self.graph, n_begin, n_end) # type: ignore
def _merge_points(self, function_address):
"""
Return the ordered merge points for a specific function.
:param int function_address: Address of the querying function.
:return: A list of sorted merge points (addresses).
:rtype: list
"""
# we are entering a new function. now it's time to figure out how to optimally traverse the control flow
# graph by generating the sorted merge points
try:
new_function = self.kb.functions[function_address]
except KeyError:
# the function does not exist
return []
if function_address not in self._function_merge_points:
ordered_merge_points = GraphUtils.find_merge_points(
function_address, new_function.endpoints, new_function.graph
)
self._function_merge_points[function_address] = ordered_merge_points
return self._function_merge_points[function_address]
def _widening_points(self, function_address):
"""
Return the ordered widening points for a specific function.
:param int function_address: Address of the querying function.
:return: A list of sorted widening points (addresses).
:rtype: list
"""
# we are entering a new function. now it's time to figure out how to optimally traverse the control flow
# graph by generating the sorted widening points
try:
new_function = self.kb.functions[function_address]
except KeyError:
# the function does not exist
return []
if function_address not in self._function_widening_points:
if not new_function.normalized:
new_function.normalize()
widening_points = GraphUtils.find_widening_points(
function_address, new_function.endpoints, new_function.graph
)
self._function_widening_points[function_address] = widening_points
return self._function_widening_points[function_address]
def _ordered_node_addrs(self, function_address):
"""
For a given function, return all nodes in an optimal traversal order. If the function does not exist, return an
empty list.
:param int function_address: Address of the function.
:return: An ordered list of the nodes.
:rtype: list
"""
try:
function = self.kb.functions[function_address]
except KeyError:
# the function does not exist
return []
if function_address not in self._function_node_addrs:
sorted_nodes = GraphUtils.quasi_topological_sort_nodes(function.graph)
self._function_node_addrs[function_address] = [n.addr for n in sorted_nodes]
return self._function_node_addrs[function_address]
AnalysesHub.register_default("VFG", VFG)