Source code for angr.analyses.decompiler.region_identifier

from itertools import count
from collections import defaultdict
import logging
from typing import List, Optional, Union

import networkx

import ailment
from ailment import Block
from ailment.statement import ConditionalJump, Jump
from ailment.expression import Const

from angr.utils.graph import GraphUtils
from ...utils.graph import dfs_back_edges, subgraph_between_nodes, dominates, shallow_reverse
from .. import Analysis, register_analysis
from .structuring.structurer_nodes import MultiNode, ConditionNode, IncompleteSwitchCaseHeadStatement
from .graph_region import GraphRegion
from .condition_processor import ConditionProcessor
from .utils import replace_last_statement, first_nonlabel_statement

# module-level logger, named after this module
l = logging.getLogger(name=__name__)


# an ever-incrementing counter starting at 0xFF000000. RegionIdentifier._refine_loop_successors() calls next() on it
# to obtain the address value it passes to the ConditionNode objects it creates.
CONDITIONNODE_ADDR = count(0xFF000000)


class RegionIdentifier(Analysis):
    """
    Identifies regions within a function.

    The analysis runs as soon as the object is constructed. Afterwards, ``self.region`` holds the top-level
    GraphRegion and ``self.regions_by_block_addrs`` holds one list of block addresses per region.

    :param func:            The function to analyze. Its ``graph`` attribute is used when `graph` is None, and its
                            ``addr`` attribute is used as a fallback to locate the start node.
    :param cond_proc:       The ConditionProcessor to use. A new one is created when this is None.
    :param graph:           The graph to work on. Defaults to ``func.graph``. The graph is copied before it is
                            modified.
    :param bool largest_successor_tree_outside_loop:
                            Enables the last step of `_refine_loop()`, which may move the largest successor tree back
                            out of the loop.
    :param bool force_loop_single_exit:
                            When True, a cyclic region with more than one successor is rewritten by
                            `_refine_loop_successors()` so that it has a single successor.
    :param bool complete_successors:
                            Selects how `_make_acyclic_region()` backpatches ``region.graph_with_successors`` from
                            the secondary graph.
    """

    def __init__(
        self,
        func,
        cond_proc=None,
        graph=None,
        largest_successor_tree_outside_loop=True,
        force_loop_single_exit=True,
        complete_successors=False,
    ):
        self.function = func
        self.cond_proc = (
            cond_proc
            if cond_proc is not None
            else ConditionProcessor(
                self.project.arch
                if getattr(self, "project", None) is not None
                else None  # it's only None in test cases
            )
        )
        self._graph = graph if graph is not None else self.function.graph

        self.region = None
        self._start_node = None
        self._loop_headers: Optional[List] = None
        self.regions_by_block_addrs = []
        self._largest_successor_tree_outside_loop = largest_successor_tree_outside_loop
        self._force_loop_single_exit = force_loop_single_exit
        self._complete_successors = complete_successors

        self._analyze()

    @staticmethod
    def slice_graph(graph, node, frontier, include_frontier=False):
        """
        Generate a slice of the graph from the head node to the given frontier.

        :param networkx.DiGraph graph: The graph to work on.
        :param node: The starting node in the graph.
        :param frontier: A list of frontier nodes.
        :param bool include_frontier: Whether the frontier nodes are included in the slice or not.
        :return: A subgraph.
        :rtype: networkx.DiGraph
        """

        subgraph = subgraph_between_nodes(graph, node, frontier, include_frontier=include_frontier)
        if not list(subgraph.nodes):
            # HACK: FIXME: for infinite loop nodes, this would return an empty set, so we include the loop body itself
            # Make sure this makes sense (EDG thinks it does)
            if (node, node) in graph.edges:
                subgraph.add_edge(node, node)
        return subgraph

    def _analyze(self):
        """
        Run the whole analysis and populate ``self.region`` and ``self.regions_by_block_addrs``.
        """

        # make a copy of the graph
        graph = networkx.DiGraph(self._graph)

        # preprocess: make it a super graph
        self._make_supergraph(graph)

        self._start_node = self._get_start_node(graph)

        # preprocess: find loop headers
        self._loop_headers = self._find_loop_headers(graph)

        self.region = self._make_regions(graph)

        # make regions into block address lists
        self.regions_by_block_addrs = self._make_regions_by_block_addrs()

    def _make_regions_by_block_addrs(self) -> List[List[int]]:
        """
        Creates a list of addr lists representing each region without recursion. A single region is defined
        as a set of only blocks, no Graphs containing nested regions. The list contains the address of each
        block in the region, including the heads of each recursive region.

        :return: List of addr lists
        """

        work_list = [self.region]
        block_only_regions = []
        seen_regions = set()
        while work_list:
            children_regions = []
            for region in work_list:
                children_blocks = []
                for node in region.graph.nodes:
                    if isinstance(node, Block):
                        children_blocks.append(node.addr)
                    elif isinstance(node, MultiNode):
                        children_blocks += [n.addr for n in node.nodes]
                    elif isinstance(node, GraphRegion):
                        # nested region: record its head and visit its content in the next round (once only)
                        if node not in seen_regions:
                            children_regions.append(node)
                            children_blocks.append(node.head.addr)
                            seen_regions.add(node)
                    else:
                        continue

                if children_blocks:
                    block_only_regions.append(children_blocks)

            work_list = children_regions

        return block_only_regions

    def _get_start_node(self, graph: networkx.DiGraph):
        """
        Find the start node of the graph: the first node without predecessors, or, if there is none, the first node
        whose address equals the address of the function.

        :param graph:   The graph to search.
        :return:        The start node.
        :raises RuntimeError: If neither kind of node exists in the graph.
        """

        try:
            return next(n for n in graph.nodes() if graph.in_degree(n) == 0)
        except StopIteration:
            pass

        try:
            return next(n for n in graph.nodes() if n.addr == self.function.addr)
        except StopIteration as ex:
            raise RuntimeError("Cannot find the start node from the graph!") from ex

    def _test_reducibility(self):
        """
        Test whether a copy of the graph collapses into a single node when self loops are removed and single-entry
        nodes are merged into their predecessors until a fixed point is reached.

        :return:    True if exactly one node is left at the fixed point, False otherwise.
        :rtype:     bool
        """

        # make a copy of the graph
        graph = networkx.DiGraph(self._graph)

        # preprocess: make it a super graph
        self._make_supergraph(graph)

        while True:
            changed = False

            # find a node with a back-edge, remove the edge (deleting the loop), and replace it with a MultiNode
            changed |= self._remove_self_loop(graph)

            # find a node that has only one predecessor, and merge it with its predecessor (replace them with a
            # MultiNode)
            changed |= self._merge_single_entry_node(graph)

            if not changed:
                # a fixed-point is reached
                break

        # Flow graph reducibility, Hecht and Ullman
        if len(graph.nodes) == 1:
            return True

        return False

    def _make_supergraph(self, graph: networkx.DiGraph):
        """
        Turn the graph into a super graph in place: the two ends of a "fake_return" edge are merged into a MultiNode
        when the source has a single successor and the destination has a single predecessor, and the destination
        node of every "call" edge is removed.

        :param graph:   The graph to modify in place.
        """

        while True:
            # the graph is modified inside the loop body, so we restart the edge iteration after every change
            for src, dst, data in graph.edges(data=True):
                type_ = data.get("type", None)
                if type_ == "fake_return":
                    if len(list(graph.successors(src))) == 1 and len(list(graph.predecessors(dst))) == 1:
                        self._merge_nodes(graph, src, dst, force_multinode=True)
                        break
                elif type_ == "call":
                    graph.remove_node(dst)
                    break
            else:
                # a full pass without any change
                break

    def _find_loop_headers(self, graph: networkx.DiGraph) -> List:
        """
        Collect the targets of all DFS back edges (starting from the start node) and return them in the order
        produced by GraphUtils.quasi_topological_sort_nodes().
        """

        heads = {t for _, t in dfs_back_edges(graph, self._start_node)}
        return GraphUtils.quasi_topological_sort_nodes(graph, heads)

    def _find_initial_loop_nodes(self, graph: networkx.DiGraph, head):
        """
        Compute the initial set of loop nodes for the loop headed by `head`: the slice of the graph between the head
        and its latching nodes (sources of back edges to the head), extended with the successors of switch-like
        nodes.

        :param graph:   The graph to work on.
        :param head:    The loop head.
        :return:        A set of nodes.
        """

        # TODO optimize
        latching_nodes = {s for s, t in dfs_back_edges(graph, self._start_node) if t == head}
        loop_subgraph = self.slice_graph(graph, head, latching_nodes, include_frontier=True)

        # special case: any node with more than two non-self successors are probably the head of a switch-case. we
        # should include all successors into the loop subgraph.
        while True:
            updated = False
            for node in list(loop_subgraph):
                nonself_successors = [succ for succ in graph.successors(node) if succ is not node]
                if len(nonself_successors) > 2:
                    for succ in nonself_successors:
                        if not loop_subgraph.has_edge(node, succ):
                            updated = True
                            loop_subgraph.add_edge(node, succ)
            if not updated:
                break

        nodes = set(loop_subgraph)
        return nodes

    def _refine_loop(self, graph: networkx.DiGraph, head, initial_loop_nodes, initial_exit_nodes):
        """
        Refine the loop body by moving exit nodes into the loop, trying to reduce the number of exit nodes.

        :param graph:               The graph to work on. It is not modified.
        :param head:                The loop head.
        :param set initial_loop_nodes:  The initial loop nodes. The set is not modified.
        :param set initial_exit_nodes:  The initial exit nodes. The set is not modified.
        :return:                    A tuple of (refined loop nodes, refined exit nodes).
        """

        if len(initial_exit_nodes) <= 1:
            return initial_loop_nodes, initial_exit_nodes

        refined_loop_nodes = initial_loop_nodes.copy()
        refined_exit_nodes = initial_exit_nodes.copy()

        # simple optimization: include all single-in-degree successors of existing loop nodes
        while True:
            added = set()
            for exit_node in list(refined_exit_nodes):
                if graph.in_degree[exit_node] == 1 and graph.out_degree[exit_node] <= 1:
                    added.add(exit_node)
                    refined_loop_nodes.add(exit_node)
                    refined_exit_nodes |= {
                        succ for succ in graph.successors(exit_node) if succ not in refined_loop_nodes
                    }
                    refined_exit_nodes.remove(exit_node)
            if not added:
                break

        if len(refined_exit_nodes) <= 1:
            return refined_loop_nodes, refined_exit_nodes

        idom = networkx.immediate_dominators(graph, head)

        new_exit_nodes = refined_exit_nodes
        # a graph with only initial exit nodes and new loop nodes that are reachable from at least one initial exit
        # node.
        subgraph = networkx.DiGraph()

        sorted_refined_exit_nodes = GraphUtils.quasi_topological_sort_nodes(graph, refined_exit_nodes)
        while len(sorted_refined_exit_nodes) > 1 and new_exit_nodes:
            # visit each node in refined_exit_nodes once and determine which nodes to consider as loop nodes
            candidate_nodes = {}
            for n in list(sorted_refined_exit_nodes):
                if all((pred is n or pred in refined_loop_nodes) for pred in graph.predecessors(n)) and dominates(
                    idom, head, n
                ):
                    to_add = set(graph.successors(n)) - refined_loop_nodes
                    candidate_nodes[n] = to_add

            # visit all candidate nodes and only consider candidates that will not be added as exit nodes
            all_new_exit_candidates = set()
            for new_exit_candidates in candidate_nodes.values():
                all_new_exit_candidates |= new_exit_candidates

            # to guarantee progressing, we must ensure all_new_exit_candidates cannot contain all candidate nodes
            if all(n in all_new_exit_candidates for n in candidate_nodes):
                all_new_exit_candidates = set()

            # do the actual work
            new_exit_nodes = set()
            for n in candidate_nodes:
                if n in all_new_exit_candidates:
                    continue
                refined_loop_nodes.add(n)
                sorted_refined_exit_nodes.remove(n)
                to_add = set(graph.successors(n)) - refined_loop_nodes
                new_exit_nodes |= to_add
                for succ in to_add:
                    subgraph.add_edge(n, succ)

            sorted_refined_exit_nodes += list(new_exit_nodes)
            sorted_refined_exit_nodes = list(set(sorted_refined_exit_nodes))
            sorted_refined_exit_nodes = GraphUtils.quasi_topological_sort_nodes(graph, sorted_refined_exit_nodes)

        refined_exit_nodes = set(sorted_refined_exit_nodes)
        refined_loop_nodes = refined_loop_nodes - refined_exit_nodes

        if self._largest_successor_tree_outside_loop and not refined_exit_nodes:
            # figure out the new successor tree with the highest number of nodes
            initial_exit_to_newnodes = defaultdict(set)
            newnode_to_initial_exits = defaultdict(set)
            for initial_exit in initial_exit_nodes:
                if initial_exit in subgraph:
                    for _, succs in networkx.bfs_successors(subgraph, initial_exit):
                        initial_exit_to_newnodes[initial_exit] |= set(succs)
                        for succ in succs:
                            newnode_to_initial_exits[succ].add(initial_exit)

            for newnode, exits in newnode_to_initial_exits.items():
                for exit_ in exits:
                    initial_exit_to_newnodes[exit_].add(newnode)

            # filter initial_exit_to_newnodes and remove the subtrees with nodes that are reachable from nodes that are
            # outside the current subtree
            for initial_exit, subtree in list(initial_exit_to_newnodes.items()):
                subtree_preds = set()
                for node in subtree:
                    preds = set(graph.predecessors(node))
                    subtree_preds |= {pred for pred in preds if pred not in subtree}
                    if len(subtree_preds) > 1:
                        # early break
                        break
                if len(subtree_preds) > 1:
                    # there is more than one out-of-tree predecessor. remove this subtree
                    del initial_exit_to_newnodes[initial_exit]

            if initial_exit_to_newnodes:
                tree_sizes = {exit_: len(initial_exit_to_newnodes[exit_]) for exit_ in initial_exit_to_newnodes}
                max_tree_size = max(tree_sizes.values())
                # only proceed when the largest tree is unique; this also makes the reverse mapping below unambiguous
                # for max_tree_size
                if list(tree_sizes.values()).count(max_tree_size) == 1:
                    tree_size_to_exit = {v: k for k, v in tree_sizes.items()}
                    max_size_exit = tree_size_to_exit[max_tree_size]
                    if all(len(newnode_to_initial_exits[nn]) == 1 for nn in initial_exit_to_newnodes[max_size_exit]):
                        refined_loop_nodes = (
                            refined_loop_nodes - initial_exit_to_newnodes[max_size_exit] - {max_size_exit}
                        )
                        refined_exit_nodes.add(max_size_exit)

        return refined_loop_nodes, refined_exit_nodes

    def _remove_self_loop(self, graph: networkx.DiGraph):
        """
        Call `_remove_node()` on every node that has an edge to itself, until no such node is left.

        :param graph:   The graph to modify in place.
        :return:        True if at least one node was processed, False otherwise.
        :rtype:         bool
        """

        r = False

        while True:
            for node in graph.nodes():
                if node in graph[node]:
                    # found a self loop
                    self._remove_node(graph, node)
                    r = True
                    # the graph has changed; restart the iteration
                    break
            else:
                break

        return r

    def _merge_single_entry_node(self, graph: networkx.DiGraph):
        """
        Merge every node that has exactly one predecessor into that predecessor (see `_absorb_node()`), until no
        such node is left.

        :param graph:   The graph to modify in place.
        :return:        True if at least one merge happened, False otherwise.
        :rtype:         bool
        """

        r = False

        while True:
            for node in networkx.dfs_postorder_nodes(graph):
                # DiGraph.predecessors() returns an iterator, which supports neither len() nor indexing
                preds = list(graph.predecessors(node))
                # a node whose only predecessor is itself cannot be merged "into its predecessor"; absorbing it would
                # remove the same node twice. such self loops are left for _remove_self_loop().
                if len(preds) == 1 and preds[0] is not node:
                    # merge the two nodes
                    self._absorb_node(graph, preds[0], node)
                    r = True
                    # the graph has changed; restart the iteration
                    break
            else:
                break

        return r

    def _make_regions(self, graph: networkx.DiGraph):
        """
        Structure the graph into regions: first all loops (cyclic regions), then acyclic regions inside every region
        that was created.

        :param graph:   The graph to structure. It is modified in place.
        :return:        The top-level region.
        :rtype:         GraphRegion
        """

        structured_loop_headers = set()
        new_regions = []

        # FIXME: _get_start_node() will fail if the graph is just a loop

        # Find all loops
        while True:
            restart = False

            self._start_node = self._get_start_node(graph)

            # Start from loops
            for node in list(reversed(self._loop_headers)):
                if node in structured_loop_headers:
                    continue
                if node not in graph:
                    continue
                region = self._make_cyclic_region(node, graph)
                if region is None:
                    # failed to struct the loop region - remove the header node from loop headers
                    l.debug(
                        "Failed to structure a loop region starting at %#x. Remove it from loop headers.", node.addr
                    )
                    self._loop_headers.remove(node)
                else:
                    l.debug("Structured a loop region %r.", region)
                    new_regions.append(region)
                    structured_loop_headers.add(node)
                    restart = True
                    break

            if restart:
                continue

            break

        new_regions.append(GraphRegion(self._get_start_node(graph), graph, None, None, False, None))

        l.debug("Identified %d loop regions.", len(structured_loop_headers))
        l.debug("No more loops left. Start structuring acyclic regions.")
        # No more loops left. Structure acyclic regions.
        while new_regions:
            region = new_regions.pop(0)
            head = region.head
            subgraph = region.graph

            failed_region_attempts = set()
            while self._make_acyclic_region(
                head, subgraph, region.graph_with_successors, failed_region_attempts, region.cyclic
            ):
                if head not in subgraph:
                    # update head
                    head = next(iter(n for n in subgraph.nodes() if n.addr == head.addr))

            head = next(iter(n for n in subgraph.nodes() if n.addr == head.addr))
            region.head = head

        if len(graph.nodes()) == 1 and isinstance(list(graph.nodes())[0], GraphRegion):
            return list(graph.nodes())[0]
        # create a large graph region
        new_head = self._get_start_node(graph)
        region = GraphRegion(new_head, graph, None, None, False, None)
        return region

    #
    # Cyclic regions
    #

    def _make_cyclic_region(self, head, graph: networkx.DiGraph):
        """
        Try to structure the loop headed by `head` into a cyclic GraphRegion and replace the loop nodes in the graph
        with that region.

        :param head:    The loop head.
        :param graph:   The graph that is being structured. It is modified in place on success.
        :return:        The new cyclic region, or None if the initial loop body contains another loop header.
        """

        l.debug("Found cyclic region at %#08x", head.addr)
        initial_loop_nodes = self._find_initial_loop_nodes(graph, head)
        l.debug("Initial loop nodes %s", self._dbg_block_list(initial_loop_nodes))

        # Make sure no other loops are contained in the current loop
        if {n for n in initial_loop_nodes if n.addr != head.addr}.intersection(self._loop_headers):
            return None

        normal_entries = {n for n in graph.predecessors(head) if n not in initial_loop_nodes}
        abnormal_entries = set()
        for n in initial_loop_nodes:
            if n == head:
                continue
            preds = set(graph.predecessors(n))
            abnormal_entries |= preds - initial_loop_nodes
        l.debug("Normal entries %s", self._dbg_block_list(normal_entries))
        l.debug("Abnormal entries %s", self._dbg_block_list(abnormal_entries))

        initial_exit_nodes = set()
        for n in initial_loop_nodes:
            succs = set(graph.successors(n))
            initial_exit_nodes |= succs - initial_loop_nodes

        l.debug("Initial exit nodes %s", self._dbg_block_list(initial_exit_nodes))

        refined_loop_nodes, refined_exit_nodes = self._refine_loop(graph, head, initial_loop_nodes, initial_exit_nodes)
        l.debug("Refined loop nodes %s", self._dbg_block_list(refined_loop_nodes))
        l.debug("Refined exit nodes %s", self._dbg_block_list(refined_exit_nodes))

        # make sure there is a jump statement to the outside at the end of each node going to exit nodes.
        # this jump statement will be rewritten to a break statement during structuring.
        for exit_node in refined_exit_nodes:
            for pred in graph.predecessors(exit_node):
                if pred in refined_loop_nodes:
                    self._ensure_jump_at_loop_exit_ends(pred)

        if len(refined_exit_nodes) > 1:
            # self._get_start_node(graph)
            # the exit node that comes first in DFS post-order (from the head) is the normal exit
            node_post_order = list(networkx.dfs_postorder_nodes(graph, head))
            sorted_exit_nodes = sorted(list(refined_exit_nodes), key=node_post_order.index)
            normal_exit_node = sorted_exit_nodes[0]
            abnormal_exit_nodes = set(sorted_exit_nodes[1:])
        else:
            normal_exit_node = next(iter(refined_exit_nodes)) if len(refined_exit_nodes) > 0 else None
            abnormal_exit_nodes = set()

        region = self._abstract_cyclic_region(
            graph, refined_loop_nodes, head, normal_entries, abnormal_entries, normal_exit_node, abnormal_exit_nodes
        )
        if len(region.successors) > 1 and self._force_loop_single_exit:
            # multi-successor region. refinement is required
            self._refine_loop_successors(region, graph)

        return region

    def _refine_loop_successors(self, region, graph: networkx.DiGraph):
        """
        If there are multiple successors of a loop, convert them into conditional gotos. Eventually there should be
        only one loop successor.

        :param GraphRegion region:      The cyclic region to refine.
        :param networkx.DiGraph graph:  The current graph that is being structured.
        :return:                        None
        """
        if len(region.successors) <= 1:
            return

        # recover reaching conditions
        self.cond_proc.recover_reaching_conditions(region, with_successors=True)

        successors = list(region.successors)

        condnode_addr = next(CONDITIONNODE_ADDR)
        # create a new successor: a chain of condition nodes, one per original successor, linked through false_node
        cond = ConditionNode(
            condnode_addr,
            None,
            self.cond_proc.reaching_conditions[successors[0]],
            successors[0],
            false_node=None,
        )
        for succ in successors[1:]:
            cond = ConditionNode(
                condnode_addr,
                None,
                self.cond_proc.reaching_conditions[succ],
                succ,
                false_node=cond,
            )

        g = region.graph_with_successors

        # modify region in place
        region.successors = {cond}
        for succ in successors:
            for src, _, data in list(g.in_edges(succ, data=True)):
                # temporarily detach src from its predecessors; the edges are restored below
                removed_edges = []
                for src2src, _, data_ in list(g.in_edges(src, data=True)):
                    removed_edges.append((src2src, src, data_))
                    g.remove_edge(src2src, src)
                g.remove_edge(src, succ)

                # TODO: rewrite the conditional jumps in src so that it goes to cond-node instead.

                # modify the last statement of src so that it jumps to cond
                replaced_any_stmt = False
                last_stmts = self.cond_proc.get_last_statements(src)
                for last_stmt in last_stmts:
                    if isinstance(last_stmt, ConditionalJump):
                        if (
                            isinstance(last_stmt.true_target, ailment.Expr.Const)
                            and last_stmt.true_target.value == succ.addr
                        ):
                            new_last_stmt = ConditionalJump(
                                last_stmt.idx,
                                last_stmt.condition,
                                ailment.Expr.Const(None, None, condnode_addr, self.project.arch.bits),
                                last_stmt.false_target,
                                ins_addr=last_stmt.ins_addr,
                            )
                        elif (
                            isinstance(last_stmt.false_target, ailment.Expr.Const)
                            and last_stmt.false_target.value == succ.addr
                        ):
                            new_last_stmt = ConditionalJump(
                                last_stmt.idx,
                                last_stmt.condition,
                                last_stmt.true_target,
                                ailment.Expr.Const(None, None, condnode_addr, self.project.arch.bits),
                                ins_addr=last_stmt.ins_addr,
                            )
                        else:
                            # none of the two branches is jumping out of the loop
                            continue
                    elif isinstance(last_stmt, Jump):
                        if isinstance(last_stmt.target, ailment.Expr.Const):
                            new_last_stmt = Jump(
                                last_stmt.idx,
                                ailment.Expr.Const(None, None, condnode_addr, self.project.arch.bits),
                                ins_addr=last_stmt.ins_addr,
                            )
                        else:
                            # an indirect jump - might be a jump table. ignore it
                            continue
                    else:
                        l.error("Unexpected last_stmt type %s. Ignore.", type(last_stmt))
                        continue
                    replace_last_statement(src, last_stmt, new_last_stmt)
                    replaced_any_stmt = True
                if not replaced_any_stmt:
                    l.warning("No statement was replaced. Is there anything wrong?")
                    # raise Exception()

                # add src back
                for src2src, _, data_ in removed_edges:
                    g.add_edge(src2src, src, **data_)

                g.add_edge(src, cond, **data)

        # modify graph
        graph.add_edge(region, cond)
        for succ in successors:
            edge_data = graph.get_edge_data(region, succ)
            graph.remove_edge(region, succ)
            graph.add_edge(cond, succ, **edge_data)

    #
    # Acyclic regions
    #

    def _make_acyclic_region(self, head, graph: networkx.DiGraph, secondary_graph, failed_region_attempts, cyclic):
        """
        Find one single-entry, single-successor acyclic region in the graph and replace its nodes with a GraphRegion.

        :param head:                    The head of the graph.
        :param graph:                   The graph to structure. It is modified in place when a region is found.
        :param secondary_graph:         An optional second graph in which the same nodes are replaced by the region.
                                        It is also the source of the successors that are backpatched into
                                        ``region.graph_with_successors``.
        :param set failed_region_attempts:
                                        A set of (node, post-dominator) pairs that were already tried without
                                        success. It is updated in place.
        :param bool cyclic:             Whether the graph is the body of a cyclic region. When True, `head` itself is
                                        never used as the entry of a new region.
        :return:                        True if a region was created, False otherwise.
        :rtype:                         bool
        """

        # pre-processing

        # we need to create a copy of the original graph if
        # - there are in edges to the head node, or
        # - there are more than one end nodes

        head_inedges = list(graph.in_edges(head))
        if head_inedges:
            # we need a copy of the graph to remove edges coming into the head
            graph_copy = networkx.DiGraph(graph)
            # remove any in-edge to the head node
            for src, _ in head_inedges:
                graph_copy.remove_edge(src, head)
        else:
            graph_copy = graph

        endnodes = [node for node in graph_copy.nodes() if graph_copy.out_degree(node) == 0]
        if len(endnodes) == 0:
            # sanity check: there should be at least one end node
            l.critical("No end node is found in a supposedly acyclic graph. Is it really acyclic?")
            return False

        add_dummy_endnode = False
        if len(endnodes) > 1:
            # if this graph has multiple end nodes: create a single end node
            add_dummy_endnode = True
        elif head_inedges and len(endnodes) == 1 and endnodes[0] not in list(graph.predecessors(head)):
            # special case: there are in-edges to head, but the only end node is not a predecessor to head.
            # in this case, we will want to put the end node and a predecessor of the head into the same region.
            add_dummy_endnode = True

        if add_dummy_endnode:
            # we need a copy of the graph!
            graph_copy = networkx.DiGraph(graph_copy)
            dummy_endnode = "DUMMY_ENDNODE"
            for endnode in endnodes:
                graph_copy.add_edge(endnode, dummy_endnode)
            endnodes = [dummy_endnode]
        else:
            dummy_endnode = None

        # compute dominator tree
        doms = networkx.immediate_dominators(graph_copy, head)

        # compute post-dominator tree
        inverted_graph = shallow_reverse(graph_copy)
        postdoms = networkx.immediate_dominators(inverted_graph, endnodes[0])

        # dominance frontiers
        df = networkx.algorithms.dominance_frontiers(graph_copy, head)

        # visit the nodes in post-order
        for node in networkx.dfs_postorder_nodes(graph_copy, source=head):
            if node is dummy_endnode:
                # skip the dummy endnode
                continue
            if cyclic and node is head:
                continue

            out_degree = graph_copy.out_degree[node]
            if out_degree == 0:
                # the root element of the region hierarchy should always be a GraphRegion,
                # so we transform it into one, if necessary
                if graph_copy.in_degree(node) == 0 and not isinstance(node, GraphRegion):
                    subgraph = networkx.DiGraph()
                    subgraph.add_node(node)
                    self._abstract_acyclic_region(
                        graph,
                        GraphRegion(node, subgraph, None, None, False, None, cyclic_ancestor=cyclic),
                        [],
                        secondary_graph=secondary_graph,
                    )
                continue

            # test if this node is an entry to a single-entry, single-successor region
            levels = 0
            postdom_node = postdoms.get(node, None)
            while postdom_node is not None:
                if (node, postdom_node) not in failed_region_attempts:
                    if self._check_region(graph_copy, node, postdom_node, doms, df):
                        frontier = [postdom_node]
                        region = self._compute_region(
                            graph_copy, node, frontier, dummy_endnode=dummy_endnode, cyclic_ancestor=cyclic
                        )
                        if region is not None:
                            # update region.graph_with_successors
                            if secondary_graph is not None:
                                if self._complete_successors:
                                    for nn in list(region.graph_with_successors.nodes):
                                        original_successors = secondary_graph.successors(nn)
                                        for succ in original_successors:
                                            if not region.graph_with_successors.has_edge(nn, succ):
                                                region.graph_with_successors.add_edge(nn, succ)
                                                region.successors.add(succ)
                                else:
                                    for nn in list(region.graph_with_successors.nodes):
                                        original_successors = secondary_graph.successors(nn)
                                        for succ in original_successors:
                                            if succ not in graph_copy:
                                                # the successor wasn't added to the graph because it does not belong
                                                # to the frontier. we backpatch the successor graph here.
                                                region.graph_with_successors.add_edge(nn, succ)
                                                region.successors.add(succ)

                            # l.debug("Walked back %d levels in postdom tree.", levels)
                            l.debug("Node %r, frontier %r.", node, frontier)
                            # l.debug("Identified an acyclic region %s.", self._dbg_block_list(region.graph.nodes()))
                            self._abstract_acyclic_region(
                                graph, region, frontier, dummy_endnode=dummy_endnode, secondary_graph=secondary_graph
                            )
                            # assert dummy_endnode not in graph
                            return True

                failed_region_attempts.add((node, postdom_node))
                if not dominates(doms, node, postdom_node):
                    break
                if postdom_node is postdoms.get(postdom_node, None):
                    # reached the root of the post-dominator tree
                    break
                # walk one level up in the post-dominator tree
                postdom_node = postdoms.get(postdom_node, None)
                levels += 1
            # l.debug("Walked back %d levels in postdom tree and did not find anything for %r. Next.", levels, node)

        return False

    @staticmethod
    def _check_region(graph, start_node, end_node, doms, df):
        """
        Check whether the nodes between `start_node` and `end_node` may form a region, using dominance information.

        :param graph:       The graph to work on.
        :param start_node:  The candidate entry node of the region.
        :param end_node:    The candidate successor node of the region.
        :param dict doms:   Immediate dominators, as consumed by `dominates()`.
        :param dict df:     A mapping from nodes to their dominance frontiers.
        :return:            True if all checks pass, False otherwise.
        :rtype:             bool
        """

        # if the exit node is the header of a loop that contains the start node, the dominance frontier should only
        # contain the exit node.
        if not dominates(doms, start_node, end_node):
            frontier = df.get(start_node, set())
            for node in frontier:
                if node is not start_node and node is not end_node:
                    return False

        # no edges should enter the region.
        for node in df.get(end_node, set()):
            if dominates(doms, start_node, node) and node is not end_node:
                return False

        # no edges should leave the region.
        for node in df.get(start_node, set()):
            if node is start_node or node is end_node:
                continue
            if node not in df.get(end_node, set()):
                return False
            for pred in graph.predecessors(node):
                if dominates(doms, start_node, pred) and not dominates(doms, end_node, pred):
                    return False

        return True

    @staticmethod
    def _compute_region(graph, node, frontier, include_frontier=False, dummy_endnode=None, cyclic_ancestor=False):
        """
        Collect all nodes that are reachable from `node` without going through any frontier node, and wrap them in a
        GraphRegion.

        :param graph:                   The graph to work on. It is not modified.
        :param node:                    The entry node of the region.
        :param frontier:                A collection of frontier nodes.
        :param bool include_frontier:   Whether edges to frontier nodes are kept inside the region graph.
        :param dummy_endnode:           The dummy end node, if the graph has one. It never becomes part of the region.
        :param bool cyclic_ancestor:    Passed on to the GraphRegion.
        :return:                        A GraphRegion, or None if the region would have fewer than two nodes.
        """

        subgraph = networkx.DiGraph()
        frontier_edges = []
        queue = [node]
        traversed = set()

        while queue:
            node_ = queue.pop()
            if node_ in frontier:
                continue
            traversed.add(node_)
            subgraph.add_node(node_)

            for succ in graph.successors(node_):
                edge_data = graph.get_edge_data(node_, succ)

                # NOTE(review): node_ can never be a frontier node at this point because of the check at the top of
                # the while loop, so this branch looks unreachable. it is kept as is.
                if node_ in frontier and succ in traversed:
                    if include_frontier:
                        # if frontier nodes are included, do not keep traversing their successors
                        # however, if it has an edge to an already traversed node, we should add that edge
                        subgraph.add_edge(node_, succ, **edge_data)
                    else:
                        frontier_edges.append((node_, succ, edge_data))
                    continue

                if succ is dummy_endnode:
                    continue

                if succ in frontier:
                    if not include_frontier:
                        # skip all frontier nodes
                        frontier_edges.append((node_, succ, edge_data))
                        continue
                subgraph.add_edge(node_, succ, **edge_data)
                if succ in traversed:
                    continue
                queue.append(succ)

        if dummy_endnode is not None:
            frontier = {n for n in frontier if n is not dummy_endnode}

        if subgraph.number_of_nodes() > 1:
            subgraph_with_frontier = networkx.DiGraph(subgraph)
            for src, dst, edge_data in frontier_edges:
                if dst is not dummy_endnode:
                    subgraph_with_frontier.add_edge(src, dst, **edge_data)
            # assert dummy_endnode not in frontier
            # assert dummy_endnode not in subgraph_with_frontier
            return GraphRegion(
                node, subgraph, frontier, subgraph_with_frontier, False, None, cyclic_ancestor=cyclic_ancestor
            )
        else:
            return None

    def _abstract_acyclic_region(
        self, graph: networkx.DiGraph, region, frontier, dummy_endnode=None, secondary_graph=None
    ):
        """
        Replace all nodes of the region with the region itself in the graph, and reconnect the region to the outside.

        :param graph:           The graph to modify in place.
        :param region:          The region that replaces its own nodes.
        :param frontier:        The frontier nodes of the region. An edge from the region to each of them (except the
                                dummy end node) is added.
        :param dummy_endnode:   The dummy end node, if there is one.
        :param secondary_graph: An optional second graph that gets the same treatment, with an empty frontier.
        """

        # edges must be collected before the nodes are removed
        in_edges = self._region_in_edges(graph, region, data=True)
        out_edges = self._region_out_edges(graph, region, data=True)

        nodes_set = set()
        for node_ in list(region.graph.nodes()):
            nodes_set.add(node_)
            if node_ is not dummy_endnode:
                graph.remove_node(node_)

        graph.add_node(region)

        for src, _, data in in_edges:
            if src not in nodes_set:
                graph.add_edge(src, region, **data)

        for _, dst, data in out_edges:
            if dst not in nodes_set:
                graph.add_edge(region, dst, **data)

        if frontier:
            for frontier_node in frontier:
                if frontier_node is not dummy_endnode:
                    graph.add_edge(region, frontier_node)

        if secondary_graph is not None:
            self._abstract_acyclic_region(secondary_graph, region, {})

    @staticmethod
    def _abstract_cyclic_region(
        graph: networkx.DiGraph,
        loop_nodes,
        head,
        normal_entries,
        abnormal_entries,
        normal_exit_node,
        abnormal_exit_nodes,
    ):
        """
        Create a cyclic GraphRegion out of the loop nodes and replace these nodes with the region in the graph.

        :param graph:               The graph to modify in place.
        :param loop_nodes:          The nodes that make up the loop.
        :param head:                The loop head.
        :param normal_entries:      Nodes outside the loop that enter the loop via the head.
        :param abnormal_entries:    Nodes outside the loop that enter the loop via other nodes. The edge data of
                                    such an edge gets a "region_dst_node" entry that records the original target.
        :param normal_exit_node:    The normal exit node of the loop, or None.
        :param abnormal_exit_nodes: All other exit nodes of the loop.
        :return:                    The new region.
        :rtype:                     GraphRegion
        """

        region = GraphRegion(head, None, None, None, True, None)

        subgraph = networkx.DiGraph()
        region_outedges = []

        # edges from and to the region; they are added after the loop nodes are removed from the graph
        delayed_edges = []

        full_graph = networkx.DiGraph()

        for node in loop_nodes:
            subgraph.add_node(node)
            in_edges = list(graph.in_edges(node, data=True))
            out_edges = list(graph.out_edges(node, data=True))

            for src, dst, data in in_edges:
                full_graph.add_edge(src, dst, **data)
                if src in loop_nodes:
                    subgraph.add_edge(src, dst, **data)
                elif src == region:
                    subgraph.add_edge(head, dst, **data)
                elif src in normal_entries:
                    # graph.add_edge(src, region, **data)
                    delayed_edges.append((src, region, data))
                elif src in abnormal_entries:
                    data["region_dst_node"] = dst
                    # graph.add_edge(src, region, **data)
                    delayed_edges.append((src, region, data))
                else:
                    assert 0

            for src, dst, data in out_edges:
                full_graph.add_edge(src, dst, **data)
                if dst in loop_nodes:
                    subgraph.add_edge(src, dst, **data)
                elif dst == region:
                    subgraph.add_edge(src, head, **data)
                elif dst == normal_exit_node:
                    region_outedges.append((node, dst))
                    # graph.add_edge(region, dst, **data)
                    delayed_edges.append((region, dst, data))
                elif dst in abnormal_exit_nodes:
                    region_outedges.append((node, dst))
                    # data['region_src_node'] = src
                    # graph.add_edge(region, dst, **data)
                    delayed_edges.append((region, dst, data))
                else:
                    assert 0

        subgraph_with_exits = networkx.DiGraph(subgraph)
        for src, dst in region_outedges:
            subgraph_with_exits.add_edge(src, dst)
        region.graph = subgraph
        region.graph_with_successors = subgraph_with_exits
        # the normal exit node, if there is one, always comes first
        if normal_exit_node is not None:
            region.successors = [normal_exit_node]
        else:
            region.successors = []
        region.successors += list(abnormal_exit_nodes)

        for node in loop_nodes:
            graph.remove_node(node)

        # add delayed edges
        graph.add_node(region)
        for src, dst, data in delayed_edges:
            graph.add_edge(src, dst, **data)

        region.full_graph = full_graph

        return region

    @staticmethod
    def _region_in_edges(graph, region, data=False):
        """
        Return a list of all edges in the graph that go into the head of the region.
        """

        return list(graph.in_edges(region.head, data=data))

    @staticmethod
    def _region_out_edges(graph, region, data=False):
        """
        Return a list of (region, dst, data) tuples, one for each edge in the graph that goes from a node of the
        region to a node outside the region.

        NOTE(review): the loop unpacks three values per edge, so this only works with ``data=True``, which is what
        all callers in this class pass.
        """

        out_edges = []
        for node in region.graph.nodes():
            out_ = graph.out_edges(node, data=data)
            for _, dst, data_ in out_:
                if dst in region.graph:
                    continue
                out_edges.append((region, dst, data_))
        return out_edges

    def _remove_node(self, graph: networkx.DiGraph, node):  # pylint:disable=no-self-use
        """
        Remove the node from the graph. Unless the node has at most one non-self in-edge and at most one non-self
        out-edge, a MultiNode wrapping the node takes its place, with all non-self edges of the node.

        :param graph:   The graph to modify in place.
        :param node:    The node to remove.
        """

        in_edges = [(src, dst, data) for (src, dst, data) in graph.in_edges(node, data=True) if src is not node]
        out_edges = [(src, dst, data) for (src, dst, data) in graph.out_edges(node, data=True) if dst is not node]

        if len(in_edges) <= 1 and len(out_edges) <= 1:
            # it forms a region by itself :-)
            new_node = None

        else:
            new_node = MultiNode([node])

        graph.remove_node(node)

        if new_node is not None:
            for src, _, data in in_edges:
                graph.add_edge(src, new_node, **data)

            for _, dst, data in out_edges:
                graph.add_edge(new_node, dst, **data)

    def _merge_nodes(
        self, graph: networkx.DiGraph, node_a, node_b, force_multinode=False
    ):  # pylint:disable=no-self-use
        """
        Remove both nodes from the graph. Unless `force_multinode` is False and there is at most one in-edge of
        `node_a` and at most one out-edge of `node_b`, a MultiNode of both nodes takes their place, with the in-edges
        of `node_a` and the out-edges of `node_b`.

        :param graph:                   The graph to modify in place.
        :param node_a:                  The first node.
        :param node_b:                  The second node.
        :param bool force_multinode:    Always create the MultiNode.
        """

        in_edges = list(graph.in_edges(node_a, data=True))
        out_edges = list(graph.out_edges(node_b, data=True))

        if not force_multinode and len(in_edges) <= 1 and len(out_edges) <= 1:
            # it forms a region by itself :-)
            new_node = None

        else:
            new_node = MultiNode([node_a, node_b])

        graph.remove_node(node_a)
        graph.remove_node(node_b)

        if new_node is not None:
            graph.add_node(new_node)

            for src, _, data in in_edges:
                if src is node_b:
                    src = new_node
                graph.add_edge(src, new_node, **data)

            for _, dst, data in out_edges:
                if dst is node_a:
                    dst = new_node
                graph.add_edge(new_node, dst, **data)

        assert node_a not in graph
        assert node_b not in graph

    def _absorb_node(
        self, graph: networkx.DiGraph, node_mommy, node_kiddie, force_multinode=False
    ):  # pylint:disable=no-self-use
        """
        Remove both nodes from the graph. Unless `force_multinode` is False and there is at most one in-edge of
        `node_mommy` and at most one out-edge of `node_kiddie`, a MultiNode of both nodes takes their place, with the
        in-edges of `node_mommy` and the out-edges of both nodes (except for the edge between them).

        :param graph:                   The graph to modify in place.
        :param node_mommy:              The node that absorbs the other one.
        :param node_kiddie:             The node that is absorbed.
        :param bool force_multinode:    Always create the MultiNode.
        """

        # in_edges() and out_edges() return views of the graph. we need snapshots since the edges are only re-added
        # after both nodes (and thus all of their edges) are removed from the graph.
        in_edges_mommy = list(graph.in_edges(node_mommy, data=True))
        out_edges_mommy = list(graph.out_edges(node_mommy, data=True))
        out_edges_kiddie = list(graph.out_edges(node_kiddie, data=True))

        if not force_multinode and len(in_edges_mommy) <= 1 and len(out_edges_kiddie) <= 1:
            # it forms a region by itself :-)
            new_node = None

        else:
            new_node = MultiNode([node_mommy, node_kiddie])

        graph.remove_node(node_mommy)
        graph.remove_node(node_kiddie)

        if new_node is not None:
            graph.add_node(new_node)

            for src, _, data in in_edges_mommy:
                if src == node_kiddie:
                    src = new_node
                graph.add_edge(src, new_node, **data)

            for _, dst, data in out_edges_mommy:
                if dst == node_kiddie:
                    # the edge between the two nodes is now inside the MultiNode
                    continue
                if dst == node_mommy:
                    dst = new_node
                graph.add_edge(new_node, dst, **data)

            for _, dst, data in out_edges_kiddie:
                if dst == node_mommy:
                    dst = new_node
                graph.add_edge(new_node, dst, **data)

        assert node_mommy not in graph
        assert node_kiddie not in graph

    def _ensure_jump_at_loop_exit_ends(self, node: Union[Block, MultiNode]) -> None:
        """
        Make sure the block ends with a jump: a Jump to the address right after the block
        (``addr + original_size``) is appended if the block is empty, or if its first non-label statement is not a
        ConditionalJump and its last statement is not a Jump, ConditionalJump, or IncompleteSwitchCaseHeadStatement.
        For a MultiNode, the last node inside is processed. All other node types are ignored.

        :param node:    The node to process. It is modified in place.
        """

        if isinstance(node, Block):
            if not node.statements:
                node.statements.append(
                    Jump(
                        None,
                        Const(None, None, node.addr + node.original_size, self.project.arch.bits),
                        ins_addr=node.addr,
                    )
                )
            else:
                if not isinstance(first_nonlabel_statement(node), ConditionalJump) and not isinstance(
                    node.statements[-1],
                    (
                        Jump,
                        ConditionalJump,
                        IncompleteSwitchCaseHeadStatement,
                    ),
                ):
                    node.statements.append(
                        Jump(
                            None,
                            Const(None, None, node.addr + node.original_size, self.project.arch.bits),
                            ins_addr=node.addr,
                        )
                    )
        elif isinstance(node, MultiNode):
            if node.nodes:
                self._ensure_jump_at_loop_exit_ends(node.nodes[-1])

    @staticmethod
    def _dbg_block_list(blocks):
        """
        Return a list with the hex address of each block, or its repr() if it does not have an ``addr`` attribute.
        """

        return [(hex(b.addr) if hasattr(b, "addr") else repr(b)) for b in blocks]
# register the RegionIdentifier class under the name "RegionIdentifier", using register_analysis() from the parent
# analyses package
register_analysis(RegionIdentifier, "RegionIdentifier")