Source code for angr.analyses.cdg

from typing import Optional

import logging

import networkx

from ..utils.graph import compute_dominance_frontier, PostDominators, TemporaryNode
from . import Analysis

_l = logging.getLogger(name=__name__)


[docs]class CDG(Analysis):
    """
    Implements a control dependence graph.
    """

[docs]    def __init__(self, cfg, start=None, no_construct=False):
        """
        Constructor.

        :param cfg:             The control flow graph upon which this control dependence graph will build
        :param start:           The starting point to begin constructing the control dependence graph
        :param no_construct:    Skip the construction step. Only used in unit-testing.
        """
        self._start = start if start is not None else self.project.entry
        self._cfg = cfg

        self._ancestor = None
        self._semi = None
        self._post_dom: Optional[networkx.DiGraph] = None

        self._graph: Optional[networkx.DiGraph] = None
        self._normalized_cfg = None

        if not no_construct:
            if self._cfg is None:
                # This leads to import cycles otherwise
                # pylint: disable=import-outside-toplevel
                from angr.analyses.cfg.cfg_emulated import CFGEmulated

                self._cfg = self.project.analyses[CFGEmulated].prep()()

            # FIXME: We should not use get_any_irsb in such a real setting...
            self._entry = self._cfg.model.get_any_node(self._start)

            self._construct()

    #
    # Properties
    #

    @property
    def graph(self):
        return self._graph

    #
    # Public methods
    #

[docs]    def get_post_dominators(self):
        """
        Return the post-dom tree
        """
        return self._post_dom

[docs]    def get_dependants(self, run):
        """
        Return a list of nodes that are control dependent on the given node in the control dependence graph
        """
        if run in self._graph.nodes():
            return list(self._graph.successors(run))
        else:
            return []

[docs]    def get_guardians(self, run):
        """
        Return a list of nodes on whom the specific node is control dependent in the control dependence graph
        """
        if run in self._graph.nodes():
            return list(self._graph.predecessors(run))
        else:
            return []

    #
    # Private methods
    #

    def _construct(self):
        """
        Construct a control dependence graph.

        This implementation is based on figure 6 of paper An Efficient Method of Computing Static Single Assignment
        Form by Ron Cytron, etc.
        """

        if not self._cfg._model.ident.startswith("CFGEmulated"):
            raise ValueError("CDG is only supported by CFGEmulated.")

        self._acyclic_cfg = self._cfg.copy()
        # TODO: Cycle-removing is not needed - confirm it later
        # The CFG we use should be acyclic!
        # self._acyclic_cfg.remove_cycles()

        # Pre-process the acyclic CFG
        self._pre_process_cfg()

        # Construct post-dominator tree
        self._pd_construct()

        self._graph: networkx.DiGraph = networkx.DiGraph()

        # Construct the reversed dominance frontier mapping
        rdf = compute_dominance_frontier(self._normalized_cfg, self._post_dom)

        for y in self._cfg.graph.nodes():
            if y not in rdf:
                continue
            for x in rdf[y]:
                self._graph.add_edge(x, y)

        # self._post_process()

    def _pre_process_cfg(self):
        """
        Pre-process the acyclic CFG.
        - Change all FakeRet edges to normal edges when necessary (e.g. the normal/expected return edge does not exist)
        """
        for _, dst, data in self._acyclic_cfg.graph.edges(data=True):
            if "jumpkind" in data and data["jumpkind"] == "Ijk_FakeRet":
                all_edges_to_dst = self._acyclic_cfg.graph.in_edges([dst], data=True)
                if not any((s, d) for s, d, da in all_edges_to_dst if da["jumpkind"] != "Ijk_FakeRet"):
                    # All in edges are FakeRets
                    # Change them to a normal edge
                    for _, _, data_ in all_edges_to_dst:
                        data_["jumpkind"] = "Ijk_Boring"

    def _post_process(self):
        """
        There are cases where a loop has two overlapping loop headers thanks
        to the way VEX is dealing with continuous instructions. As we were
        breaking the connection between the second loop header and its
        successor, we shall restore them in our CDG.
        """
        # TODO: Verify its correctness
        loop_back_edges = self._cfg.get_loop_back_edges()
        for b1, b2 in loop_back_edges:
            self._graph.add_edge(b1, b2)

    #
    # Post-dominator tree related
    #

    def _pd_construct(self):
        pdoms = PostDominators(self._acyclic_cfg, self._entry, successors_func=self._pd_graph_successors)

        self._post_dom = pdoms.post_dom

        self._pd_post_process(self._acyclic_cfg)

        # Create the normalized_cfg without the annoying ContainerNodes
        self._normalized_cfg = networkx.DiGraph()
        for src, dst in pdoms.prepared_graph.edges():
            self._normalized_cfg.add_edge(src.obj, dst.obj)

    @staticmethod
    def _pd_graph_successors(graph, node):
        if type(node) is TemporaryNode:
            # This is for testing
            successors = graph.graph.successors(node)
        else:
            # Real CFGNode!
            successors = graph.model.get_successors(node)

        return successors

    def _pd_post_process(self, cfg):
        """
        Take care of those loop headers/tails where we manually broke their
        connection to the next BBL
        """
        loop_back_edges = self._cfg.get_loop_back_edges()

        for b1, b2 in loop_back_edges:
            # The edge between b1 and b2 is manually broken
            # The post dominator of b1 should be b2 (or not?)

            successors = list(self._pd_graph_successors(cfg, b1))

            if len(successors) == 0:
                if b2 in self._post_dom:
                    self._post_dom.add_edge(b1, b2)
                else:
                    _l.debug("%s is not in post dominator dict.", b2)


from angr.analyses import AnalysesHub

AnalysesHub.register_default("CDG", CDG)