import logging
import time
from archinfo.arch_soot import ArchSoot, SootAddressDescriptor, SootMethodDescriptor
from cle.backends.backend import Backend
from cle.errors import CLEError
try:
import pysoot
from pysoot.lifter import Lifter
except ImportError:
pysoot = None
Lifter = None
log = logging.getLogger(name=__name__)
[docs]class Soot(Backend):
"""
The basis backend for lifting and loading bytecode from JARs and APKs to Soot IR.
Note that self.min_addr will be 0 and self.max_addr will be 1. Hopefully no other object will be mapped at address
0.
"""
[docs] def __init__(
self,
*args,
entry_point=None,
entry_point_params=(),
input_format=None,
additional_jars=None,
additional_jar_roots=None,
jni_libs_ld_path=None,
jni_libs=None,
android_sdk=None,
**kwargs,
):
if not pysoot:
raise ImportError("Cannot import PySoot. The Soot backend requires PySoot.")
if kwargs.get("has_memory", False):
raise CLEError('The parameter "has_memory" must be False for Soot backend.')
super().__init__(*args, has_memory=False, **kwargs)
if self.binary is None:
raise ValueError("Cannot use the Soot backend loading from a stream")
# load the classes
log.debug("Lifting to Soot IR ...")
start_time = time.time()
pysoot_lifter = Lifter(
self.binary,
input_format=input_format,
android_sdk=android_sdk,
additional_jars=additional_jars,
additional_jar_roots=additional_jar_roots,
)
end_time = time.time()
log.debug("Lifting completed in %ds", round(end_time - start_time, 2))
self._classes = pysoot_lifter.classes
# find entry method
if entry_point:
try:
ep_method = self.get_soot_method(entry_point, params=entry_point_params)
ep_method_descriptor = SootMethodDescriptor.from_soot_method(ep_method)
self._entry = SootAddressDescriptor(ep_method_descriptor, 0, 0)
log.debug("Entry point set to %s", self._entry)
except CLEError:
log.warning("Couldn't find entry point %s.", entry_point)
self._entry = None
self.os = "javavm"
self.rebase_addr = None
self.set_arch(ArchSoot())
if jni_libs:
# native libraries are getting loaded by adding them as a dependency of this object
self.deps += [jni_libs] if type(jni_libs) in (str, bytes) else jni_libs
# if available, add additional load path(s)
if jni_libs_ld_path:
path_list = [jni_libs_ld_path] if type(jni_libs_ld_path) in (str, bytes) else jni_libs_ld_path
self.extra_load_path += path_list
self.jni_support = True
else:
self.jni_support = False
@property
def max_addr(self):
# FIXME: This is a hack to satisfy checks elsewhere that max_addr must be greater than min_addr
return self.min_addr + 1
@property
def entry(self):
return self._entry
@property
def classes(self):
return self._classes
[docs] def get_soot_class(self, cls_name, none_if_missing=False):
"""
Get Soot class object.
:param str cls_name: Name of the class.
:return: The class object.
:rtype: pysoot.soot.SootClass
"""
try:
return self._classes[cls_name]
except KeyError:
if none_if_missing:
return None
else:
raise CLEError('Class "%s" does not exist.' % cls_name)
[docs] def get_soot_method(self, thing, class_name=None, params=(), none_if_missing=False):
"""
Get Soot method object.
:param thing: Descriptor or the method, or name of the method.
:param str class_name: Name of the class. If not specified, class name can be parsed from method_name.
:return: Soot method that satisfy the criteria.
"""
# Step 1: Parse input
if isinstance(thing, SootMethodDescriptor):
method_description = {
"class_name": thing.class_name,
"name": thing.name,
"params": thing.params,
}
elif isinstance(thing, (str, bytes)):
method_name = thing
# if class_name is not set, parse it from the method name
if class_name is None:
last_dot = method_name.rfind(".")
if last_dot >= 0:
class_name = method_name[:last_dot]
method_name = method_name[last_dot + 1 :]
else:
raise ValueError("Cannot parse class name from method %s." % method_name)
method_description = {
"class_name": class_name,
"name": method_name,
"params": params,
}
else:
raise TypeError('Unsupported type "%s" for the first argument.' % thing)
# Step 2: Load class containing the method
try:
cls = self.get_soot_class(method_description["class_name"])
except CLEError:
if none_if_missing:
return None
else:
raise
# Step 3: Get all methods matching the description
methods = [
soot_method
for soot_method in cls.methods
if self._description_matches_soot_method(soot_method, **method_description)
]
if not methods:
if none_if_missing:
return None
else:
raise CLEError(
"Method with description %s does not exist in class %s."
% (method_description, method_description["class_name"])
)
if len(methods) > 1:
# Warn if we found several matching methods
log.warning(
"Method with description %s is ambiguous in class %s.",
method_description,
method_description["class_name"],
)
return methods[0]
@staticmethod
def _description_matches_soot_method(soot_method, name=None, class_name=None, params=()):
if name and soot_method.name != name:
return False
if class_name and soot_method.class_name != class_name:
return False
if soot_method.params != params:
return False
return True
@property
def main_methods(self):
"""
Find all Main methods in this binary.
:return: All main methods in each class.
:rtype: iterator
"""
for cls in self.classes.values():
for method in cls.methods:
if method.name == "main": # TODO: Check more attributes
yield method
[docs] @staticmethod
def is_zip_archive(stream):
stream.seek(0)
identstring = stream.read(4)
stream.seek(0)
return identstring.startswith(b"\x50\x4b\x03\x04")