cle — Binary Loader#

CLE is an extensible binary loader. Its main goal is to take an executable program and any libraries it depends on and produce an address space where that program is loaded and ready to run.

The primary interface to CLE is the Loader class.

class cle.CGC(binary, binary_stream, *args, **kwargs)[source]#

Bases: ELF

Backend to support the CGC elf format used by the Cyber Grand Challenge competition.

See: https://github.com/CyberGrandChallenge/libcgcef/blob/master/cgc_executable_format.md

is_default = True#
static is_compatible(stream)[source]#

Determine quickly whether this backend can load an object from this stream

supported_filetypes = ['cgc']#
addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
classmethod check_compatibility(spec, obj)#

Performs a minimal static load of spec and returns whether it’s compatible with obj

classmethod check_magic_compatibility(stream)#

Check if a stream of bytes contains the same magic number as the main object

close()#
contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

property entry#
static extract_arch(reader)#
static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

property finalizers#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

get_symbol(symid, symbol_table=None)#

Gets a Symbol object for the specified symbol.

Parameters:

symid – Either an index into .dynsym or the name of a symbol.

property image_base_delta#
initial_register_values()#

Deprecated

property initializers#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

property is_ppc64_abiv1#

Returns whether the arch is PowerPC64 ABIv1.

Returns:

True if PowerPC64 ABIv1, False otherwise.

property is_ppc64_abiv2#

Returns whether the arch is PowerPC64 ABIv2.

Returns:

True if PowerPC64 ABIv2, False otherwise.

property loader: Loader#
property max_addr: int#

This returns the highest virtual address contained in any loaded segment of the binary.

property min_addr: int#

This returns the lowest virtual address contained in any loaded segment of the binary.

offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

property plt#

Maps names to addresses.

property ppc64_initial_rtoc#

Get initial rtoc value for PowerPC64 architecture.

rebase(new_base)#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available

property reverse_plt#

Maps addresses to names.

property sections: Regions[Section]#
property segments: Regions[Segment]#
set_arch(arch)#
property symbols_by_addr#
property symbols_by_name#
thread_registers(thread=None)#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

Return type:

Dict[str, Any]

property threads: List#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

class cle.ELF(*args, addend=None, debug_symbols=None, discard_section_headers=False, discard_program_headers=False, **kwargs)[source]#

Bases: MetaELF

The main loader class for statically loading ELF executables. Uses the pyelftools library where useful.

Useful backend options:

  • debug_symbols: Provides the path to a separate file which contains the binary’s debug symbols

  • discard_section_headers: Do not parse section headers. Use this if they are corrupted or malicious.

  • discard_program_headers: Do not parse program headers. Use this if the binary is for a platform whose ELF loader only looks at section headers, but whose toolchain generates program headers anyway.

is_default = True#
close()[source]#
classmethod check_compatibility(spec, obj)[source]#

Performs a minimal static load of spec and returns whether it’s compatible with obj

classmethod check_magic_compatibility(stream)[source]#

Check if a stream of bytes contains the same magic number as the main object

static is_compatible(stream)[source]#

Determine quickly whether this backend can load an object from this stream

static extract_arch(reader)[source]#
property initializers#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

property finalizers#

Stub function. Like initializers, but with finalizers.

property symbols_by_name#
get_symbol(symid, symbol_table=None)[source]#

Gets a Symbol object for the specified symbol.

Parameters:

symid – Either an index into .dynsym or the name of a symbol.

rebase(new_base)[source]#

Rebase backend’s regions to the new base where they were mapped by the loader

addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

property entry#
static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

property image_base_delta#
initial_register_values()#

Deprecated

property is_ppc64_abiv1#

Returns whether the arch is PowerPC64 ABIv1.

Returns:

True if PowerPC64 ABIv1, False otherwise.

property is_ppc64_abiv2#

Returns whether the arch is PowerPC64 ABIv2.

Returns:

True if PowerPC64 ABIv2, False otherwise.

property loader: Loader#
property max_addr: int#

This returns the highest virtual address contained in any loaded segment of the binary.

property min_addr: int#

This returns the lowest virtual address contained in any loaded segment of the binary.

offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

property plt#

Maps names to addresses.

property ppc64_initial_rtoc#

Get initial rtoc value for PowerPC64 architecture.

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available

property reverse_plt#

Maps addresses to names.

property sections: Regions[Section]#
property segments: Regions[Segment]#
set_arch(arch)#
supported_filetypes = ['elf']#
property symbols_by_addr#
thread_registers(thread=None)#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

Return type:

Dict[str, Any]

property threads: List#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

class cle.PE(*args, **kwargs)[source]#

Bases: Backend

Representation of a PE (i.e. Windows) binary.

is_default = True#
property segments: Regions[Segment]#
static is_compatible(stream)[source]#

Determine quickly whether this backend can load an object from this stream

classmethod check_magic_compatibility(stream)[source]#

Check if a stream of bytes contains the same magic number as the main object

classmethod check_compatibility(spec, obj)[source]#

Performs a minimal static load of spec and returns whether it’s compatible with obj

close()[source]#
get_symbol(name)[source]#

Look up the symbol with the given name. Symbols can be looked up by ordinal with the name "ordinal.%d" % num

addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

property entry#
static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

Return type:

Optional[str]

property finalizers: List[int]#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

property image_base_delta#
initial_register_values()#

Deprecated

property initializers: List[int]#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

property loader: Loader#
property max_addr: int#

This returns the highest virtual address contained in any loaded segment of the binary.

property min_addr: int#

This returns the lowest virtual address contained in any loaded segment of the binary.

offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

rebase(new_base)#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available

property sections: Regions[Section]#
set_arch(arch)#
property symbols_by_addr#
thread_registers(thread=None)#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

Return type:

Dict[str, Any]

property threads: List#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

class cle.XBE(*args, **kwargs)[source]#

Bases: Backend

The main loader class for statically loading XBE executables.

is_default = True#
close()[source]#
static is_compatible(stream)[source]#

Determine quickly whether this backend can load an object from this stream

property min_addr#

This returns the lowest virtual address contained in any loaded segment of the binary.

property max_addr#

This returns the highest virtual address contained in any loaded segment of the binary.

addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
classmethod check_compatibility(spec, obj)[source]#

Performs a minimal static load of spec and returns whether it’s compatible with obj

classmethod check_magic_compatibility(stream)#

Check if a stream of bytes contains the same magic number as the main object

Return type:

bool

Parameters:

stream (BinaryIO) –

contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

property entry#
static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

Return type:

Optional[str]

property finalizers: List[int]#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

get_symbol(name)#

Stub function. Implement to find the symbol with name name.

Return type:

Optional[Symbol]

Parameters:

name (str) –

property image_base_delta#
initial_register_values()#

Deprecated

property initializers: List[int]#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

property loader: Loader#
offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

rebase(new_base)#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available

property sections: Regions[Section]#
property segments: Regions[Segment]#
set_arch(arch)#
property symbols_by_addr#
thread_registers(thread=None)#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

Return type:

Dict[str, Any]

property threads: List#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

class cle.Apk(apk_path, binary_stream, entry_point=None, entry_point_params=(), android_sdk=None, supported_jni_archs=None, jni_libs=None, jni_libs_ld_path=None, **options)[source]#

Bases: Soot

Backend for lifting APKs to Soot.

is_default = True#
get_callbacks(class_name, callback_names)[source]#

Get callback methods from the name of callback methods.

Parameters:
  • class_name (str) – Name of the class.

  • callback_names (List[str]) – Name list of the callbacks.

Returns:

The method objects that are callbacks.

Return type:

list[pysoot.sootir.soot_method.SootMethod]

addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
classmethod check_compatibility(spec, obj)#

Performs a minimal static load of spec and returns whether it’s compatible with obj

Return type:

bool

classmethod check_magic_compatibility(stream)#

Check if a stream of bytes contains the same magic number as the main object

Return type:

bool

Parameters:

stream (BinaryIO) –

property classes#
close()#
Return type:

None

contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

property entry#
static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

Return type:

Optional[str]

property finalizers: List[int]#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

get_soot_class(cls_name, none_if_missing=False)#

Get Soot class object.

Parameters:

cls_name (str) – Name of the class.

Returns:

The class object.

Return type:

pysoot.soot.SootClass

get_soot_method(thing, class_name=None, params=(), none_if_missing=False)#

Get Soot method object.

Parameters:
  • thing – Descriptor of the method, or name of the method.

  • class_name (str) – Name of the class. If not specified, the class name can be parsed from thing.

Returns:

Soot method that satisfies the criteria.

get_symbol(name)#

Stub function. Implement to find the symbol with name name.

Return type:

Optional[Symbol]

Parameters:

name (str) –

property image_base_delta#
initial_register_values()#

Deprecated

property initializers: List[int]#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

static is_zip_archive(stream)#
property loader: Loader#
property main_methods#

Find all Main methods in this binary.

Returns:

All main methods in each class.

Return type:

iterator

property max_addr#

This returns the highest virtual address contained in any loaded segment of the binary.

property min_addr: int#

This returns the lowest virtual address contained in any loaded segment of the binary.

offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

rebase(new_base)#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available

property sections: Regions[Section]#
property segments: Regions[Segment]#
set_arch(arch)#
property symbols_by_addr#
thread_registers(thread=None)#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

Return type:

Dict[str, Any]

property threads: List#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

static is_compatible(stream)[source]#

Determine quickly whether this backend can load an object from this stream

class cle.BackedCGC(*args, memory_backer=None, register_backer=None, writes_backer=None, permissions_map=None, current_allocation_base=None, **kwargs)[source]#

Bases: CGC

This is a backend for CGC executables that allows the user to provide a memory backer and a register backer as the initial state of the running binary.

is_default = True#
static is_compatible(stream)[source]#

Determine quickly whether this backend can load an object from this stream

property threads#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

thread_registers(thread=None)[source]#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
classmethod check_compatibility(spec, obj)#

Performs a minimal static load of spec and returns whether it’s compatible with obj

classmethod check_magic_compatibility(stream)#

Check if a stream of bytes contains the same magic number as the main object

close()#
contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

property entry#
static extract_arch(reader)#
static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

property finalizers#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

get_symbol(symid, symbol_table=None)#

Gets a Symbol object for the specified symbol.

Parameters:

symid – Either an index into .dynsym or the name of a symbol.

property image_base_delta#
initial_register_values()#

Deprecated

property initializers#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

property is_ppc64_abiv1#

Returns whether the arch is PowerPC64 ABIv1.

Returns:

True if PowerPC64 ABIv1, False otherwise.

property is_ppc64_abiv2#

Returns whether the arch is PowerPC64 ABIv2.

Returns:

True if PowerPC64 ABIv2, False otherwise.

property loader: Loader#
property max_addr: int#

This returns the highest virtual address contained in any loaded segment of the binary.

property min_addr: int#

This returns the lowest virtual address contained in any loaded segment of the binary.

offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

property plt#

Maps names to addresses.

property ppc64_initial_rtoc#

Get initial rtoc value for PowerPC64 architecture.

rebase(new_base)#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available

property reverse_plt#

Maps addresses to names.

property sections: Regions[Section]#
property segments: Regions[Segment]#
set_arch(arch)#
supported_filetypes = ['cgc']#
property symbols_by_addr#
property symbols_by_name#
addr_to_line: SortedDict[int, Set[Tuple[int, int]]]#
variables: Optional[List[Variable]]#
compilation_units: Optional[List[CompilationUnit]]#
symbols: List[Symbol]#
imports: typing.Dict[str, 'Relocation']#
relocs: List[Relocation]#
child_objects: List['Backend']#
exception_handlings: List[ExceptionHandling]#
function_hints: List[FunctionHint]#
memory: Clemory#
class cle.Backend(binary, binary_stream, loader=None, is_main_bin=False, entry_point=None, arch=None, base_addr=None, force_rebase=False, has_memory=True, **kwargs)[source]#

Bases: object

Main base class for CLE binary objects.

An alternate interface to this constructor exists as the static method cle.loader.Loader.load_object()

Variables:
  • binary – The path to the file this object is loaded from

  • binary_basename – The basename of the filepath, or a short representation of the stream it was loaded from

  • is_main_bin – Whether this binary is loaded as the main executable

  • segments – A listing of all the loaded segments in this file

  • sections – A listing of all the demarked sections in the file

  • sections_map – A dict mapping from section name to section

  • imports – A mapping from symbol name to import relocation

  • resolved_imports – A list of all the import symbols that are successfully resolved

  • relocs – A list of all the relocations in this binary

  • irelatives – A list of tuples representing all the irelative relocations that need to be performed. The first item in the tuple is the address of the resolver function, and the second item is the address of where to write the result. The destination address is an RVA.

  • jmprel – A mapping from symbol name to the address of its jump slot relocation, i.e. its GOT entry.

  • arch (archinfo.arch.Arch) – The architecture of this binary

  • os (str) – The operating system this binary is meant to run under

  • mapped_base (int) – The base address of this object in virtual memory

  • deps – A list of names of shared libraries this binary depends on

  • linking – ‘dynamic’ or ‘static’

  • linked_base – The base address this object requests to be loaded at

  • pic (bool) – Whether this object is position-independent

  • execstack (bool) – Whether this executable has an executable stack

  • provides (str) – The name of the shared library dependency that this object resolves

  • symbols (list) – A list of symbols provided by this object, sorted by address

  • has_memory – Whether this backend is backed by a Clemory or not. As it stands now, a backend should still define min_addr and max_addr even if has_memory is False.

is_default = False#
property arch: Arch#
property loader: Loader#
close()[source]#
Return type:

None

set_arch(arch)[source]#
property image_base_delta#
property entry#
property segments: Regions[Segment]#
property sections: Regions[Section]#
property symbols_by_addr#
rebase(new_base)[source]#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()[source]#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function, since that is where the calculation of which objects should be available takes place.

contains_addr(addr)[source]#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

find_loadable_containing(addr)[source]#
find_segment_containing(addr)[source]#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

find_section_containing(addr)[source]#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

addr_to_offset(addr)[source]#
Return type:

Optional[int]

Parameters:

addr (int) –

offset_to_addr(offset)[source]#
Return type:

Optional[int]

Parameters:

offset (int) –

property min_addr: int#

This returns the lowest virtual address contained in any loaded segment of the binary.

property max_addr: int#

This returns the highest virtual address contained in any loaded segment of the binary.

property initializers: List[int]#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

property finalizers: List[int]#

Stub function. Like initializers, but with finalizers.

property threads: List#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

thread_registers(thread=None)[source]#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

Return type:

Dict[str, Any]

initial_register_values()[source]#

Deprecated

get_symbol(name)[source]#

Stub function. Implement to find the symbol with name name.

Return type:

Optional[Symbol]

Parameters:

name (str) –

static extract_soname(path)[source]#

Extracts the shared object identifier from the path, or returns None if it cannot.

Return type:

Optional[str]

classmethod is_compatible(stream)[source]#

Determine quickly whether this backend can load an object from this stream

Return type:

bool

classmethod check_compatibility(spec, obj)[source]#

Performs a minimal static load of spec and returns whether it’s compatible with obj

Return type:

bool

classmethod check_magic_compatibility(stream)[source]#

Check if a stream of bytes contains the same magic number as the main object

Return type:

bool

Parameters:

stream (BinaryIO) –

class cle.Blob(*args, offset=None, segments=None, **kwargs)[source]#

Bases: Backend

Representation of a binary blob, i.e. an executable in an unknown file format.

is_default = True#
static is_compatible(stream)[source]#

Determine quickly whether this backend can load an object from this stream

property min_addr#

This returns the lowest virtual address contained in any loaded segment of the binary.

property max_addr#

This returns the highest virtual address contained in any loaded segment of the binary.

function_name(addr)[source]#

Blobs don’t support function names.

contains_addr(addr)[source]#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

in_which_segment(addr)[source]#

Blobs don’t support segments.

classmethod check_compatibility(spec, obj)[source]#

Performs a minimal static load of spec and returns whether it’s compatible with obj

addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
classmethod check_magic_compatibility(stream)#

Check if a stream of bytes contains the same magic number as the main object

Return type:

bool

Parameters:

stream (BinaryIO) –

close()#
Return type:

None

property entry#
static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

Return type:

Optional[str]

property finalizers: List[int]#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

get_symbol(name)#

Stub function. Implement to find the symbol with name name.

Return type:

Optional[Symbol]

Parameters:

name (str) –

property image_base_delta#
initial_register_values()#

Deprecated

property initializers: List[int]#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

property loader: Loader#
offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

rebase(new_base)#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available

property sections: Regions[Section]#
property segments: Regions[Segment]#
set_arch(arch)#
property symbols_by_addr#
thread_registers(thread=None)#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

Return type:

Dict[str, Any]

property threads: List#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

symbols: List[Symbol]#
imports: typing.Dict[str, 'Relocation']#
relocs: List[Relocation]#
child_objects: List['Backend']#
exception_handlings: List[ExceptionHandling]#
function_hints: List[FunctionHint]#
memory: Clemory#
class cle.ELFCore(*args, executable=None, remote_file_mapping=None, remote_file_mapper=None, **kwargs)[source]#

Bases: ELF

Loader class for ELF core files.

One key pain point when analyzing a core dump generated on a remote machine is that the paths to binaries are absolute (and may not exist or be the same on your local machine).

Therefore, you can use the option remote_file_mapping to specify a dict mapping (easy if there are a small number of mappings) or remote_file_mapper to specify a function that accepts a remote file name and returns the local file name (useful if there are many mappings).

If you specify both remote_file_mapping and remote_file_mapper, remote_file_mapping is applied first, then the result is passed to remote_file_mapper.

Parameters:
  • executable – Optional path to the main binary of the core dump. If not supplied, ELFCore will attempt to figure it out automatically from the core dump.

  • remote_file_mapping – Optional dict that maps specific file names in the core dump to other file names.

  • remote_file_mapper – Optional function that is used to map every file name in the core dump to whatever is returned from this function.

is_default = True#
static is_compatible(stream)[source]#

Determine quickly whether this backend can load an object from this stream

property threads#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

thread_registers(thread=None)[source]#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
classmethod check_compatibility(spec, obj)#

Performs a minimal static load of spec and returns whether it’s compatible with obj

classmethod check_magic_compatibility(stream)#

Check if a stream of bytes contains the same magic number as the main object

close()#
contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

property entry#
static extract_arch(reader)#
static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

property finalizers#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

get_symbol(symid, symbol_table=None)#

Gets a Symbol object for the specified symbol.

Parameters:

symid – Either an index into .dynsym or the name of a symbol.

property image_base_delta#
initial_register_values()#

Deprecated

property initializers#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

property is_ppc64_abiv1#

Returns whether the arch is PowerPC64 ABIv1.

Returns:

True if PowerPC64 ABIv1, False otherwise.

property is_ppc64_abiv2#

Returns whether the arch is PowerPC64 ABIv2.

Returns:

True if PowerPC64 ABIv2, False otherwise.

property loader: Loader#
property max_addr: int#

This returns the highest virtual address contained in any loaded segment of the binary.

property min_addr: int#

This returns the lowest virtual address contained in any loaded segment of the binary.

offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

property plt#

Maps names to addresses.

property ppc64_initial_rtoc#

Get initial rtoc value for PowerPC64 architecture.

rebase(new_base)#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available

property reverse_plt#

Maps addresses to names.

property sections: Regions[Section]#
property segments: Regions[Segment]#
set_arch(arch)#
supported_filetypes = ['elf']#
property symbols_by_addr#
property symbols_by_name#
addr_to_line: SortedDict[int, Set[Tuple[int, int]]]#
variables: Optional[List[Variable]]#
compilation_units: Optional[List[CompilationUnit]]#
symbols: List[Symbol]#
imports: typing.Dict[str, 'Relocation']#
relocs: List[Relocation]#
child_objects: List['Backend']#
exception_handlings: List[ExceptionHandling]#
function_hints: List[FunctionHint]#
memory: Clemory#
class cle.ExceptionHandling(start_addr, size, handler_addr=None, type_=None, func_addr=None)[source]#

Bases: object

Describes an exception handling entry.

Exception handlers are usually language-specific. In C++, they are usually implemented as try {} catch {} blocks.

Variables:
  • start_addr (int) – The beginning of the try block.

  • size (int) – Size of the try block.

  • handler_addr (Optional[int]) – Address of the exception handler code.

  • type – Type of the exception handler. Optional.

  • func_addr (Optional[int]) – Address of the function. Optional.

start_addr#
size#
handler_addr#
type#
func_addr#
class cle.FunctionHint(addr, size, source)[source]#

Bases: object

Describes a function hint.

Variables:
  • addr (int) – Address of the function.

  • size (int) – Size of the function.

  • source (int) – Source of this hint.

addr#
size#
source#
class cle.FunctionHintSource[source]#

Bases: object

Enums that describe the source of function hints.

EH_FRAME = 0#
EXTERNAL_EH_FRAME = 1#
class cle.Hex(*args, **kwargs)[source]#

Bases: Backend

A loader for Intel Hex objects. See https://en.wikipedia.org/wiki/Intel_HEX

is_default = True#
static parse_record(line)[source]#
static coalesce_regions(regions)[source]#
addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
classmethod check_compatibility(spec, obj)#

Performs a minimal static load of spec and returns whether it’s compatible with obj

Return type:

bool

classmethod check_magic_compatibility(stream)#

Check if a stream of bytes contains the same magic number as the main object

Return type:

bool

Parameters:

stream (BinaryIO) –

close()#
Return type:

None

contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

property entry#
static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

Return type:

Optional[str]

property finalizers: List[int]#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

get_symbol(name)#

Stub function. Implement to find the symbol with name name.

Return type:

Optional[Symbol]

Parameters:

name (str) –

property image_base_delta#
initial_register_values()#

Deprecated

property initializers: List[int]#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

property loader: Loader#
property max_addr: int#

This returns the highest virtual address contained in any loaded segment of the binary.

property min_addr: int#

This returns the lowest virtual address contained in any loaded segment of the binary.

offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

rebase(new_base)#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available

property sections: Regions[Section]#
property segments: Regions[Segment]#
set_arch(arch)#
property symbols_by_addr#
thread_registers(thread=None)#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

Return type:

Dict[str, Any]

property threads: List#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

symbols: List[Symbol]#
imports: typing.Dict[str, 'Relocation']#
relocs: List[Relocation]#
child_objects: List['Backend']#
exception_handlings: List[ExceptionHandling]#
function_hints: List[FunctionHint]#
memory: Clemory#
static is_compatible(stream)[source]#

Determine quickly whether this backend can load an object from this stream

class cle.Jar(jar_path, binary_stream, entry_point=None, entry_point_params=('java.lang.String[]',), jni_libs=None, jni_libs_ld_path=None, **kwargs)[source]#

Bases: Soot

Backend for lifting JARs to Soot.

is_default = True#
static is_compatible(stream)[source]#

Determine quickly whether this backend can load an object from this stream

get_manifest(binary_path=None)[source]#

Load the MANIFEST.MF file

Returns:

A dict of meta info

Return type:

dict

addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
classmethod check_compatibility(spec, obj)#

Performs a minimal static load of spec and returns whether it’s compatible with obj

Return type:

bool

classmethod check_magic_compatibility(stream)#

Check if a stream of bytes contains the same magic number as the main object

Return type:

bool

Parameters:

stream (BinaryIO) –

property classes#
close()#
Return type:

None

contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

property entry#
static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

Return type:

Optional[str]

property finalizers: List[int]#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

get_soot_class(cls_name, none_if_missing=False)#

Get Soot class object.

Parameters:

cls_name (str) – Name of the class.

Returns:

The class object.

Return type:

pysoot.soot.SootClass

get_soot_method(thing, class_name=None, params=(), none_if_missing=False)#

Get Soot method object.

Parameters:
  • thing – Descriptor of the method, or name of the method.

  • class_name (str) – Name of the class. If not specified, the class name can be parsed from thing.

Returns:

Soot method that satisfies the criteria.

get_symbol(name)#

Stub function. Implement to find the symbol with name name.

Return type:

Optional[Symbol]

Parameters:

name (str) –

property image_base_delta#
initial_register_values()#

Deprecated

property initializers: List[int]#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

static is_zip_archive(stream)#
property loader: Loader#
property main_methods#

Find all Main methods in this binary.

Returns:

All main methods in each class.

Return type:

iterator

property max_addr#

This returns the highest virtual address contained in any loaded segment of the binary.

property min_addr: int#

This returns the lowest virtual address contained in any loaded segment of the binary.

offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

rebase(new_base)#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available

property sections: Regions[Section]#
property segments: Regions[Segment]#
set_arch(arch)#
property symbols_by_addr#
thread_registers(thread=None)#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

Return type:

Dict[str, Any]

property threads: List#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

symbols: List[Symbol]#
imports: typing.Dict[str, 'Relocation']#
relocs: List[Relocation]#
child_objects: List['Backend']#
exception_handlings: List[ExceptionHandling]#
function_hints: List[FunctionHint]#
memory: Clemory#
class cle.MachO(*args, **kwargs)[source]#

Bases: Backend

Mach-O binaries for CLE

The Mach-O format is notably different from other formats:

  • Sections are always part of a segment, self.sections will thus be empty.

  • Symbols cannot be categorized like in ELF.

  • Symbol resolution must be handled by the binary.

  • Rebasing in dyld is implemented via adding a small slide to addresses inside the binary, instead of changing the base address of the binary and the addresses being relative. CLE needs relative addresses, so there are a lot of AT.from_lva().to_rva() calls in this backend.

is_default = True#
MH_MAGIC_64 = 4277009103#
MH_CIGAM_64 = 3489328638#
MH_MAGIC = 4277009102#
MH_CIGAM = 3472551422#
symbols: List[Symbol]#
ncmds: int#
sizeofcmds: int#
property min_addr#

This returns the lowest virtual address contained in any loaded segment of the binary.

classmethod check_compatibility(spec, obj)[source]#

Performs a minimal static load of spec and returns whether it’s compatible with obj

classmethod is_compatible(stream)[source]#

Determine quickly whether this backend can load an object from this stream

is_thumb_interworking(address)[source]#

Returns true if the given address is a THUMB interworking address

decode_thumb_interworking(address)[source]#

Decodes a thumb interworking address

find_segment_by_name(name)[source]#
do_binding()[source]#
get_string(start)[source]#

Loads a string from the string table

parse_lc_str(f, start, limit=None)[source]#

Parses a lc_str data structure

Parameters:

limit (int | None) –

S = ~S#
addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
classmethod check_magic_compatibility(stream)#

Check if a stream of bytes contains the same magic number as the main object

Return type:

bool

Parameters:

stream (BinaryIO) –

close()#
Return type:

None

contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

property entry#
static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

Return type:

Optional[str]

property finalizers: List[int]#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

property image_base_delta#
initial_register_values()#

Deprecated

property initializers: List[int]#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

property loader: Loader#
property max_addr: int#

This returns the highest virtual address contained in any loaded segment of the binary.

offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

rebase(new_base)#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available

property sections: Regions[Section]#
set_arch(arch)#
property symbols_by_addr#
thread_registers(thread=None)#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

Return type:

Dict[str, Any]

property threads: List#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

imports: typing.Dict[str, 'Relocation']#
relocs: List[Relocation]#
child_objects: List['Backend']#
exception_handlings: List[ExceptionHandling]#
function_hints: List[FunctionHint]#
memory: Clemory#
get_symbol_by_address_fuzzy(address)[source]#

Locates a symbol by checking the given address against sym.addr, sym.bind_xrefs and sym.symbol_stubs

get_symbol(name, include_stab=False, fuzzy=False)[source]#

Returns all symbols matching name.

Note that especially when include_stab=True there may be multiple symbols with the same name, therefore this method always returns an array.

Parameters:
  • name – the name of the symbol

  • include_stab – Include debugging symbols (NOT RECOMMENDED)

  • fuzzy – Replace exact match with “contains”-style match

get_symbol_by_insertion_order(idx)[source]#
Parameters:

idx (int) – idx when this symbol was inserted

Return type:

AbstractMachOSymbol

Returns:

get_segment_by_name(name)[source]#

Searches for a MachOSegment with the given name and returns it.

Parameters:

name – Name of the sought segment

Returns:

MachOSegment or None

property segments: Regions[Segment]#
class cle.MetaELF(*args, **kwargs)[source]#

Bases: Backend

A base class that implements functions used by all backends that can load an ELF.

supported_filetypes = ['elf']#
property plt#

Maps names to addresses.

property reverse_plt#

Maps addresses to names.

property is_ppc64_abiv1#

Returns whether the arch is PowerPC64 ABIv1.

Returns:

True if PowerPC64 ABIv1, False otherwise.

property is_ppc64_abiv2#

Returns whether the arch is PowerPC64 ABIv2.

Returns:

True if PowerPC64 ABIv2, False otherwise.

property ppc64_initial_rtoc#

Get initial rtoc value for PowerPC64 architecture.

static extract_soname(path)[source]#

Extracts the shared object identifier from the path, or returns None if it cannot.

addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
classmethod check_compatibility(spec, obj)#

Performs a minimal static load of spec and returns whether it’s compatible with obj

Return type:

bool

classmethod check_magic_compatibility(stream)#

Check if a stream of bytes contains the same magic number as the main object

Return type:

bool

Parameters:

stream (BinaryIO) –

close()#
Return type:

None

contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

property entry#
property finalizers: List[int]#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

get_symbol(name)#

Stub function. Implement to find the symbol with name name.

Return type:

Optional[Symbol]

Parameters:

name (str) –

property image_base_delta#
initial_register_values()#

Deprecated

property initializers: List[int]#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

classmethod is_compatible(stream)#

Determine quickly whether this backend can load an object from this stream

Return type:

bool

is_default = False#
property loader: Loader#
property max_addr: int#

This returns the highest virtual address contained in any loaded segment of the binary.

property min_addr: int#

This returns the lowest virtual address contained in any loaded segment of the binary.

offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

rebase(new_base)#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available

property sections: Regions[Section]#
property segments: Regions[Segment]#
set_arch(arch)#
property symbols_by_addr#
thread_registers(thread=None)#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

Return type:

Dict[str, Any]

property threads: List#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

symbols: List[Symbol]#
imports: typing.Dict[str, 'Relocation']#
relocs: List[Relocation]#
child_objects: List['Backend']#
exception_handlings: List[ExceptionHandling]#
function_hints: List[FunctionHint]#
memory: Clemory#
class cle.Minidump(*args, **kwargs)[source]#

Bases: Backend

is_default = True#
close()[source]#
static is_compatible(stream)[source]#

Determine quickly whether this backend can load an object from this stream

property threads#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
classmethod check_compatibility(spec, obj)#

Performs a minimal static load of spec and returns whether it’s compatible with obj

Return type:

bool

classmethod check_magic_compatibility(stream)#

Check if a stream of bytes contains the same magic number as the main object

Return type:

bool

Parameters:

stream (BinaryIO) –

contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

property entry#
static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

Return type:

Optional[str]

property finalizers: List[int]#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

get_symbol(name)#

Stub function. Implement to find the symbol with name name.

Return type:

Optional[Symbol]

Parameters:

name (str) –

property image_base_delta#
initial_register_values()#

Deprecated

property initializers: List[int]#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

property loader: Loader#
property max_addr: int#

This returns the highest virtual address contained in any loaded segment of the binary.

property min_addr: int#

This returns the lowest virtual address contained in any loaded segment of the binary.

offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

rebase(new_base)#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available

property sections: Regions[Section]#
property segments: Regions[Segment]#
set_arch(arch)#
property symbols_by_addr#
thread_registers(thread=None)[source]#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

symbols: List[Symbol]#
imports: typing.Dict[str, 'Relocation']#
relocs: List[Relocation]#
child_objects: List['Backend']#
exception_handlings: List[ExceptionHandling]#
function_hints: List[FunctionHint]#
memory: Clemory#
get_thread_registers_by_id(thread_id)[source]#
class cle.NamedRegion(name, start, end, is_readable=True, is_writable=True, is_executable=False, **kwargs)[source]#

Bases: Backend

A NamedRegion represents a region of memory that has a name, a location, but no static content.

This region also has permissions; with no memory, these obviously don’t do anything on their own, but they help inform any other code that relies on CLE (e.g., angr).

This can be used as a placeholder for memory that should exist in CLE’s view, but for which it does not need data, like RAM, MMIO, etc.

is_default = False#
has_memory = False#
static is_compatible(stream)[source]#

Determine quickly whether this backend can load an object from this stream

property min_addr#

This returns the lowest virtual address contained in any loaded segment of the binary.

property max_addr#

This returns the highest virtual address contained in any loaded segment of the binary.

function_name(addr)[source]#

NamedRegions don’t support function names.

contains_addr(addr)[source]#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
classmethod check_compatibility(spec, obj)[source]#

Performs a minimal static load of spec and returns whether it’s compatible with obj

classmethod check_magic_compatibility(stream)#

Check if a stream of bytes contains the same magic number as the main object

Return type:

bool

Parameters:

stream (BinaryIO) –

close()#
Return type:

None

property entry#
static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

Return type:

Optional[str]

property finalizers: List[int]#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

get_symbol(name)#

Stub function. Implement to find the symbol with name name.

Return type:

Optional[Symbol]

Parameters:

name (str) –

property image_base_delta#
initial_register_values()#

Deprecated

property initializers: List[int]#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

property loader: Loader#
offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

rebase(new_base)#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available

property sections: Regions[Section]#
property segments: Regions[Segment]#
set_arch(arch)#
property symbols_by_addr#
thread_registers(thread=None)#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

Return type:

Dict[str, Any]

property threads: List#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

symbols: List[Symbol]#
imports: typing.Dict[str, 'Relocation']#
relocs: List[Relocation]#
child_objects: List['Backend']#
exception_handlings: List[ExceptionHandling]#
function_hints: List[FunctionHint]#
memory: Clemory#
class cle.Region(offset, vaddr, filesize, memsize)[source]#

Bases: object

A region of memory that is mapped in the object’s file.

Variables:
  • offset – The offset into the file at which the region starts.

  • vaddr – The virtual address.

  • filesize – The size of the region in the file.

  • memsize – The size of the region when loaded into memory.

Parameters:
  • vaddr (int) –

  • filesize (int) –

  • memsize (int) –

The prefix v- on a variable or parameter name indicates that it refers to the virtual, loaded memory space, while a corresponding variable without the v- refers to the flat zero-based memory of the file.

When used next to each other, addr and offset refer to virtual memory address and file offset, respectively.

vaddr: int#
memsize: int#
filesize: int#
contains_addr(addr)[source]#

Does this region contain this virtual address?

contains_offset(offset)[source]#

Does this region contain this offset into the file?

addr_to_offset(addr)[source]#

Convert a virtual memory address into a file offset

offset_to_addr(offset)[source]#

Convert a file offset into a virtual memory address

property max_addr#

The maximum virtual address of this region

property min_addr#

The minimum virtual address of this region

property max_offset#

The maximum file offset of this region

min_offset()[source]#

The minimum file offset of this region

property is_readable: bool#
property is_writable: bool#
property is_executable: bool#
class cle.Regions(lst=None)[source]#

Bases: Generic[R]

A container class acting as a list of regions (sections or segments). Additionally, it keeps a sorted list of all regions that are mapped into memory to allow fast lookups.

We assume none of the regions overlap with others.

property raw_list: List[R]#

Get the internal list. Any change to it is not tracked, and therefore _sorted_list will not be updated. Therefore you probably do not want to modify the list.

Returns:

The internal list container.

Return type:

list

property max_addr: int | None#

Get the highest address of all regions.

Returns:

The highest address of all regions, or None if there is no region available.

Return type:

int or None

append(region)[source]#

Append a new Region instance into the list.

Parameters:

region (TypeVar(R, bound= Region)) – The region to append.

remove(region)[source]#

Remove an existing Region instance from the list.

Parameters:

region (TypeVar(R, bound= Region)) – The region to remove.

Return type:

None

find_region_containing(addr)[source]#

Find the region that contains a specific address. Returns None if none of the regions covers the address.

Parameters:

addr (int) – The address.

Return type:

Optional[TypeVar(R, bound= Region)]

Returns:

The region that covers the specific address, or None if no such region is found.

find_region_next_to(addr)[source]#

Find the next region after the given address.

Parameters:

addr (int) – The address to test.

Return type:

Optional[TypeVar(R, bound= Region)]

Returns:

The next region that goes after the given address, or None if there is no region after the address.

class cle.Section(name, offset, vaddr, size)[source]#

Bases: Region

Simple representation of a loaded section.

Variables:

name (str) – The name of the section

Parameters:

vaddr (int) –

property is_readable#

Whether this section has read permissions

property is_writable#

Whether this section has write permissions

addr_to_offset(addr)#

Convert a virtual memory address into a file offset

contains_addr(addr)#

Does this region contain this virtual address?

contains_offset(offset)#

Does this region contain this offset into the file?

property is_executable#

Whether this section has execute permissions

property max_addr#

The maximum virtual address of this region

property max_offset#

The maximum file offset of this region

property min_addr#

The minimum virtual address of this region

min_offset()#

The minimum file offset of this region

offset_to_addr(offset)#

Convert a file offset into a virtual memory address

vaddr: int#
memsize: int#
filesize: int#
property only_contains_uninitialized_data#

Whether this section is initialized to zero after the executable is loaded.

class cle.Segment(offset, vaddr, filesize, memsize)[source]#

Bases: Region

Parameters:
  • vaddr (int) –

  • filesize (int) –

  • memsize (int) –

addr_to_offset(addr)#

Convert a virtual memory address into a file offset

contains_addr(addr)#

Does this region contain this virtual address?

contains_offset(offset)#

Does this region contain this offset into the file?

property is_executable: bool#
property is_readable: bool#
property is_writable: bool#
property max_addr#

The maximum virtual address of this region

property max_offset#

The maximum file offset of this region

property min_addr#

The minimum virtual address of this region

min_offset()#

The minimum file offset of this region

offset_to_addr(offset)#

Convert a file offset into a virtual memory address

vaddr: int#
memsize: int#
filesize: int#
class cle.Soot(*args, entry_point=None, entry_point_params=(), input_format=None, additional_jars=None, additional_jar_roots=None, jni_libs_ld_path=None, jni_libs=None, android_sdk=None, **kwargs)[source]#

Bases: Backend

The basis backend for lifting and loading bytecode from JARs and APKs to Soot IR.

Note that self.min_addr will be 0 and self.max_addr will be 1. Hopefully no other object will be mapped at address 0.

property max_addr#

This returns the highest virtual address contained in any loaded segment of the binary.

property entry#
property classes#
get_soot_class(cls_name, none_if_missing=False)[source]#

Get Soot class object.

Parameters:

cls_name (str) – Name of the class.

Returns:

The class object.

Return type:

pysoot.soot.SootClass

get_soot_method(thing, class_name=None, params=(), none_if_missing=False)[source]#

Get Soot method object.

Parameters:
  • thing – Descriptor of the method, or name of the method.

  • class_name (str) – Name of the class. If not specified, class name can be parsed from method_name.

Returns:

Soot method that satisfies the criteria.

property main_methods#

Find all Main methods in this binary.

Returns:

All main methods in each class.

Return type:

iterator

addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
classmethod check_compatibility(spec, obj)#

Performs a minimal static load of spec and returns whether it’s compatible with obj

Return type:

bool

classmethod check_magic_compatibility(stream)#

Check if a stream of bytes contains the same magic number as the main object

Return type:

bool

Parameters:

stream (BinaryIO) –

close()#
Return type:

None

contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

Return type:

Optional[str]

property finalizers: List[int]#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

get_symbol(name)#

Stub function. Implement to find the symbol with name name.

Return type:

Optional[Symbol]

Parameters:

name (str) –

property image_base_delta#
initial_register_values()#

Deprecated

property initializers: List[int]#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

classmethod is_compatible(stream)#

Determine quickly whether this backend can load an object from this stream

Return type:

bool

is_default = False#
static is_zip_archive(stream)[source]#
property loader: Loader#
property min_addr: int#

This returns the lowest virtual address contained in any loaded segment of the binary.

offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

rebase(new_base)#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available for linking happens there.

property sections: Regions[Section]#
property segments: Regions[Segment]#
set_arch(arch)#
property symbols_by_addr#
thread_registers(thread=None)#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

Return type:

Dict[str, Any]

property threads: List#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

symbols: List[Symbol]#
imports: typing.Dict[str, 'Relocation']#
relocs: List[Relocation]#
child_objects: List['Backend']#
exception_handlings: List[ExceptionHandling]#
function_hints: List[FunctionHint]#
memory: Clemory#
class cle.StaticArchive(*args, **kwargs)[source]#

Bases: Backend

classmethod is_compatible(stream)[source]#

Determine quickly whether this backend can load an object from this stream

is_default = True#
property arch: Arch#
addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

classmethod check_compatibility(spec, obj)#

Performs a minimal static load of spec and returns whether it’s compatible with obj

Return type:

bool

classmethod check_magic_compatibility(stream)#

Check if a stream of bytes contains the same magic number as the main object

Return type:

bool

Parameters:

stream (BinaryIO) –

close()#
Return type:

None

contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

property entry#
static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

Return type:

Optional[str]

property finalizers: List[int]#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

get_symbol(name)#

Stub function. Implement to find the symbol with name name.

Return type:

Optional[Symbol]

Parameters:

name (str) –

property image_base_delta#
initial_register_values()#

Deprecated

property initializers: List[int]#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

property loader: Loader#
property max_addr: int#

This returns the highest virtual address contained in any loaded segment of the binary.

property min_addr: int#

This returns the lowest virtual address contained in any loaded segment of the binary.

offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

rebase(new_base)#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available for linking happens there.

property sections: Regions[Section]#
property segments: Regions[Segment]#
set_arch(arch)#
property symbols_by_addr#
thread_registers(thread=None)#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

Return type:

Dict[str, Any]

property threads: List#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

symbols: List[Symbol]#
imports: typing.Dict[str, 'Relocation']#
relocs: List[Relocation]#
child_objects: List['Backend']#
exception_handlings: List[ExceptionHandling]#
function_hints: List[FunctionHint]#
memory: Clemory#
cle.register_backend(name, cls)[source]#
class cle.ExternObject(loader, map_size=0, tls_size=0)[source]#

Bases: Backend

rebase(new_base)[source]#

Rebase backend’s regions to the new base where they were mapped by the loader

make_extern(name, size=0, alignment=None, thumb=False, sym_type=SymbolType.TYPE_FUNCTION, point_to=None, libname=None)[source]#
Return type:

Symbol

get_pseudo_addr(name)[source]#
Return type:

int

allocate(size=1, alignment=8, thumb=False, tls=False)[source]#
Return type:

int

property max_addr#

This returns the highest virtual address contained in any loaded segment of the binary.

make_import(name, sym_type)[source]#
addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
classmethod check_compatibility(spec, obj)#

Performs a minimal static load of spec and returns whether it’s compatible with obj

Return type:

bool

classmethod check_magic_compatibility(stream)#

Check if a stream of bytes contains the same magic number as the main object

Return type:

bool

Parameters:

stream (BinaryIO) –

close()#
Return type:

None

contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

property entry#
static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

Return type:

Optional[str]

property finalizers: List[int]#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

get_symbol(name)#

Stub function. Implement to find the symbol with name name.

Return type:

Optional[Symbol]

Parameters:

name (str) –

property image_base_delta#
initial_register_values()#

Deprecated

property initializers: List[int]#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

classmethod is_compatible(stream)#

Determine quickly whether this backend can load an object from this stream

Return type:

bool

is_default = False#
property loader: Loader#
property min_addr: int#

This returns the lowest virtual address contained in any loaded segment of the binary.

offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available for linking happens there.

property sections: Regions[Section]#
property segments: Regions[Segment]#
set_arch(arch)#
property symbols_by_addr#
thread_registers(thread=None)#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

Return type:

Dict[str, Any]

property threads: List#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

symbols: List[Symbol]#
imports: typing.Dict[str, 'Relocation']#
relocs: List[Relocation]#
child_objects: List['Backend']#
exception_handlings: List[ExceptionHandling]#
function_hints: List[FunctionHint]#
memory: Clemory#
class cle.ExternSegment(map_size)[source]#

Bases: Segment

addr_to_offset(addr)[source]#

Convert a virtual memory address into a file offset

offset_to_addr(offset)[source]#

Convert a file offset into a virtual memory address

contains_offset(offset)[source]#

Does this region contain this offset into the file?

is_readable = True#
is_writable = True#
is_executable = True#
contains_addr(addr)#

Does this region contain this virtual address?

property max_addr#

The maximum virtual address of this region

property max_offset#

The maximum file offset of this region

property min_addr#

The minimum virtual address of this region

min_offset()#

The minimum file offset of this region

vaddr: int#
memsize: int#
filesize: int#
class cle.KernelObject(loader, map_size=32768)[source]#

Bases: Backend

add_name(name, addr)[source]#
property max_addr#

This returns the highest virtual address contained in any loaded segment of the binary.

addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
classmethod check_compatibility(spec, obj)#

Performs a minimal static load of spec and returns whether it’s compatible with obj

Return type:

bool

classmethod check_magic_compatibility(stream)#

Check if a stream of bytes contains the same magic number as the main object

Return type:

bool

Parameters:

stream (BinaryIO) –

close()#
Return type:

None

contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

property entry#
static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

Return type:

Optional[str]

property finalizers: List[int]#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

get_symbol(name)#

Stub function. Implement to find the symbol with name name.

Return type:

Optional[Symbol]

Parameters:

name (str) –

property image_base_delta#
initial_register_values()#

Deprecated

property initializers: List[int]#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

classmethod is_compatible(stream)#

Determine quickly whether this backend can load an object from this stream

Return type:

bool

is_default = False#
property loader: Loader#
property min_addr: int#

This returns the lowest virtual address contained in any loaded segment of the binary.

offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

rebase(new_base)#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available for linking happens there.

property sections: Regions[Section]#
property segments: Regions[Segment]#
set_arch(arch)#
property symbols_by_addr#
thread_registers(thread=None)#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

Return type:

Dict[str, Any]

property threads: List#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

symbols: List[Symbol]#
imports: typing.Dict[str, 'Relocation']#
relocs: List[Relocation]#
child_objects: List['Backend']#
exception_handlings: List[ExceptionHandling]#
function_hints: List[FunctionHint]#
memory: Clemory#
class cle.PointToPrecise(owner, name, relative_addr, size, sym_type)[source]#

Bases: PointTo

Parameters:
  • owner (Backend) –

  • name (str) –

  • relative_addr (int) –

  • size (int) –

  • sym_type (SymbolType) –

pointto_precise = None#
relocations()[source]#

Maybe implement me: If you like, return a list of relocation objects to apply. To create new import symbols, use self.owner.make_import.

addend: int = 0#
is_common = False#
is_export = False#
is_extern = False#
is_forward = False#
property is_function#

Whether this symbol is a function

is_import = False#
is_local = False#
is_static = False#
is_weak = False#
libname: str = NotImplemented#
property linked_addr#
name: str = NotImplemented#
property owner_obj#
pointto_name: str = NotImplemented#
pointto_type: SymbolType = NotImplemented#
property rebased_addr#

The address of this symbol in the global memory space

resolve(obj)#
resolve_forwarder()#

If this symbol is a forwarding export, return the symbol the forwarding refers to, or None if it cannot be found

classmethod static_size(owner)#

Implement me: return the size of the symbol in bytes before it gets constructed

Parameters:

owner – The ExternObject owning the symbol-to-be. Useful to get at owner.arch.

property subtype: SymbolSubType#

A subclass’ ABI-specific types

type: SymbolType = 3#
value()#

Implement me: the initial value of the bytes in memory for the symbol. Should return a bytestring of the same length as static_size returned. (owner is self.owner now)

class cle.TOCRelocation(owner, symbol, relative_addr)[source]#

Bases: Relocation

Parameters:
  • owner (Backend) –

  • symbol (Symbol | None) –

  • relative_addr (int) –

property value#
AUTO_HANDLE_NONE = False#
property dest_addr#
property linked_addr#
property owner_obj#
property rebased_addr#

The address in the global memory space this relocation would like to write to

relocate()#

Applies this relocation. Will make changes to the memory object of the object it came from.

This implementation is a generic version that can be overridden in subclasses.

resolve(obj, **kwargs)#
resolve_symbol(solist, thumb=False, extern_object=None, **kwargs)#
Parameters:

solist (List[Any]) –

class cle.ELFCoreThreadManager(loader, arch, **kwargs)[source]#

Bases: ThreadManager

new_thread(insert=False)[source]#
register_object(obj)[source]#
static initialization_image(obj)#
Return type:

Optional[bytes]

class cle.ELFThreadManager(*args, **kwargs)[source]#

Bases: ThreadManager

register_object(obj)[source]#
static initialization_image(obj)#
Return type:

Optional[bytes]

new_thread(insert=True)#
class cle.InternalTLSRelocation(val, offset, owner)[source]#

Bases: Relocation

AUTO_HANDLE_NONE = True#
property value#
property dest_addr#
property linked_addr#
property owner_obj#
property rebased_addr#

The address in the global memory space this relocation would like to write to

relocate()#

Applies this relocation. Will make changes to the memory object of the object it came from.

This implementation is a generic version that can be overridden in subclasses.

resolve(obj, **kwargs)#
resolve_symbol(solist, thumb=False, extern_object=None, **kwargs)#
Parameters:

solist (List[Any]) –

class cle.MinidumpThreadManager(loader, arch, **kwargs)[source]#

Bases: ThreadManager

new_thread(insert=False)[source]#
register_object(obj)[source]#
static initialization_image(obj)#
Return type:

Optional[bytes]

class cle.PEThreadManager(loader, arch, max_modules=256)[source]#

Bases: ThreadManager

register_object(obj)[source]#
static initialization_image(obj)#
Return type:

Optional[bytes]

new_thread(insert=True)#
class cle.ThreadManager(loader, arch, max_modules=256)[source]#

Bases: object

This class tracks what data is thread-local and can generate thread initialization images

Most of the heavy lifting will be handled in a subclass

register_object(obj)[source]#
static initialization_image(obj)[source]#
Return type:

Optional[bytes]

new_thread(insert=True)[source]#
class cle.TLSObject(loader, arch)[source]#

Bases: Backend

addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
classmethod check_compatibility(spec, obj)#

Performs a minimal static load of spec and returns whether it’s compatible with obj

Return type:

bool

classmethod check_magic_compatibility(stream)#

Check if a stream of bytes contains the same magic number as the main object

Return type:

bool

Parameters:

stream (BinaryIO) –

close()#
Return type:

None

contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

property entry#
static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

Return type:

Optional[str]

property finalizers: List[int]#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

get_symbol(name)#

Stub function. Implement to find the symbol with name name.

Return type:

Optional[Symbol]

Parameters:

name (str) –

property image_base_delta#
initial_register_values()#

Deprecated

property initializers: List[int]#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

classmethod is_compatible(stream)#

Determine quickly whether this backend can load an object from this stream

Return type:

bool

is_default = False#
property loader: Loader#
property max_addr: int#

This returns the highest virtual address contained in any loaded segment of the binary.

property min_addr: int#

This returns the lowest virtual address contained in any loaded segment of the binary.

offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

rebase(new_base)#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available for linking happens there.

property sections: Regions[Section]#
property segments: Regions[Segment]#
set_arch(arch)#
property symbols_by_addr#
thread_registers(thread=None)#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

Return type:

Dict[str, Any]

property threads: List#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

symbols: List[Symbol]#
imports: typing.Dict[str, 'Relocation']#
relocs: List[Relocation]#
child_objects: List['Backend']#
exception_handlings: List[ExceptionHandling]#
function_hints: List[FunctionHint]#
memory: Clemory#
exception cle.CLECompatibilityError[source]#

Bases: CLEError

Error raised when loading an executable that is not currently supported by CLE.

args#
with_traceback()#

Exception.with_traceback(tb) – set self.__traceback__ to tb and return self.

exception cle.CLEError[source]#

Bases: Exception

Base class for errors raised by CLE.

args#
with_traceback()#

Exception.with_traceback(tb) – set self.__traceback__ to tb and return self.

exception cle.CLEFileNotFoundError[source]#

Bases: CLEError

Error raised when a file does not exist.

args#
with_traceback()#

Exception.with_traceback(tb) – set self.__traceback__ to tb and return self.

exception cle.CLEInvalidBinaryError[source]#

Bases: CLEError

Error raised when an executable file is invalid or corrupted.

args#
with_traceback()#

Exception.with_traceback(tb) – set self.__traceback__ to tb and return self.

exception cle.CLEOperationError[source]#

Bases: CLEError

Error raised when a problem is encountered in the process of loading an executable.

args#
with_traceback()#

Exception.with_traceback(tb) – set self.__traceback__ to tb and return self.

exception cle.CLEUnknownFormatError[source]#

Bases: CLEError

Error raised when CLE encounters an unknown executable file format.

args#
with_traceback()#

Exception.with_traceback(tb) – set self.__traceback__ to tb and return self.

exception cle.CLEMemoryError[source]#

Bases: CLEError

Error raised when performing memory operations on unmapped addresses

args#
with_traceback()#

Exception.with_traceback(tb) – set self.__traceback__ to tb and return self.

cle.convert_info_proc_maps(fname)[source]#

Convert a dump from gdb’s info proc maps command to a set of options that can be passed to CLE to replicate the address space from the gdb session

Parameters:

fname – The name of a file containing the dump

Returns:

A dict appropriate to be passed as **kwargs for angr.Project or cle.Loader

cle.convert_info_sharedlibrary(fname)[source]#

Convert a dump from gdb’s info sharedlibrary command to a set of options that can be passed to CLE to replicate the address space from the gdb session

Parameters:

fname – The name of a file containing the dump

Returns:

A dict appropriate to be passed as **kwargs for angr.Project or cle.Loader

class cle.Loader(main_binary, auto_load_libs=True, concrete_target=None, force_load_libs=(), skip_libs=(), main_opts=None, lib_opts=None, ld_path=(), use_system_libs=True, ignore_import_version_numbers=True, case_insensitive=False, rebase_granularity=1048576, except_missing_libs=False, aslr=False, perform_relocations=True, load_debug_info=False, page_size=1, preload_libs=(), arch=None)[source]#

Bases: object

The loader loads all the objects and exports an abstraction of the memory of the process. What you see here is an address space with loaded and rebased binaries.

Parameters:
  • main_binary (Union[str, BinaryIO, Path, Backend]) – The path to the main binary you’re loading, or a file-like object with the binary in it.

  • auto_load_libs (bool) –

  • force_load_libs (Iterable[str | BinaryIO | Path]) –

  • skip_libs (Iterable[str]) –

  • main_opts (Dict[str, Any] | None) –

  • lib_opts (Dict[str, Dict[str, Any]] | None) –

  • ld_path (Iterable[str | Path]) –

  • use_system_libs (bool) –

  • ignore_import_version_numbers (bool) –

  • case_insensitive (bool) –

  • rebase_granularity (int) –

  • except_missing_libs (bool) –

  • aslr (bool) –

  • perform_relocations (bool) –

  • load_debug_info (bool) –

  • page_size (int) –

  • preload_libs (Iterable[str | BinaryIO | Path]) –

  • arch (Arch | str | None) –

The following parameters are optional.

Parameters:
  • auto_load_libs (bool) – Whether to automatically load shared libraries that loaded objects depend on.

  • load_debug_info (bool) – Whether to automatically parse DWARF data and search for debug symbol files.

  • concrete_target – Whether to instantiate a concrete target for a concrete execution of the process. If this is the case, we will need to instantiate a SimConcreteEngine that wraps the ConcreteTarget provided by the user.

  • force_load_libs (Iterable[Union[str, BinaryIO, Path]]) – A list of libraries to load regardless of if they’re required by a loaded object.

  • skip_libs (Iterable[str]) – A list of libraries to never load, even if they’re required by a loaded object.

  • main_opts (Optional[Dict[str, Any]]) – A dictionary of options to be used loading the main binary.

  • lib_opts (Optional[Dict[str, Dict[str, Any]]]) – A dictionary mapping library names to the dictionaries of options to be used when loading them.

  • ld_path (Iterable[Union[str, Path]]) – A list of paths in which we can search for shared libraries.

  • use_system_libs (bool) – Whether or not to search the system load path for requested libraries. Default True.

  • ignore_import_version_numbers (bool) – Whether libraries with different version numbers in the filename will be considered equivalent, for example libc.so.6 and libc.so.0

  • case_insensitive (bool) – If this is set to True, filesystem loads will be done case-insensitively regardless of the case-sensitivity of the underlying filesystem.

  • rebase_granularity (int) – The alignment to use for rebasing shared objects

  • except_missing_libs (bool) – Throw an exception when a shared library can’t be found.

  • aslr (bool) – Load libraries in symbolic address space. Do not use this option.

  • page_size (int) – The granularity with which data is mapped into memory. Set to 0x1000 if you are working in an environment where data will always be memory mapped in a page-granular way.

  • preload_libs (Iterable[Union[str, BinaryIO, Path]]) – Similar to force_load_libs but will provide for symbol resolution, with precedence over any dependencies.

  • main_binary (str | BinaryIO | Path | Backend) –

  • perform_relocations (bool) –

  • arch (Arch | str | None) –

Variables:
  • memory (cle.memory.Clemory) – The loaded, rebased, and relocated memory of the program.

  • main_object – The object representing the main binary (i.e., the executable).

  • shared_objects – A dictionary mapping loaded library names to the objects representing them.

  • all_objects – A list containing representations of all the different objects loaded.

  • requested_names – A set containing the names of all the different shared libraries that were marked as a dependency by somebody.

  • initial_load_objects – A list of all the objects that were loaded as a result of the initial load request.

When reference is made to a dictionary of options, it requires a dictionary with zero or more of the following keys:

  • backend : “elf”, “pe”, “mach-o”, “blob” : which loader backend to use

  • arch : The archinfo.Arch object to use for the binary

  • base_addr : The address to rebase the object at

  • entry_point : The entry point to use for the object

More keys are defined on a per-backend basis.

property main_object: Backend#
property memory: Clemory#
property tls: ThreadManager#
close()[source]#
property max_addr: int#

The maximum address loaded as part of any loaded object (i.e., the whole address space).

property min_addr: int#

The minimum address loaded as part of any loaded object (i.e., the whole address space).

property initializers: List[int]#

Return a list of all the initializers that should be run before execution reaches the entry point, in the order they should be run.

property finalizers: List[int]#

Return a list of all the finalizers that should be run before the program exits. The order in which they should be run is currently unspecified.

property linux_loader_object: Backend | None#

If the linux dynamic loader is present in memory, return it

property elfcore_object: ELFCore | None#

If a corefile was loaded, this returns the actual core object instead of the main binary

property extern_object: ExternObject#

Return the extern object used to provide addresses to unresolved symbols and angr internals.

Accessing this property will load this object into memory if it was not previously present.

proposed model for how multiple extern objects should work:

  1. extern objects are a linked list. the one in loader._extern_object is the head of the list

  2. each round of explicit loads generates a new extern object if it has unresolved dependencies. this object has exactly the size necessary to hold all its exports.

  3. All requests for size are passed down the chain until they reach an object which has the space to service it or an object which has not yet been mapped. If all objects have been mapped and are full, a new extern object is mapped with a fixed size.

property kernel_object: KernelObject#

Return the object used to provide addresses to syscalls.

Accessing this property will load this object into memory if it was not previously present.

property all_elf_objects: List[MetaELF]#

Return a list of every object that was loaded from an ELF file.

property all_pe_objects: List[PE]#

Return a list of every object that was loaded from a PE file.

property missing_dependencies: Set[str]#

Return a set of every name that was requested as a shared object dependency but could not be loaded

property auto_load_libs: bool#
describe_addr(addr)[source]#

Returns a textual description of what’s in memory at the provided address

Return type:

str

Parameters:

addr (int) –

find_object(spec, extra_objects=())[source]#

If the given library specification has been loaded, return its object, otherwise return None.

Return type:

Optional[Backend]

Parameters:
  • spec –

  • extra_objects –

find_object_containing(addr, membership_check=True)[source]#

Return the object that contains the given address, or None if the address is unmapped.

Parameters:
  • addr (int) – The address that should be contained in the object.

  • membership_check (bool) – Whether a membership check should be performed or not (True by default). This option can be set to False if you are certain that the target object does not have “holes”.

Return type:

Optional[Backend]

Returns:

The object or None.

find_segment_containing(addr, skip_pseudo_objects=True)[source]#

Find the segment object that the address belongs to.

Parameters:
  • addr (int) – The address to test

  • skip_pseudo_objects (bool) – Skip objects that CLE adds during loading.

Returns:

The segment that the address belongs to, or None if the address does not belong to any segment, or if segment information is not available.

Return type:

cle.Segment

find_section_containing(addr, skip_pseudo_objects=True)[source]#

Find the section object that the address belongs to.

Parameters:
  • addr (int) – The address to test.

  • skip_pseudo_objects (bool) – Skip objects that CLE adds during loading.

Returns:

The section that the address belongs to, or None if the address does not belong to any section, or if section information is not available.

Return type:

cle.Section

find_loadable_containing(addr, skip_pseudo_objects=True)[source]#

Find the section or segment object the address belongs to. Sections will only be used if the corresponding object does not have segments.

Parameters:
  • addr (int) – The address to test

  • skip_pseudo_objects – Skip objects that CLE adds during loading.

Return type:

Optional[Region]

Returns:

The section or segment that the address belongs to, or None if the address does not belong to any section or segment.

find_section_next_to(addr, skip_pseudo_objects=True)[source]#

Find the next section after the given address.

Parameters:
  • addr (int) – The address to test.

  • skip_pseudo_objects (bool) – Skip objects that CLE adds during loading.

Returns:

The next section that goes after the given address, or None if there is no section after the address, or if section information is not available.

Return type:

cle.Section

find_symbol(thing, fuzzy=False)[source]#

Search for the symbol with the given name or address.

Parameters:
  • thing – Either the name or address of a symbol to look up

  • fuzzy – Set to True to return the first symbol before or at the given address

Return type:

Optional[Symbol]

Returns:

A cle.backends.Symbol object if found, None otherwise.

property symbols: Iterator[Symbol]#
find_all_symbols(name, exclude_imports=True, exclude_externs=False, exclude_forwards=True)[source]#

Iterate over all symbols present in the set of loaded binaries that have the given name

Parameters:
  • name (str) – The name to search for

  • exclude_imports – Whether to exclude import symbols. Default True.

  • exclude_externs – Whether to exclude symbols in the extern object. Default False.

  • exclude_forwards – Whether to exclude forward symbols. Default True.

Return type:

Iterable[Symbol]

find_plt_stub_name(addr)[source]#

Return the name of the PLT stub starting at addr.

Return type:

Optional[str]

Parameters:

addr (int) –

find_relevant_relocations(name)[source]#

Iterate through all the relocations referring to the symbol with the given name

Return type:

Iterator[Relocation]

Parameters:

name (str) –

perform_irelative_relocs(resolver_func)[source]#

Use this method to satisfy IRelative relocations in the binary that require execution of loaded code.

Note that this does NOT handle IFunc symbols, which must be handled separately. (this could be changed, but at the moment it’s desirable to support lazy IFunc resolution, since emulation is usually slow)

Parameters:

resolver_func – A callback function that takes an address, runs the code at that address, and returns the return value from the emulated function.

dynamic_load(spec)[source]#

Load a file into the address space. Note that the semantics of auto_load_libs and except_missing_libs apply at all times.

Parameters:

spec – The path to the file to load. May be an absolute path, a relative path, or a name to search in the load path.

Returns:

A list of all the objects successfully loaded, which may be empty if this object was previously loaded. If the object specified in spec failed to load for any reason, including the file not being found, return None.

get_loader_symbolic_constraints()[source]#

Do not use this method.

fast_memory_load_pointer(addr, size=None)[source]#

Perform a fast memory loading of a pointer.

Parameters:
  • addr (int) – Address to read from.

  • size (Optional[int]) – Size of the pointer. Default to machine-word size.

Return type:

Optional[int]

Returns:

A pointer or None if the address does not exist.

class cle.Clemory(arch, root=False)[source]#

Bases: ClemoryBase

An object representing a memory space.

Accesses can be made with [index] notation.

consecutive#
min_addr#
max_addr#
add_backer(start, data, overwrite=False)[source]#

Adds a backer to the memory.

Parameters:
  • start – The address where the backer should be loaded.

  • data – The backer itself. Can be either a bytestring or another Clemory.

  • overwrite – If True and the range overlaps any existing backer, the existing backer will be split up and the overlapping part will be replaced with the new backer.

split_backer(addr)[source]#

Ensures that addr is the start of a backer, if it is backed.

update_backer(start, data)[source]#
remove_backer(start)[source]#
backers(addr=0)[source]#

Iterate through each backer for this clemory and all its children, yielding tuples of (start_addr, backer) where each backer is a bytearray.

Parameters:

addr – An optional starting address - all backers before and not including this address will be skipped.

load(addr, n)[source]#

Read up to n bytes at address addr in memory and return a bytes object.

Reading will stop at the beginning of the first unallocated region found, or when n bytes have been read.

store(addr, data)[source]#

Write bytes from data at address addr.

Note: If the store runs off the end of a backer and into unbacked space, this function will update the backer but also raise KeyError.

find(data, search_min=None, search_max=None)[source]#

Find all occurrences of a bytestring in memory.

Parameters:
  • data (bytes) – The bytestring to search for

  • search_min (int) – Optional: The first address to include as valid

  • search_max (int) – Optional: The last address to include as valid

Return type:

Iterator[int]

Returns:

An iterator over the addresses at which the bytestring occurs.

close()#
pack(addr, fmt, *data)#

Use the struct module to pack data into memory at address addr with the format fmt.

pack_word(addr, data, size=None, signed=False, endness=None)#

Use the struct module to pack a single integer data into memory at the address addr.

You may override any of the attributes of the word being packed:

Parameters:
  • size (int) – The size in bytes to pack/unpack. Defaults to wordsize (e.g. 4 bytes on a 32 bit architecture)

  • signed (bool) – Whether the data should be extracted signed/unsigned. Default unsigned

  • endness (archinfo.Endness) – The endian to use in packing/unpacking. Defaults to memory endness

read(nbytes)#

The stream-like function that reads up to a number of bytes starting from the current position and updates the current position. Use with seek().

Up to nbytes bytes will be read, halting at the beginning of the first unmapped region encountered.

seek(value)#

The stream-like function that sets the “file’s” current position. Use with read().

Parameters:

value – The position to seek to.

tell()#
unpack(addr, fmt)#

Use the struct module to unpack the data at address addr with the format fmt.

unpack_word(addr, size=None, signed=False, endness=None)#

Use the struct module to unpack a single integer from the address addr.

You may override any of the attributes of the word being extracted:

Parameters:
  • size (int) – The size in bytes to pack/unpack. Defaults to wordsize (e.g. 4 bytes on a 32 bit architecture)

  • signed (bool) – Whether the data should be extracted signed/unsigned. Default unsigned

  • endness (archinfo.Endness) – The endian to use in packing/unpacking. Defaults to memory endness

class cle.ClemoryBase(arch)[source]#

Bases: object

load(addr, n)[source]#
store(addr, data)[source]#
backers(addr=0)[source]#
find(data, search_min=None, search_max=None)[source]#
unpack(addr, fmt)[source]#

Use the struct module to unpack the data at address addr with the format fmt.

unpack_word(addr, size=None, signed=False, endness=None)[source]#

Use the struct module to unpack a single integer from the address addr.

You may override any of the attributes of the word being extracted:

Parameters:
  • size (int) – The size in bytes to pack/unpack. Defaults to wordsize (e.g. 4 bytes on a 32 bit architecture)

  • signed (bool) – Whether the data should be extracted signed/unsigned. Default unsigned

  • endness (archinfo.Endness) – The endian to use in packing/unpacking. Defaults to memory endness

pack(addr, fmt, *data)[source]#

Use the struct module to pack data into memory at address addr with the format fmt.

pack_word(addr, data, size=None, signed=False, endness=None)[source]#

Use the struct module to pack a single integer data into memory at the address addr.

You may override any of the attributes of the word being packed:

Parameters:
  • size (int) – The size in bytes to pack/unpack. Defaults to wordsize (e.g. 4 bytes on a 32 bit architecture)

  • signed (bool) – Whether the data should be extracted signed/unsigned. Default unsigned

  • endness (archinfo.Endness) – The endian to use in packing/unpacking. Defaults to memory endness

read(nbytes)[source]#

The stream-like function that reads up to a number of bytes starting from the current position and updates the current position. Use with seek().

Up to nbytes bytes will be read, halting at the beginning of the first unmapped region encountered.

seek(value)[source]#

The stream-like function that sets the “file’s” current position. Use with read().

Parameters:

value – The position to seek to.

tell()[source]#
close()[source]#
class cle.ClemoryView(backer, start, end, offset=0)[source]#

Bases: ClemoryBase

backers(addr=0)[source]#
load(addr, n)[source]#
store(addr, data)[source]#
find(data, search_min=None, search_max=None)[source]#
close()#
pack(addr, fmt, *data)#

Use the struct module to pack data into memory at address addr with the format fmt.

pack_word(addr, data, size=None, signed=False, endness=None)#

Use the struct module to pack a single integer data into memory at the address addr.

You may override any of the attributes of the word being packed:

Parameters:
  • size (int) – The size in bytes to pack/unpack. Defaults to wordsize (e.g. 4 bytes on a 32 bit architecture)

  • signed (bool) – Whether the data should be extracted signed/unsigned. Default unsigned

  • endness (archinfo.Endness) – The endian to use in packing/unpacking. Defaults to memory endness

read(nbytes)#

The stream-like function that reads up to a number of bytes starting from the current position and updates the current position. Use with seek().

Up to nbytes bytes will be read, halting at the beginning of the first unmapped region encountered.

seek(value)#

The stream-like function that sets the “file’s” current position. Use with read().

Parameters:

value – The position to seek to.

tell()#
unpack(addr, fmt)#

Use the struct module to unpack the data at address addr with the format fmt.

unpack_word(addr, size=None, signed=False, endness=None)#

Use the struct module to unpack a single integer from the address addr.

You may override any of the attributes of the word being extracted:

Parameters:
  • size (int) – The size in bytes to pack/unpack. Defaults to wordsize (e.g. 4 bytes on a 32 bit architecture)

  • signed (bool) – Whether the data should be extracted signed/unsigned. Default unsigned

  • endness (archinfo.Endness) – The endian to use in packing/unpacking. Defaults to memory endness

class cle.ClemoryTranslator(backer, func)[source]#

Bases: ClemoryBase

Uses a function to translate between address spaces when accessing a child clemory. Intended to be used only as a stream object.

Parameters:

backer (ClemoryBase) –

load(addr, n)[source]#
store(addr, data)[source]#
backers(addr=0)[source]#
find(data, search_min=None, search_max=None)[source]#
close()#
pack(addr, fmt, *data)#

Use the struct module to pack data into memory at address addr with the format fmt.

pack_word(addr, data, size=None, signed=False, endness=None)#

Use the struct module to pack a single integer data into memory at the address addr.

You may override any of the attributes of the word being packed:

Parameters:
  • size (int) – The size in bytes to pack/unpack. Defaults to wordsize (e.g. 4 bytes on a 32 bit architecture)

  • signed (bool) – Whether the data should be extracted signed/unsigned. Default unsigned

  • endness (archinfo.Endness) – The endian to use in packing/unpacking. Defaults to memory endness

read(nbytes)#

The stream-like function that reads up to a number of bytes starting from the current position and updates the current position. Use with seek().

Up to nbytes bytes will be read, halting at the beginning of the first unmapped region encountered.

seek(value)#

The stream-like function that sets the “file’s” current position. Use with read().

Parameters:

value – The position to seek to.

tell()#
unpack(addr, fmt)#

Use the struct module to unpack the data at address addr with the format fmt.

unpack_word(addr, size=None, signed=False, endness=None)#

Use the struct module to unpack a single integer from the address addr.

You may override any of the attributes of the word being extracted:

Parameters:
  • size (int) – The size in bytes to pack/unpack. Defaults to wordsize (e.g. 4 bytes on a 32 bit architecture)

  • signed (bool) – Whether the data should be extracted signed/unsigned. Default unsigned

  • endness (archinfo.Endness) – The endian to use in packing/unpacking. Defaults to memory endness

class cle.PatchedStream(stream, patches)[source]#

Bases: object

An object that wraps a readable stream, performing passthroughs on seek and read operations, except to make it seem like the data has actually been patched by the given patches.

read(*args, **kwargs)[source]#
seek(*args, **kwargs)[source]#
tell()[source]#
close()[source]#
class cle.AddressTranslator(rva, owner)[source]#

Bases: object

classmethod from_lva(lva, owner)[source]#

Loads address translator with LVA

classmethod from_mva(mva, owner)[source]#

Loads address translator with MVA

classmethod from_rva(rva, owner)[source]#

Loads address translator with RVA

classmethod from_raw(raw, owner)[source]#

Loads address translator with RAW address

classmethod from_linked_va(lva, owner)#

Loads address translator with LVA

classmethod from_va(mva, owner)#

Loads address translator with MVA

classmethod from_mapped_va(mva, owner)#

Loads address translator with MVA

classmethod from_relative_va(rva, owner)#

Loads address translator with RVA

to_lva()[source]#

RVA -> LVA

Return type:

int

to_mva()[source]#

RVA -> MVA

Return type:

int

to_rva()[source]#

RVA -> RVA

Return type:

int

to_raw()[source]#

RVA -> RAW

Return type:

int

to_linked_va()#

RVA -> LVA

Return type:

int

to_va()#

RVA -> MVA

Return type:

int

to_mapped_va()#

RVA -> MVA

Return type:

int

to_relative_va()#

RVA -> RVA

Return type:

int

cle.AT#

alias of AddressTranslator

class cle.Symbol(owner, name, relative_addr, size, sym_type)[source]#

Bases: object

Representation of a symbol from a binary file. Smart enough to rebase itself.

There should never be more than one Symbol instance representing a single symbol. To make sure of this, only use cle.backends.Backend.get_symbol() to create new symbols.

Variables:
  • owner (cle.backends.Backend) – The object that contains this symbol

  • name (str) – The name of this symbol

  • addr (int) – The un-based address of this symbol, an RVA

  • size (int) – The size of this symbol

  • _type – The ABI-agnostic type of this symbol

  • resolved (bool) – Whether this import symbol has been resolved to a real symbol

  • resolvedby (None or cle.backends.Symbol) – The real symbol this import symbol has been resolved to

  • resolvewith (str) – The name of the library we must use to resolve this symbol, or None if none is required.

Parameters:
  • owner (Backend) –

  • name (str) –

  • relative_addr (int) –

  • size (int) –

  • sym_type (SymbolType) –

resolve(obj)[source]#
property type: SymbolType#

The ABI-agnostic SymbolType. Must be overridden by derived types.

property subtype: SymbolSubType#

A subclass’ ABI-specific types

property rebased_addr#

The address of this symbol in the global memory space

property linked_addr#
property is_function#

Whether this symbol is a function

is_static = False#
is_common = False#
is_import = False#
is_export = False#
is_local = False#
is_weak = False#
is_extern = False#
is_forward = False#
resolve_forwarder()[source]#

If this symbol is a forwarding export, return the symbol the forwarding refers to, or None if it cannot be found

property owner_obj#
class cle.SymbolType(value)[source]#

Bases: Enum

ABI-agnostic symbol types

TYPE_OTHER = 0#
TYPE_NONE = 1#
TYPE_FUNCTION = 2#
TYPE_OBJECT = 3#
TYPE_SECTION = 4#
TYPE_TLS_OBJECT = 5#
class cle.SymbolSubType(value)[source]#

Bases: Enum

Abstract base class for ABI-specific symbol types

to_base_type()[source]#

A subclass’ ABI-specific mapping to SymbolType

Return type:

SymbolType

class cle.TE(*args, **kwargs)[source]#

Bases: Backend

A “Terse Executable” format image, commonly used as part of UEFI firmware drivers.

is_default = True#
classmethod is_compatible(stream)[source]#

Determine quickly whether this backend can load an object from this stream

addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
classmethod check_compatibility(spec, obj)#

Performs a minimal static load of spec and returns whether it’s compatible with obj

Return type:

bool

classmethod check_magic_compatibility(stream)#

Check if a stream of bytes contains the same magic number as the main object

Return type:

bool

Parameters:

stream (BinaryIO) –

close()#
Return type:

None

contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

property entry#
static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

Return type:

Optional[str]

property finalizers: List[int]#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

get_symbol(name)#

Stub function. Implement to find the symbol with name name.

Return type:

Optional[Symbol]

Parameters:

name (str) –

property image_base_delta#
initial_register_values()#

Deprecated

property initializers: List[int]#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

property loader: Loader#
property max_addr: int#

This returns the highest virtual address contained in any loaded segment of the binary.

property min_addr: int#

This returns the lowest virtual address contained in any loaded segment of the binary.

offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

rebase(new_base)#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available

property sections: Regions[Section]#
property segments: Regions[Segment]#
set_arch(arch)#
property symbols_by_addr#
thread_registers(thread=None)#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

Return type:

Dict[str, Any]

property threads: List#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

symbols: List[Symbol]#
imports: typing.Dict[str, 'Relocation']#
relocs: List[Relocation]#
child_objects: List['Backend']#
exception_handlings: List[ExceptionHandling]#
function_hints: List[FunctionHint]#
memory: Clemory#
class cle.UefiFirmware(*args, **kwargs)[source]#

Bases: Backend

A UEFI firmware blob loader. Support is provided by the uefi_firmware package.

is_default = True#
classmethod is_compatible(stream)[source]#

Determine quickly whether this backend can load an object from this stream

property arch: Arch#
addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

classmethod check_compatibility(spec, obj)#

Performs a minimal static load of spec and returns whether it’s compatible with obj

Return type:

bool

classmethod check_magic_compatibility(stream)#

Check if a stream of bytes contains the same magic number as the main object

Return type:

bool

Parameters:

stream (BinaryIO) –

close()#
Return type:

None

contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

property entry#
static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

Return type:

Optional[str]

property finalizers: List[int]#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

get_symbol(name)#

Stub function. Implement to find the symbol with name name.

Return type:

Optional[Symbol]

Parameters:

name (str) –

property image_base_delta#
initial_register_values()#

Deprecated

property initializers: List[int]#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

property loader: Loader#
property max_addr: int#

This returns the highest virtual address contained in any loaded segment of the binary.

property min_addr: int#

This returns the lowest virtual address contained in any loaded segment of the binary.

offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

rebase(new_base)#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available

property sections: Regions[Section]#
property segments: Regions[Segment]#
set_arch(arch)#
property symbols_by_addr#
thread_registers(thread=None)#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

Return type:

Dict[str, Any]

property threads: List#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

symbols: List[Symbol]#
imports: typing.Dict[str, 'Relocation']#
relocs: List[Relocation]#
child_objects: List['Backend']#
exception_handlings: List[ExceptionHandling]#
function_hints: List[FunctionHint]#
memory: Clemory#

Loading Interface#

class cle.loader.Loader(main_binary, auto_load_libs=True, concrete_target=None, force_load_libs=(), skip_libs=(), main_opts=None, lib_opts=None, ld_path=(), use_system_libs=True, ignore_import_version_numbers=True, case_insensitive=False, rebase_granularity=1048576, except_missing_libs=False, aslr=False, perform_relocations=True, load_debug_info=False, page_size=1, preload_libs=(), arch=None)[source]#

Bases: object

The loader loads all the objects and exports an abstraction of the memory of the process. What you see here is an address space with loaded and rebased binaries.

Parameters:
  • main_binary (Union[str, BinaryIO, Path, Backend]) – The path to the main binary you’re loading, or a file-like object with the binary in it.

  • auto_load_libs (bool) –

  • force_load_libs (Iterable[str | BinaryIO | Path]) –

  • skip_libs (Iterable[str]) –

  • main_opts (Dict[str, Any] | None) –

  • lib_opts (Dict[str, Dict[str, Any]] | None) –

  • ld_path (Iterable[str | Path]) –

  • use_system_libs (bool) –

  • ignore_import_version_numbers (bool) –

  • case_insensitive (bool) –

  • rebase_granularity (int) –

  • except_missing_libs (bool) –

  • aslr (bool) –

  • perform_relocations (bool) –

  • load_debug_info (bool) –

  • page_size (int) –

  • preload_libs (Iterable[str | BinaryIO | Path]) –

  • arch (Arch | str | None) –

The following parameters are optional.

Parameters:
  • auto_load_libs (bool) – Whether to automatically load shared libraries that loaded objects depend on.

  • load_debug_info (bool) – Whether to automatically parse DWARF data and search for debug symbol files.

  • concrete_target – Whether to instantiate a concrete target for a concrete execution of the process. If this is the case, we will need to instantiate a SimConcreteEngine that wraps the ConcreteTarget provided by the user.

  • force_load_libs (Iterable[Union[str, BinaryIO, Path]]) – A list of libraries to load regardless of if they’re required by a loaded object.

  • skip_libs (Iterable[str]) – A list of libraries to never load, even if they’re required by a loaded object.

  • main_opts (Optional[Dict[str, Any]]) – A dictionary of options to be used loading the main binary.

  • lib_opts (Optional[Dict[str, Dict[str, Any]]]) – A dictionary mapping library names to the dictionaries of options to be used when loading them.

  • ld_path (Iterable[Union[str, Path]]) – A list of paths in which we can search for shared libraries.

  • use_system_libs (bool) – Whether or not to search the system load path for requested libraries. Default True.

  • ignore_import_version_numbers (bool) – Whether libraries with different version numbers in the filename will be considered equivalent, for example libc.so.6 and libc.so.0

  • case_insensitive (bool) – If this is set to True, filesystem loads will be done case-insensitively regardless of the case-sensitivity of the underlying filesystem.

  • rebase_granularity (int) – The alignment to use for rebasing shared objects

  • except_missing_libs (bool) – Throw an exception when a shared library can’t be found.

  • aslr (bool) – Load libraries in symbolic address space. Do not use this option.

  • page_size (int) – The granularity with which data is mapped into memory. Set to 0x1000 if you are working in an environment where data will always be memory mapped in a page-granular way.

  • preload_libs (Iterable[Union[str, BinaryIO, Path]]) – Similar to force_load_libs but will provide for symbol resolution, with precedence over any dependencies.

  • main_binary (str | BinaryIO | Path | Backend) –

  • perform_relocations (bool) –

  • arch (Arch | str | None) –

Variables:
  • memory (cle.memory.Clemory) – The loaded, rebased, and relocated memory of the program.

  • main_object – The object representing the main binary (i.e., the executable).

  • shared_objects – A dictionary mapping loaded library names to the objects representing them.

  • all_objects – A list containing representations of all the different objects loaded.

  • requested_names – A set containing the names of all the different shared libraries that were marked as a dependency by somebody.

  • initial_load_objects – A list of all the objects that were loaded as a result of the initial load request.

When reference is made to a dictionary of options, it requires a dictionary with zero or more of the following keys:

  • backend : “elf”, “pe”, “mach-o”, “blob” : which loader backend to use

  • arch : The archinfo.Arch object to use for the binary

  • base_addr : The address to rebase the object at

  • entry_point : The entry point to use for the object

More keys are defined on a per-backend basis.

property main_object: Backend#
property memory: Clemory#
property tls: ThreadManager#
close()[source]#
property max_addr: int#

The maximum address loaded as part of any loaded object (i.e., the whole address space).

property min_addr: int#

The minimum address loaded as part of any loaded object (i.e., the whole address space).

property initializers: List[int]#

Return a list of all the initializers that should be run before execution reaches the entry point, in the order they should be run.

property finalizers: List[int]#

Return a list of all the finalizers that should be run before the program exits. I’m not sure what order they should be run in.

property linux_loader_object: Backend | None#

If the linux dynamic loader is present in memory, return it

property elfcore_object: ELFCore | None#

If a corefile was loaded, this returns the actual core object instead of the main binary

property extern_object: ExternObject#

Return the extern object used to provide addresses to unresolved symbols and angr internals.

Accessing this property will load this object into memory if it was not previously present.

proposed model for how multiple extern objects should work:

  1. extern objects are a linked list. the one in loader._extern_object is the head of the list

  2. each round of explicit loads generates a new extern object if it has unresolved dependencies. this object has exactly the size necessary to hold all its exports.

  3. All requests for size are passed down the chain until they reach an object which has the space to service it or an object which has not yet been mapped. If all objects have been mapped and are full, a new extern object is mapped with a fixed size.

property kernel_object: KernelObject#

Return the object used to provide addresses to syscalls.

Accessing this property will load this object into memory if it was not previously present.

property all_elf_objects: List[MetaELF]#

Return a list of every object that was loaded from an ELF file.

property all_pe_objects: List[PE]#

Return a list of every object that was loaded from a PE file.

property missing_dependencies: Set[str]#

Return a set of every name that was requested as a shared object dependency but could not be loaded

property auto_load_libs: bool#
describe_addr(addr)[source]#

Returns a textual description of what’s in memory at the provided address

Return type:

str

Parameters:

addr (int) –

find_object(spec, extra_objects=())[source]#

If the given library specification has been loaded, return its object, otherwise return None.

Return type:

Optional[Backend]

Parameters:
find_object_containing(addr, membership_check=True)[source]#

Return the object that contains the given address, or None if the address is unmapped.

Parameters:
  • addr (int) – The address that should be contained in the object.

  • membership_check (bool) – Whether a membership check should be performed or not (True by default). This option can be set to False if you are certain that the target object does not have “holes”.

Return type:

Optional[Backend]

Returns:

The object or None.

find_segment_containing(addr, skip_pseudo_objects=True)[source]#

Find the segment object that the address belongs to.

Parameters:
  • addr (int) – The address to test

  • skip_pseudo_objects (bool) – Skip objects that CLE adds during loading.

Returns:

The segment that the address belongs to, or None if the address does not belong to any segment, or if segment information is not available.

Return type:

cle.Segment

find_section_containing(addr, skip_pseudo_objects=True)[source]#

Find the section object that the address belongs to.

Parameters:
  • addr (int) – The address to test.

  • skip_pseudo_objects (bool) – Skip objects that CLE adds during loading.

Returns:

The section that the address belongs to, or None if the address does not belong to any section, or if section information is not available.

Return type:

cle.Section

find_loadable_containing(addr, skip_pseudo_objects=True)[source]#

Find the section or segment object the address belongs to. Sections will only be used if the corresponding object does not have segments.

Parameters:
  • addr (int) – The address to test

  • skip_pseudo_objects – Skip objects that CLE adds during loading.

Return type:

Optional[Region]

Returns:

The section or segment that the address belongs to, or None if the address does not belong to any section or segment.

find_section_next_to(addr, skip_pseudo_objects=True)[source]#

Find the next section after the given address.

Parameters:
  • addr (int) – The address to test.

  • skip_pseudo_objects (bool) – Skip objects that CLE adds during loading.

Returns:

The next section that goes after the given address, or None if there is no section after the address, or if section information is not available.

Return type:

cle.Section

find_symbol(thing, fuzzy=False)[source]#

Search for the symbol with the given name or address.

Parameters:
  • thing – Either the name or address of a symbol to look up

  • fuzzy – Set to True to return the first symbol before or at the given address

Return type:

Optional[Symbol]

Returns:

A cle.backends.Symbol object if found, None otherwise.

property symbols: Iterator[Symbol]#
find_all_symbols(name, exclude_imports=True, exclude_externs=False, exclude_forwards=True)[source]#

Iterate over all symbols present in the set of loaded binaries that have the given name

Parameters:
  • name (str) – The name to search for

  • exclude_imports – Whether to exclude import symbols. Default True.

  • exclude_externs – Whether to exclude symbols in the extern object. Default False.

  • exclude_forwards – Whether to exclude forward symbols. Default True.

Return type:

Iterable[Symbol]

find_plt_stub_name(addr)[source]#

Return the name of the PLT stub starting at addr.

Return type:

Optional[str]

Parameters:

addr (int) –

find_relevant_relocations(name)[source]#

Iterate through all the relocations referring to the symbol with the given name

Return type:

Iterator[Relocation]

Parameters:

name (str) –

perform_irelative_relocs(resolver_func)[source]#

Use this method to satisfy IRelative relocations in the binary that require execution of loaded code.

Note that this does NOT handle IFunc symbols, which must be handled separately. (this could be changed, but at the moment it’s desirable to support lazy IFunc resolution, since emulation is usually slow)

Parameters:

resolver_func – A callback function that takes an address, runs the code at that address, and returns the return value from the emulated function.

dynamic_load(spec)[source]#

Load a file into the address space. Note that the semantics of auto_load_libs and except_missing_libs apply at all times.

Parameters:

spec – The path to the file to load. May be an absolute path, a relative path, or a name to search in the load path.

Returns:

A list of all the objects successfully loaded, which may be empty if this object was previously loaded. If the object specified in spec failed to load for any reason, including the file not being found, return None.

get_loader_symbolic_constraints()[source]#

Do not use this method.

fast_memory_load_pointer(addr, size=None)[source]#

Perform a fast memory loading of a pointer.

Parameters:
  • addr (int) – Address to read from.

  • size (Optional[int]) – Size of the pointer. Default to machine-word size.

Return type:

Optional[int]

Returns:

A pointer or None if the address does not exist.

Backends#

class cle.backends.FunctionHintSource[source]#

Bases: object

Enums that describe the source of function hints.

EH_FRAME = 0#
EXTERNAL_EH_FRAME = 1#
class cle.backends.FunctionHint(addr, size, source)[source]#

Bases: object

Describes a function hint.

Variables:
  • addr (int) – Address of the function.

  • size (int) – Size of the function.

  • source (int) – Source of this hint.

addr#
size#
source#
class cle.backends.ExceptionHandling(start_addr, size, handler_addr=None, type_=None, func_addr=None)[source]#

Bases: object

Describes an exception handling.

Exception handlers are usually language-specific. In C++, it is usually implemented as try {} catch {} blocks.

Variables:
  • start_addr (int) – The beginning of the try block.

  • size (int) – Size of the try block.

  • handler_addr (Optional[int]) – Address of the exception handler code.

  • type – Type of the exception handler. Optional.

  • func_addr (Optional[int]) – Address of the function. Optional.

start_addr#
size#
handler_addr#
type#
func_addr#
class cle.backends.Backend(binary, binary_stream, loader=None, is_main_bin=False, entry_point=None, arch=None, base_addr=None, force_rebase=False, has_memory=True, **kwargs)[source]#

Bases: object

Main base class for CLE binary objects.

An alternate interface to this constructor exists as the static method cle.loader.Loader.load_object()

Variables:
  • binary – The path to the file this object is loaded from

  • binary_basename – The basename of the filepath, or a short representation of the stream it was loaded from

  • is_main_bin – Whether this binary is loaded as the main executable

  • segments – A listing of all the loaded segments in this file

  • sections – A listing of all the demarked sections in the file

  • sections_map – A dict mapping from section name to section

  • imports – A mapping from symbol name to import relocation

  • resolved_imports – A list of all the import symbols that are successfully resolved

  • relocs – A list of all the relocations in this binary

  • irelatives – A list of tuples representing all the irelative relocations that need to be performed. The first item in the tuple is the address of the resolver function, and the second item is the address of where to write the result. The destination address is an RVA.

  • jmprel – A mapping from symbol name to the address of its jump slot relocation, i.e. its GOT entry.

  • arch (archinfo.arch.Arch) – The architecture of this binary

  • os (str) – The operating system this binary is meant to run under

  • mapped_base (int) – The base address of this object in virtual memory

  • deps – A list of names of shared libraries this binary depends on

  • linking – ‘dynamic’ or ‘static’

  • linked_base – The base address this object requests to be loaded at

  • pic (bool) – Whether this object is position-independent

  • execstack (bool) – Whether this executable has an executable stack

  • provides (str) – The name of the shared library dependency that this object resolves

  • symbols (list) – A list of symbols provided by this object, sorted by address

  • has_memory – Whether this backend is backed by a Clemory or not. As it stands now, a backend should still define min_addr and max_addr even if has_memory is False.

is_default = False#
symbols: List[Symbol]#
imports: Dict[str, Relocation]#
relocs: List[Relocation]#
child_objects: List[Backend]#
exception_handlings: List[ExceptionHandling]#
function_hints: List[FunctionHint]#
memory: Clemory#
property arch: Arch#
property loader: Loader#
close()[source]#
Return type:

None

set_arch(arch)[source]#
property image_base_delta#
property entry#
property segments: Regions[Segment]#
property sections: Regions[Section]#
property symbols_by_addr#
rebase(new_base)[source]#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()[source]#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available

contains_addr(addr)[source]#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

find_loadable_containing(addr)[source]#
find_segment_containing(addr)[source]#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

find_section_containing(addr)[source]#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

addr_to_offset(addr)[source]#
Return type:

Optional[int]

Parameters:

addr (int) –

offset_to_addr(offset)[source]#
Return type:

Optional[int]

Parameters:

offset (int) –

property min_addr: int#

This returns the lowest virtual address contained in any loaded segment of the binary.

property max_addr: int#

This returns the highest virtual address contained in any loaded segment of the binary.

property initializers: List[int]#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

property finalizers: List[int]#

Stub function. Like initializers, but with finalizers.

property threads: List#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

thread_registers(thread=None)[source]#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

Return type:

Dict[str, Any]

initial_register_values()[source]#

Deprecated

get_symbol(name)[source]#

Stub function. Implement to find the symbol with name name.

Return type:

Optional[Symbol]

Parameters:

name (str) –

static extract_soname(path)[source]#

Extracts the shared object identifier from the path, or returns None if it cannot.

Return type:

Optional[str]

classmethod is_compatible(stream)[source]#

Determine quickly whether this backend can load an object from this stream

Return type:

bool

classmethod check_compatibility(spec, obj)[source]#

Performs a minimal static load of spec and returns whether it’s compatible with obj

Return type:

bool

classmethod check_magic_compatibility(stream)[source]#

Check if a stream of bytes contains the same magic number as the main object

Return type:

bool

Parameters:

stream (BinaryIO) –

cle.backends.register_backend(name, cls)[source]#
class cle.backends.ELF(*args, addend=None, debug_symbols=None, discard_section_headers=False, discard_program_headers=False, **kwargs)[source]#

Bases: MetaELF

The main loader class for statically loading ELF executables. Uses the pyelftools library where useful.

Useful backend options:

  • debug_symbols: Provides the path to a separate file which contains the binary’s debug symbols

  • discard_section_headers: Do not parse section headers. Use this if they are corrupted or malicious.

  • discard_program_headers: Do not parse program headers. Use this if the binary is for a platform whose ELF loader only looks at section headers, but whose toolchain generates program headers anyway.

is_default = True#
imports: typing.Dict[str, 'Relocation']#
relocs: List[Relocation]#
addr_to_line: SortedDict[int, Set[Tuple[int, int]]]#
variables: Optional[List[Variable]]#
compilation_units: Optional[List[CompilationUnit]]#
close()[source]#
classmethod check_compatibility(spec, obj)[source]#

Performs a minimal static load of spec and returns whether it’s compatible with obj

classmethod check_magic_compatibility(stream)[source]#

Check if a stream of bytes contains the same magic number as the main object

static is_compatible(stream)[source]#

Determine quickly whether this backend can load an object from this stream

static extract_arch(reader)[source]#
property initializers#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

property finalizers#

Stub function. Like initializers, but with finalizers.

property symbols_by_name#
get_symbol(symid, symbol_table=None)[source]#

Gets a Symbol object for the specified symbol.

Parameters:

symid – Either an index into .dynsym or the name of a symbol.

rebase(new_base)[source]#

Rebase backend’s regions to the new base where they were mapped by the loader

addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

property entry#
static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

property image_base_delta#
initial_register_values()#

Deprecated

property is_ppc64_abiv1#

Returns whether the arch is PowerPC64 ABIv1.

Returns:

True if PowerPC64 ABIv1, False otherwise.

property is_ppc64_abiv2#

Returns whether the arch is PowerPC64 ABIv2.

Returns:

True if PowerPC64 ABIv2, False otherwise.

property loader: Loader#
property max_addr: int#

This returns the highest virtual address contained in any loaded segment of the binary.

property min_addr: int#

This returns the lowest virtual address contained in any loaded segment of the binary.

offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

property plt#

Maps names to addresses.

property ppc64_initial_rtoc#

Get initial rtoc value for PowerPC64 architecture.

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available

property reverse_plt#

Maps addresses to names.

property sections: Regions[Section]#
property segments: Regions[Segment]#
set_arch(arch)#
supported_filetypes = ['elf']#
property symbols_by_addr#
thread_registers(thread=None)#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

Return type:

Dict[str, Any]

property threads: List#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

symbols: List[Symbol]#
child_objects: List['Backend']#
exception_handlings: List[ExceptionHandling]#
function_hints: List[FunctionHint]#
memory: Clemory#
class cle.backends.ELFCore(*args, executable=None, remote_file_mapping=None, remote_file_mapper=None, **kwargs)[source]#

Bases: ELF

Loader class for ELF core files.

One key pain point when analyzing a core dump generated on a remote machine is that the paths to binaries are absolute (and may not exist or be the same on your local machine).

Therefore, you can use the option remote_file_mapping to specify a dict mapping (easy if there are a small number of mappings) or remote_file_mapper to specify a function that accepts a remote file name and returns the local file name (useful if there are many mappings).

If you specify both remote_file_mapping and remote_file_mapper, remote_file_mapping is applied first, then the result is passed to remote_file_mapper.

Parameters:
  • executable – Optional path to the main binary of the core dump. If not supplied, ELFCore will attempt to figure it out automatically from the core dump.

  • remote_file_mapping – Optional dict that maps specific file names in the core dump to other file names.

  • remote_file_mapper – Optional function that is used to map every file name in the core dump to whatever is returned from this function.

is_default = True#
static is_compatible(stream)[source]#

Determine quickly whether this backend can load an object from this stream

property threads#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This property should contain a list of names for these threads, which should be unique.

thread_registers(thread=None)[source]#

If this backend represents a dump of a running program, it may contain one or more thread contexts, i.e. register files. This method should return the register file for a given thread (as named in Backend.threads) as a dict mapping register names (as seen in archinfo) to numbers. If the thread is not specified, it should return the context for a “default” thread. If there are no threads, it should return an empty dict.

addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
classmethod check_compatibility(spec, obj)#

Performs a minimal static load of spec and returns whether it’s compatible with obj

classmethod check_magic_compatibility(stream)#

Check if a stream of bytes contains the same magic number as the main object

close()#
contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

property entry#
static extract_arch(reader)#
static extract_soname(path)#

Extracts the shared object identifier from the path, or returns None if it cannot.

property finalizers#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

get_symbol(symid, symbol_table=None)#

Gets a Symbol object for the specified symbol.

Parameters:

symid – Either an index into .dynsym or the name of a symbol.

property image_base_delta#
initial_register_values()#

Deprecated

property initializers#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Addresses should be rebased.

property is_ppc64_abiv1#

Returns whether the arch is PowerPC64 ABIv1.

Returns:

True if PowerPC64 ABIv1, False otherwise.

property is_ppc64_abiv2#

Returns whether the arch is PowerPC64 ABIv2.

Returns:

True if PowerPC64 ABIv2, False otherwise.

property loader: Loader#
property max_addr: int#

This returns the highest virtual address contained in any loaded segment of the binary.

property min_addr: int#

This returns the lowest virtual address contained in any loaded segment of the binary.

offset_to_addr(offset)#
Return type:

Optional[int]

Parameters:

offset (int) –

property plt#

Maps names to addresses.

property ppc64_initial_rtoc#

Get initial rtoc value for PowerPC64 architecture.

rebase(new_base)#

Rebase backend’s regions to the new base where they were mapped by the loader

relocate()#

Apply all resolved relocations to memory.

The meaning of “resolved relocations” is somewhat subtle - there is a linking step which attempts to resolve each relocation, currently only present in the main internal loading function since the calculation of which objects should be available

property reverse_plt#

Maps addresses to names.

property sections: Regions[Section]#
property segments: Regions[Segment]#
set_arch(arch)#
supported_filetypes = ['elf']#
property symbols_by_addr#
property symbols_by_name#
addr_to_line: SortedDict[int, Set[Tuple[int, int]]]#
variables: Optional[List[Variable]]#
compilation_units: Optional[List[CompilationUnit]]#
symbols: List[Symbol]#
imports: typing.Dict[str, 'Relocation']#
relocs: List[Relocation]#
child_objects: List['Backend']#
exception_handlings: List[ExceptionHandling]#
function_hints: List[FunctionHint]#
memory: Clemory#
class cle.backends.MetaELF(*args, **kwargs)[source]#

Bases: Backend

A base class that implements functions used by all backends that can load an ELF.

supported_filetypes = ['elf']#
property plt#

Maps names to addresses.

property reverse_plt#

Maps addresses to names.

property is_ppc64_abiv1#

Returns whether the arch is PowerPC64 ABIv1.

Returns:

True if PowerPC64 ABIv1, False otherwise.

property is_ppc64_abiv2#

Returns whether the arch is PowerPC64 ABIv2.

Returns:

True if PowerPC64 ABIv2, False otherwise.

property ppc64_initial_rtoc#

Get initial rtoc value for PowerPC64 architecture.

static extract_soname(path)[source]#

Extracts the shared object identifier from the path, or returns None if it cannot.

addr_to_offset(addr)#
Return type:

Optional[int]

Parameters:

addr (int) –

property arch: Arch#
classmethod check_compatibility(spec, obj)#

Performs a minimal static load of spec and returns whether it’s compatible with obj

Return type:

bool

classmethod check_magic_compatibility(stream)#

Check if a stream of bytes contains the same magic number as the main object

Return type:

bool

Parameters:

stream (BinaryIO) –

close()#
Return type:

None

contains_addr(addr)#

Is addr in one of the binary’s segments/sections we have loaded? (i.e. is it mapped into memory ?)

property entry#
property finalizers: List[int]#

Stub function. Like initializers, but with finalizers.

find_loadable_containing(addr)#
find_section_containing(addr)#

Returns the section that contains addr or None.

Return type:

Optional[Section]

Parameters:

addr (int) –

find_segment_containing(addr)#

Returns the segment that contains addr, or None.

Return type:

Optional[Segment]

Parameters:

addr (int) –

get_symbol(name)#

Stub function. Implement to find the symbol with name name.

Return type:

Optional[Symbol]

Parameters:

name (str) –

property image_base_delta#
initial_register_values()#

Deprecated

property initializers: List[int]#

Stub function. Should be overridden by backends that can provide initializer functions that ought to be run before execution reaches the entry point. Address