Source code for archinfo.arch_x86

from archinfo.types import RegisterOffset

from .arch import Arch, Endness, Register, register_arch
from .archerror import ArchError
from .tls import TLSArchInfo

try:
    import capstone as _capstone
except ImportError:
    _capstone = None

try:
    import keystone as _keystone
except ImportError:
    _keystone = None

try:
    import unicorn as _unicorn
except ImportError:
    _unicorn = None

try:
    import pyvex as _pyvex
except ImportError:
    _pyvex = None


_NATIVE_FUNCTION_PROLOGS = {
    rb"\x8b\xff\x55\x8b\xec",  # mov edi, edi; push ebp; mov ebp, esp
    rb"\x55\x8b\xec",  # push ebp; mov ebp, esp
    rb"\x55\x89\xe5",  # push ebp; mov ebp, esp
    rb"\x55\x57\x56",  # push ebp; push edi; push esi
    # mov eax, 0x000000??; (push ebp; push eax; push edi; push ebx; push esi; push edx; push ecx) sub esp
    rb"\xb8[\x00-\xff]\x00\x00\x00[\x50\x51\x52\x53\x55\x56\x57]{0,7}\x8b[\x00-\xff]{2}",
    # (push ebp; push eax; push edi; push ebx; push esi; push edx; push ecx) sub esp
    rb"[\x50\x51\x52\x53\x55\x56\x57]{1,7}\x83\xec[\x00-\xff]{2,4}",
    # (push ebp; push eax; push edi; push ebx; push esi; push edx; push ecx) mov xxx, xxx
    rb"[\x50\x51\x52\x53\x55\x56\x57]{1,7}\x8b[\x00-\xff]{2}",
    rb"(\x81|\x83)\xec",  # sub xxx %esp
}
# every function prolog can potentially be prefixed with endbr32
_endbr32 = b"\xf3\x0f\x1e\xfb"
_prefixed = {(_endbr32 + prolog) for prolog in _NATIVE_FUNCTION_PROLOGS}
_FUNCTION_PROLOGS = _prefixed | _NATIVE_FUNCTION_PROLOGS


[docs]class ArchX86(Arch):
[docs] def __init__(self, endness=Endness.LE): if endness != Endness.LE: raise ArchError("Arch i386 must be little endian") super().__init__(endness) if self.vex_archinfo: self.vex_archinfo["x86_cr0"] = 0xFFFFFFFF # Register blacklist reg_blacklist = ("cs", "ds", "es", "fs", "gs", "ss", "gdt", "ldt") if self.reg_blacklist is not None and self.reg_blacklist_offsets is not None: for register in self.register_list: if register.name in reg_blacklist: self.reg_blacklist.append(register.name) self.reg_blacklist_offsets.append(register.vex_offset) if _unicorn and _pyvex: # CPU flag registers uc_flags_reg = _unicorn.x86_const.UC_X86_REG_EFLAGS cpu_flag_registers = {"d": 1 << 10, "ac": 1 << 18, "id": 1 << 21} for reg, reg_bitmask in cpu_flag_registers.items(): reg_offset = self.get_register_offset(reg) self.cpu_flag_register_offsets_and_bitmasks_map[reg_offset] = (uc_flags_reg, reg_bitmask) mxcsr_registers = {"sseround": 1 << 14 | 1 << 13} uc_mxcsr_reg = _unicorn.x86_const.UC_X86_REG_MXCSR for reg, reg_bitmask in mxcsr_registers.items(): reg_offset = self.get_register_offset(reg) self.cpu_flag_register_offsets_and_bitmasks_map[reg_offset] = (uc_mxcsr_reg, reg_bitmask)
@property def capstone_x86_syntax(self): """ Get the current syntax Capstone uses for x86. It can be 'intel' or 'at&t' :return: Capstone's current x86 syntax :rtype: str """ return self._cs_x86_syntax @capstone_x86_syntax.setter def capstone_x86_syntax(self, new_syntax): """ Set the syntax that Capstone outputs for x86. """ if new_syntax not in ("intel", "at&t"): raise ArchError('Unsupported Capstone x86 syntax. It must be either "intel" or "at&t".') if new_syntax != self._cs_x86_syntax: self._cs = None self._cs_x86_syntax = new_syntax def _configure_capstone(self): self._cs.syntax = ( _capstone.CS_OPT_SYNTAX_ATT if self._cs_x86_syntax == "at&t" else _capstone.CS_OPT_SYNTAX_INTEL ) @property def keystone_x86_syntax(self): """ Get the current syntax Keystone uses for x86. It can be 'intel', 'at&t', 'nasm', 'masm', 'gas' or 'radix16' :return: Keystone's current x86 syntax :rtype: str """ return self._ks_x86_syntax @keystone_x86_syntax.setter def keystone_x86_syntax(self, new_syntax): """ Set the syntax that Keystone uses for x86. """ if new_syntax not in ("intel", "at&t", "nasm", "masm", "gas", "radix16"): raise ArchError( "Unsupported Keystone x86 syntax. It must be one of the following: " '"intel", "at&t", "nasm", "masm", "gas" or "radix16".' ) if new_syntax != self._ks_x86_syntax: self._ks = None self._ks_x86_syntax = new_syntax def _configure_keystone(self): if self._ks_x86_syntax == "at&t": self._ks.syntax = _keystone.KS_OPT_SYNTAX_ATT elif self._ks_x86_syntax == "nasm": self._ks.syntax = _keystone.KS_OPT_SYNTAX_NASM elif self._ks_x86_syntax == "masm": self._ks.syntax = _keystone.KS_OPT_SYNTAX_MASM elif self._ks_x86_syntax == "gas": self._ks.syntax = _keystone.KS_OPT_SYNTAX_GAS elif self._ks_x86_syntax == "radix16": self._ks.syntax = _keystone.KS_OPT_SYNTAX_RADIX16 else: self._ks.syntax = _keystone.KS_OPT_SYNTAX_INTEL bits = 32 vex_arch = "VexArchX86" name = "X86" qemu_name = "i386" ida_processor = "metapc" linux_name = "i386" triplet = "i386-linux-gnu" max_inst_bytes = 15 call_sp_fix = -4 ret_offset = RegisterOffset(8) vex_conditional_helpers = True syscall_num_offset = 8 call_pushes_ret = True stack_change = -4 memory_endness = Endness.LE register_endness = Endness.LE sizeof = {"short": 16, "int": 32, "long": 32, "long long": 64} if _capstone: cs_arch = _capstone.CS_ARCH_X86 cs_mode = _capstone.CS_MODE_32 + _capstone.CS_MODE_LITTLE_ENDIAN _cs_x86_syntax = None # Set it to 'att' in order to use AT&T syntax for x86 if _keystone: ks_arch = _keystone.KS_ARCH_X86 ks_mode = _keystone.KS_MODE_32 + _keystone.KS_MODE_LITTLE_ENDIAN _ks_x86_syntax = None uc_arch = _unicorn.UC_ARCH_X86 if _unicorn else None uc_mode = (_unicorn.UC_MODE_32 + _unicorn.UC_MODE_LITTLE_ENDIAN) if _unicorn else None uc_const = _unicorn.x86_const if _unicorn else None uc_prefix = "UC_X86_" if _unicorn else None function_prologs = _FUNCTION_PROLOGS function_epilogs = { rb"\xc9\xc3", # leave; ret rb"([^\x41][\x50-\x5f]{1}|\x41[\x50-\x5f])\xc3", # pop <reg>; ret rb"[^\x48][\x83,\x81]\xc4([\x00-\xff]{1}|[\x00-\xff]{4})\xc3", # add esp, <siz>; retq } ret_instruction = b"\xc3" nop_instruction = b"\x90" instruction_alignment = 1 register_list = [ Register( name="eax", size=4, subregisters=[("ax", 0, 2), ("al", 0, 1), ("ah", 1, 1)], general_purpose=True, argument=True, linux_entry_value=0x1C, ), Register( name="ecx", size=4, subregisters=[("cx", 0, 2), ("cl", 0, 1), ("ch", 1, 1)], general_purpose=True, argument=True, ), Register( name="edx", size=4, subregisters=[("dx", 0, 2), ("dl", 0, 1), ("dh", 1, 1)], general_purpose=True, argument=True, linux_entry_value="ld_destructor", ), Register( name="ebx", size=4, subregisters=[("bx", 0, 2), ("bl", 0, 1), ("bh", 1, 1)], general_purpose=True, argument=True, ), Register( name="esp", size=4, alias_names=("sp",), general_purpose=True, default_value=(Arch.initial_sp, True, "global"), ), Register(name="ebp", size=4, alias_names=("bp",), general_purpose=True, argument=True, linux_entry_value=0), Register( name="esi", size=4, subregisters=[("si", 0, 2), ("sil", 0, 1), ("sih", 1, 1)], general_purpose=True, argument=True, ), Register( name="edi", size=4, subregisters=[("di", 0, 2), ("dil", 0, 1), ("dih", 1, 1)], general_purpose=True, argument=True, ), Register(name="cc_op", size=4, default_value=(0, False, None), concrete=False, artificial=True), Register(name="cc_dep1", size=4, concrete=False, artificial=True), Register(name="cc_dep2", size=4, concrete=False, artificial=True), Register(name="cc_ndep", size=4, concrete=False, artificial=True), Register(name="d", size=4, alias_names=("dflag",), default_value=(1, False, None), concrete=False), Register(name="id", size=4, alias_names=("idflag",), default_value=(1, False, None), concrete=False), Register(name="ac", size=4, alias_names=("acflag",), default_value=(0, False, None), concrete=False), Register(name="eip", size=4, alias_names=("ip", "pc")), Register( name="fpreg", size=64, subregisters=[ ("mm0", 0, 8), ("mm1", 8, 8), ("mm2", 16, 8), ("mm3", 24, 8), ("mm4", 32, 8), ("mm5", 40, 8), ("mm6", 48, 8), ("mm7", 56, 8), ], alias_names=("fpu_regs",), floating_point=True, concrete=False, ), Register(name="fptag", size=8, alias_names=("fpu_tags",), floating_point=True, default_value=(0, False, None)), Register(name="fpround", size=4, floating_point=True, default_value=(0, False, None)), Register(name="fc3210", size=4, floating_point=True), Register(name="ftop", size=4, floating_point=True, default_value=(7, False, None), artificial=True), Register(name="sseround", size=4, vector=True, default_value=(0, False, None)), Register(name="xmm0", size=16, vector=True), Register(name="xmm1", size=16, vector=True), Register(name="xmm2", size=16, vector=True), Register(name="xmm3", size=16, vector=True), Register(name="xmm4", size=16, vector=True), Register(name="xmm5", size=16, vector=True), Register(name="xmm6", size=16, vector=True), Register(name="xmm7", size=16, vector=True), Register(name="cs", size=2), Register(name="ds", size=2), Register(name="es", size=2), Register(name="fs", size=2, default_value=(0, False, None), concrete=False), Register(name="gs", size=2, default_value=(0, False, None), concrete=False), Register(name="ss", size=2), Register(name="ldt", size=8, default_value=(0, False, None), concrete=False), Register(name="gdt", size=8, default_value=(0, False, None), concrete=False), Register(name="emnote", size=4, artificial=True), Register(name="cmstart", size=4), Register(name="cmlen", size=4), Register(name="nraddr", size=4), Register(name="sc_class", size=4), Register(name="ip_at_syscall", size=4, concrete=False, artificial=True), ] symbol_type_translation = {10: "STT_GNU_IFUNC", "STT_LOOS": "STT_GNU_IFUNC"} lib_paths = ["/lib32", "/usr/lib32"] got_section_name = ".got.plt" ld_linux_name = "ld-linux.so.2" elf_tls = TLSArchInfo(2, 56, [8], [4], [0], 0, 0) dwarf_registers = [ "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", "eip", "eflags", "<none>", "st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7", "<none>", "<none>", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7", "fcw", "fsw", "mxcsr", "es", "cs", "ss", "ds", "fs", "gs", "<none>", "<none>", "tr", "ldtr", ]
register_arch([r".*i?\d86|.*x32|.*x86|.*metapc"], 32, Endness.LE, ArchX86)