mirror of
https://github.com/isledecomp/isle.git
synced 2025-10-26 18:04:06 +00:00
Detect calls using absolute indirect addressing (#784)
* Detect calls using absolute indirect addressing
* Ignore imports we can't match
This commit is contained in:
@@ -7,10 +7,10 @@ so that virtual addresses are replaced by symbol name or a generic
|
||||
placeholder string."""
|
||||
|
||||
import re
|
||||
import struct
|
||||
from functools import cache
|
||||
from typing import Callable, List, Optional, Tuple
|
||||
from collections import namedtuple
|
||||
from isledecomp.bin import InvalidVirtualAddressError
|
||||
from .const import JUMP_MNEMONICS, SINGLE_OPERAND_INSTS
|
||||
from .instgen import InstructGen, SectionType
|
||||
|
||||
@@ -35,16 +35,33 @@ def from_hex(string: str) -> Optional[int]:
|
||||
return None
|
||||
|
||||
|
||||
def bytes_to_float(b: bytes) -> Optional[float]:
    """Decode a little-endian IEEE 754 value from raw bytes.

    A 4-byte input is unpacked as single precision ("<f") and an 8-byte
    input as double precision ("<d"). Any other length is not a float
    constant we know how to read, so None is returned instead of raising.
    """
    if len(b) == 4:
        return struct.unpack("<f", b)[0]

    if len(b) == 8:
        return struct.unpack("<d", b)[0]

    return None
|
||||
|
||||
|
||||
def bytes_to_dword(b: bytes) -> Optional[int]:
    """Decode a little-endian unsigned 32-bit integer from raw bytes.

    Returns None unless exactly 4 bytes are supplied, so callers can pass
    through whatever a binary read produced without checking its length.
    """
    if len(b) == 4:
        return struct.unpack("<L", b)[0]

    return None
|
||||
|
||||
|
||||
class ParseAsm:
|
||||
def __init__(
|
||||
self,
|
||||
relocate_lookup: Optional[Callable[[int], bool]] = None,
|
||||
name_lookup: Optional[Callable[[int], str]] = None,
|
||||
float_lookup: Optional[Callable[[int, int], Optional[str]]] = None,
|
||||
bin_lookup: Optional[Callable[[int, int], Optional[bytes]]] = None,
|
||||
) -> None:
|
||||
self.relocate_lookup = relocate_lookup
|
||||
self.name_lookup = name_lookup
|
||||
self.float_lookup = float_lookup
|
||||
self.bin_lookup = bin_lookup
|
||||
self.replacements = {}
|
||||
self.number_placeholders = True
|
||||
|
||||
@@ -58,14 +75,14 @@ class ParseAsm:
|
||||
return False
|
||||
|
||||
def float_replace(self, addr: int, data_size: int) -> Optional[str]:
|
||||
if callable(self.float_lookup):
|
||||
try:
|
||||
float_str = self.float_lookup(addr, data_size)
|
||||
except InvalidVirtualAddressError:
|
||||
# probably caused by reading an invalid instruction
|
||||
if callable(self.bin_lookup):
|
||||
float_bytes = self.bin_lookup(addr, data_size)
|
||||
if float_bytes is None:
|
||||
return None
|
||||
if float_str is not None:
|
||||
return f"{float_str} (FLOAT)"
|
||||
|
||||
float_value = bytes_to_float(float_bytes)
|
||||
if float_value is not None:
|
||||
return f"{float_value} (FLOAT)"
|
||||
|
||||
return None
|
||||
|
||||
@@ -122,6 +139,30 @@ class ParseAsm:
|
||||
|
||||
return match.group(0)
|
||||
|
||||
def hex_replace_indirect(self, match: re.Match) -> str:
    """Edge case for hex_replace_always. The context of the instruction
    tells us that the pointer value is an absolute indirect.
    So we go to that location in the binary to get the address.
    If we cannot identify the indirect address, fall back to a lookup
    on the original pointer value so we might display something useful.

    match: regex match whose group(1) is the hex pointer text and whose
    group(0) is the full operand text containing it.

    Returns group(0) with the pointer text replaced, prefixed with "->"
    when the indirect target was resolved.
    """
    value = int(match.group(1), 16)
    indirect_value = None

    # Read the 4 bytes stored at the pointer, if a reader was supplied.
    if callable(self.bin_lookup):
        indirect_value = self.bin_lookup(value, 4)

    if indirect_value is not None:
        indirect_addr = bytes_to_dword(indirect_value)
        # Only show the indirect target when the lookup recognizes it;
        # otherwise the original pointer is the more useful thing to show.
        if (
            indirect_addr is not None
            and self.lookup(indirect_addr, use_cache=False) is not None
        ):
            return match.group(0).replace(
                match.group(1), "->" + self.replace(indirect_addr)
            )

    # Fallback: substitute based on the pointer value itself.
    return match.group(0).replace(match.group(1), self.replace(value))
|
||||
|
||||
def hex_replace_float(self, match: re.Match) -> str:
|
||||
"""Special case for replacements on float instructions.
|
||||
If the pointer is a float constant, read it from the binary."""
|
||||
@@ -178,7 +219,10 @@ class ParseAsm:
|
||||
jump_displacement = op_str_address - (inst.address + inst.size)
|
||||
return (inst.mnemonic, hex(jump_displacement))
|
||||
|
||||
if inst.mnemonic.startswith("f"):
|
||||
if inst.mnemonic == "call":
|
||||
# Special handling for absolute indirect CALL.
|
||||
op_str = ptr_replace_regex.sub(self.hex_replace_indirect, inst.op_str)
|
||||
elif inst.mnemonic.startswith("f"):
|
||||
# If floating point instruction
|
||||
op_str = ptr_replace_regex.sub(self.hex_replace_float, inst.op_str)
|
||||
else:
|
||||
|
||||
@@ -4,7 +4,7 @@ import difflib
|
||||
import struct
|
||||
from dataclasses import dataclass
|
||||
from typing import Callable, Iterable, List, Optional
|
||||
from isledecomp.bin import Bin as IsleBin
|
||||
from isledecomp.bin import Bin as IsleBin, InvalidVirtualAddressError
|
||||
from isledecomp.cvdump.demangler import demangle_string_const
|
||||
from isledecomp.cvdump import Cvdump, CvdumpAnalysis
|
||||
from isledecomp.parser import DecompCodebase
|
||||
@@ -50,20 +50,13 @@ def create_reloc_lookup(bin_file: IsleBin) -> Callable[[int], bool]:
|
||||
return lookup
|
||||
|
||||
|
||||
def create_float_lookup(bin_file: IsleBin) -> Callable[[int, int], Optional[str]]:
|
||||
"""Function generator for floating point lookup"""
|
||||
def create_bin_lookup(bin_file: IsleBin) -> Callable[[int, int], Optional[str]]:
|
||||
"""Function generator for reading from the bin file"""
|
||||
|
||||
def lookup(addr: int, size: int) -> Optional[str]:
|
||||
data = bin_file.read(addr, size)
|
||||
# If this is a float constant, it should be initialized data.
|
||||
if data is None:
|
||||
return None
|
||||
|
||||
struct_str = "<f" if size == 4 else "<d"
|
||||
def lookup(addr: int, size: int) -> Optional[bytes]:
|
||||
try:
|
||||
(float_value,) = struct.unpack(struct_str, data)
|
||||
return str(float_value)
|
||||
except struct.error:
|
||||
return bin_file.read(addr, size)
|
||||
except InvalidVirtualAddressError:
|
||||
return None
|
||||
|
||||
return lookup
|
||||
@@ -273,8 +266,37 @@ class Compare:
|
||||
# the connection between the thunk functions.
|
||||
# We already have the symbol name we need from the PDB.
|
||||
for orig, recomp in orig_to_recomp.items():
|
||||
if orig is None or recomp is None:
|
||||
continue
|
||||
|
||||
# Match the __imp__ symbol
|
||||
self._db.set_pair(orig, recomp, SymbolType.POINTER)
|
||||
|
||||
# Read the relative address from .idata
|
||||
try:
|
||||
(recomp_rva,) = struct.unpack("<L", self.recomp_bin.read(recomp, 4))
|
||||
(orig_rva,) = struct.unpack("<L", self.orig_bin.read(orig, 4))
|
||||
except ValueError:
|
||||
# Bail out if there's a problem with struct.unpack
|
||||
continue
|
||||
|
||||
# Strictly speaking, this is a hack to support asm sanitize.
|
||||
# When calling an import, we will recognize that the address for the
|
||||
# CALL instruction is a pointer to the actual address, but this is
|
||||
# not only not the address of a function, it is not an address at all.
|
||||
# To make the asm display work correctly (i.e. to match what you see
|
||||
# in ghidra) create a function match on the RVA. This is not a valid
|
||||
# virtual address because it is before the imagebase, but it will
|
||||
# do what we need it to do in the sanitize function.
|
||||
|
||||
(dll_name, func_name) = orig_byaddr[orig]
|
||||
fullname = dll_name + ":" + func_name
|
||||
self._db.set_recomp_symbol(
|
||||
recomp_rva, SymbolType.FUNCTION, fullname, None, 4
|
||||
)
|
||||
self._db.set_pair(orig_rva, recomp_rva, SymbolType.FUNCTION)
|
||||
self._db.skip_compare(orig_rva)
|
||||
|
||||
def _match_thunks(self):
|
||||
"""Thunks are (by nature) matched by indirection. If a thunk from orig
|
||||
points at a function we have already matched, we can find the matching
|
||||
@@ -444,18 +466,18 @@ class Compare:
|
||||
orig_should_replace = create_reloc_lookup(self.orig_bin)
|
||||
recomp_should_replace = create_reloc_lookup(self.recomp_bin)
|
||||
|
||||
orig_float = create_float_lookup(self.orig_bin)
|
||||
recomp_float = create_float_lookup(self.recomp_bin)
|
||||
orig_bin_lookup = create_bin_lookup(self.orig_bin)
|
||||
recomp_bin_lookup = create_bin_lookup(self.recomp_bin)
|
||||
|
||||
orig_parse = ParseAsm(
|
||||
relocate_lookup=orig_should_replace,
|
||||
name_lookup=orig_lookup,
|
||||
float_lookup=orig_float,
|
||||
bin_lookup=orig_bin_lookup,
|
||||
)
|
||||
recomp_parse = ParseAsm(
|
||||
relocate_lookup=recomp_should_replace,
|
||||
name_lookup=recomp_lookup,
|
||||
float_lookup=recomp_float,
|
||||
bin_lookup=recomp_bin_lookup,
|
||||
)
|
||||
|
||||
orig_combined = orig_parse.parse_asm(orig_raw, match.orig_addr)
|
||||
|
||||
Reference in New Issue
Block a user