reccmp: Show float constants (#473)

This commit is contained in:
MS
2024-01-20 20:19:49 -05:00
committed by GitHub
parent b5a3c5feea
commit 6ed3e89ed2
3 changed files with 124 additions and 19 deletions

View File

@@ -13,7 +13,7 @@ from capstone import Cs, CS_ARCH_X86, CS_MODE_32
disassembler = Cs(CS_ARCH_X86, CS_MODE_32)
ptr_replace_regex = re.compile(r"ptr \[(0x[0-9a-fA-F]+)\]")
ptr_replace_regex = re.compile(r"(?P<data_size>\w+) ptr \[(?P<addr>0x[0-9a-fA-F]+)\]")
DisasmLiteInst = namedtuple("DisasmLiteInst", "address, size, mnemonic, op_str")
@@ -27,14 +27,20 @@ def from_hex(string: str) -> Optional[int]:
return None
def get_float_size(size_str: str) -> int:
    """Return the width in bytes of a memory operand from its size keyword.

    `size_str` is the word that precedes "ptr" in the disassembled operand
    (the `data_size` group of `ptr_replace_regex`). The result is handed to
    the float lookup as the number of bytes to read.

    Only "dword" (4) and "qword" (8) hold a value that can be decoded as a
    single or double precision float. The 16-bit and 80-bit keywords are
    mapped to their real widths rather than to 4, so that a lookup which
    only understands 4- and 8-byte values fails to decode them and the
    caller falls back to a regular placeholder, instead of displaying the
    first four bytes of an unrelated value as if they were a float.

    Unrecognized keywords keep the historical default of 4.
    """
    widths = {
        "word": 2,
        "dword": 4,
        "qword": 8,
        # 80-bit extended precision; both common spellings are accepted.
        "xword": 10,
        "tbyte": 10,
    }
    return widths.get(size_str, 4)
class ParseAsm:
# Store the optional lookup callbacks and set up per-instance state.
# All three callbacks default to None.
def __init__(
self,
# Callable taking an int address and returning a bool.
# presumably answers "should this address be replaced?" - confirm at the call site
relocate_lookup: Optional[Callable[[int], bool]] = None,
# Callable taking an int address and returning a name string.
name_lookup: Optional[Callable[[int], str]] = None,
# Callable taking (address, size in bytes) and returning the text of the
# float stored there, or None. float_replace() calls it and appends
# " (FLOAT)" to any non-None result.
float_lookup: Optional[Callable[[int, int], Optional[str]]] = None,
) -> None:
self.relocate_lookup = relocate_lookup
self.name_lookup = name_lookup
self.float_lookup = float_lookup
# Cache keyed by address; lookup() consults it before doing anything else.
self.replacements = {}
# NOTE(review): the effect of this flag is not visible in this excerpt.
self.number_placeholders = True
@@ -47,6 +53,14 @@ class ParseAsm:
return False
def float_replace(self, addr: int, data_size: int) -> Optional[str]:
    """Return display text for the float stored at `addr`, if available.

    Calls `self.float_lookup(addr, data_size)` when a callable was supplied.
    A non-None result is returned with " (FLOAT)" appended. Returns None
    when no callable is configured or when the lookup itself returns None.
    """
    if not callable(self.float_lookup):
        return None

    text = self.float_lookup(addr, data_size)
    return None if text is None else f"{text} (FLOAT)"
def lookup(self, addr: int) -> Optional[str]:
"""Return a replacement name for this address if we find one."""
if (cached := self.replacements.get(addr, None)) is not None:
@@ -108,18 +122,45 @@ class ParseAsm:
def filter_out_ptr(match):
"""Helper for re.sub, see below"""
offset = from_hex(match.group(1))
offset = from_hex(match.group("addr"))
if offset is not None:
# We assume this is always an address to replace
placeholder = self.replace(offset)
return f"ptr [{placeholder}]"
return f'{match.group("data_size")} ptr [{placeholder}]'
# Strict regex should ensure we can read the hex number.
# But just in case: return the string with no changes
return match.group(0)
op_str = ptr_replace_regex.sub(filter_out_ptr, inst.op_str)
def float_ptr_replace(match):
    """Helper for re.sub on the operands of floating point instructions.

    Rewrites "<size> ptr [0x...]" so that the bracketed address becomes,
    in order of preference: the name found by self.lookup, the float text
    from self.float_replace, or the placeholder from self.replace.
    """
    offset = from_hex(match.group("addr"))
    if offset is None:
        # Strict regex should ensure we can read the hex number.
        # But just in case: return the string with no changes
        return match.group(0)

    size_keyword = match.group("data_size")

    # If we can find a variable name for this pointer, use it.
    text = self.lookup(offset)

    # Read what's under the pointer and show the decimal value.
    if text is None:
        text = self.float_replace(offset, get_float_size(size_keyword))

    # If we can't read the float, use a regular placeholder.
    if text is None:
        text = self.replace(offset)

    return f"{size_keyword} ptr [{text}]"
if inst.mnemonic.startswith("f"):
# If floating point instruction
op_str = ptr_replace_regex.sub(float_ptr_replace, inst.op_str)
else:
op_str = ptr_replace_regex.sub(filter_out_ptr, inst.op_str)
# Performance hack:
# Skip this step if there is nothing left to consider replacing.

View File

@@ -3,7 +3,8 @@ import logging
import difflib
import struct
from dataclasses import dataclass
from typing import Iterable, List, Optional
from typing import Callable, Iterable, List, Optional
from isledecomp.bin import Bin as IsleBin
from isledecomp.cvdump.demangler import demangle_string_const
from isledecomp.cvdump import Cvdump, CvdumpAnalysis
from isledecomp.parser import DecompCodebase
@@ -36,9 +37,39 @@ class DiffReport:
return f"{self.name} (0x{self.orig_addr:x}) {self.ratio*100:.02f}%{'*' if self.is_effective_match else ''}"
def create_reloc_lookup(bin_file: IsleBin) -> Callable[[int], bool]:
    """Build a relocation-table predicate bound to `bin_file`.

    The returned callable takes an address. It returns False when the
    address is not strictly above `bin_file.imagebase`; otherwise it
    returns whatever `bin_file.is_relocated_addr(addr)` returns.
    """

    def lookup(addr: int) -> bool:
        if addr <= bin_file.imagebase:
            return False
        return bin_file.is_relocated_addr(addr)

    return lookup
def create_float_lookup(bin_file: IsleBin) -> Callable[[int, int], Optional[str]]:
    """Build a float-constant reader bound to `bin_file`.

    The returned callable takes (addr, size). It reads `size` bytes with
    `bin_file.read` and decodes them as a little-endian single precision
    float when `size` is 4, or as a double otherwise. It returns `str()`
    of the decoded value, or None when the read yields None or the bytes
    cannot be unpacked with the chosen format.
    """
    single_precision = {4: "<f"}

    def lookup(addr: int, size: int) -> Optional[str]:
        raw = bin_file.read(addr, size)

        # If this is a float constant, it should be initialized data.
        if raw is None:
            return None

        fmt = single_precision.get(size, "<d")
        try:
            decoded = struct.unpack(fmt, raw)
        except struct.error:
            return None

        return str(decoded[0])

    return lookup
class Compare:
# pylint: disable=too-many-instance-attributes
def __init__(self, orig_bin, recomp_bin, pdb_file, code_dir):
def __init__(
self, orig_bin: IsleBin, recomp_bin: IsleBin, pdb_file: str, code_dir: str
):
self.orig_bin = orig_bin
self.recomp_bin = recomp_bin
self.pdb_file = pdb_file
@@ -229,17 +260,6 @@ class Compare:
orig_raw = self.orig_bin.read(match.orig_addr, match.size)
recomp_raw = self.recomp_bin.read(match.recomp_addr, match.size)
def orig_should_replace(addr: int) -> bool:
return addr > self.orig_bin.imagebase and self.orig_bin.is_relocated_addr(
addr
)
def recomp_should_replace(addr: int) -> bool:
return (
addr > self.recomp_bin.imagebase
and self.recomp_bin.is_relocated_addr(addr)
)
def orig_lookup(addr: int) -> Optional[str]:
m = self._db.get_by_orig(addr)
if m is None:
@@ -254,11 +274,21 @@ class Compare:
return m.match_name()
orig_should_replace = create_reloc_lookup(self.orig_bin)
recomp_should_replace = create_reloc_lookup(self.recomp_bin)
orig_float = create_float_lookup(self.orig_bin)
recomp_float = create_float_lookup(self.recomp_bin)
orig_parse = ParseAsm(
relocate_lookup=orig_should_replace, name_lookup=orig_lookup
relocate_lookup=orig_should_replace,
name_lookup=orig_lookup,
float_lookup=orig_float,
)
recomp_parse = ParseAsm(
relocate_lookup=recomp_should_replace, name_lookup=recomp_lookup
relocate_lookup=recomp_should_replace,
name_lookup=recomp_lookup,
float_lookup=recomp_float,
)
orig_asm = orig_parse.parse_asm(orig_raw, match.orig_addr)