Pointer substitution can use offset into variable (#841)

This commit is contained in:
MS
2024-04-23 17:06:43 -04:00
committed by GitHub
parent 9025d5ed06
commit 41be78ed1c
5 changed files with 91 additions and 19 deletions

View File

@@ -56,7 +56,7 @@ class ParseAsm:
def __init__(
self,
relocate_lookup: Optional[Callable[[int], bool]] = None,
name_lookup: Optional[Callable[[int], str]] = None,
name_lookup: Optional[Callable[[int, bool], str]] = None,
bin_lookup: Optional[Callable[[int, int], Optional[bytes]]] = None,
) -> None:
self.relocate_lookup = relocate_lookup
@@ -86,13 +86,15 @@ class ParseAsm:
return None
def lookup(self, addr: int, use_cache: bool = True) -> Optional[str]:
def lookup(
self, addr: int, use_cache: bool = True, exact: bool = False
) -> Optional[str]:
"""Return a replacement name for this address if we find one."""
if use_cache and (cached := self.replacements.get(addr, None)) is not None:
return cached
if callable(self.name_lookup):
if (name := self.name_lookup(addr)) is not None:
if (name := self.name_lookup(addr, exact)) is not None:
if use_cache:
self.replacements[addr] = name
@@ -210,7 +212,7 @@ class ParseAsm:
# If we have a name for this address, use it. If not,
# do not create a new placeholder. We will instead
# fall through to generic jump handling below.
potential_name = self.lookup(op_str_address)
potential_name = self.lookup(op_str_address, exact=True)
if potential_name is not None:
return (inst.mnemonic, potential_name)

View File

@@ -2,6 +2,7 @@ import os
import logging
import difflib
import struct
import uuid
from dataclasses import dataclass
from typing import Callable, Iterable, List, Optional
from isledecomp.bin import Bin as IsleBin, InvalidVirtualAddressError
@@ -71,6 +72,9 @@ class Compare:
self.recomp_bin = recomp_bin
self.pdb_file = pdb_file
self.code_dir = code_dir
# Controls whether we dump the asm output to a file
self.debug: bool = False
self.runid: str = uuid.uuid4().hex[:8]
self._lines_db = LinesDb(code_dir)
self._db = CompareDb()
@@ -452,6 +456,16 @@ class Compare:
)
self._db.set_function_pair(orig_addr, recomp_addr)
def _dump_asm(self, orig_combined, recomp_combined):
    """Append both assembly listings to this run's debug text files.

    Each argument is an iterable of (addr, line) pairs. The original
    listing is appended to `orig-<runid>.txt` and the recomp listing to
    `recomp-<runid>.txt`, both relative to the current working directory.
    Every pair is written as one "addr: line" row.
    """
    targets = (
        (f"orig-{self.runid}.txt", orig_combined),
        (f"recomp-{self.runid}.txt", recomp_combined),
    )
    for path, listing in targets:
        # Append mode: successive calls in the same run accumulate in
        # the same pair of files.
        with open(path, "a", encoding="utf-8") as out:
            out.writelines(f"{addr}: {line}\n" for addr, line in listing)
def _compare_function(self, match: MatchInfo) -> DiffReport:
# Detect when the recomp function size would cause us to read
# enough bytes from the original function that we cross into
@@ -478,19 +492,33 @@ class Compare:
except IndexError:
pass
def orig_lookup(addr: int) -> Optional[str]:
m = self._db.get_by_orig(addr)
def orig_lookup(addr: int, exact: bool) -> Optional[str]:
m = self._db.get_by_orig(addr, exact)
if m is None:
return None
return m.match_name()
if m.orig_addr == addr:
return m.match_name()
def recomp_lookup(addr: int) -> Optional[str]:
m = self._db.get_by_recomp(addr)
offset = addr - m.orig_addr
if m.compare_type != SymbolType.DATA or offset >= m.size:
return None
return m.offset_name(offset)
def recomp_lookup(addr: int, exact: bool) -> Optional[str]:
m = self._db.get_by_recomp(addr, exact)
if m is None:
return None
return m.match_name()
if m.recomp_addr == addr:
return m.match_name()
offset = addr - m.recomp_addr
if m.compare_type != SymbolType.DATA or offset >= m.size:
return None
return m.offset_name(offset)
orig_should_replace = create_reloc_lookup(self.orig_bin)
recomp_should_replace = create_reloc_lookup(self.recomp_bin)
@@ -512,6 +540,9 @@ class Compare:
orig_combined = orig_parse.parse_asm(orig_raw, match.orig_addr)
recomp_combined = recomp_parse.parse_asm(recomp_raw, match.recomp_addr)
if self.debug:
self._dump_asm(orig_combined, recomp_combined)
# Detach addresses from asm lines for the text diff.
orig_asm = [x[1] for x in orig_combined]
recomp_asm = [x[1] for x in recomp_combined]

View File

@@ -53,7 +53,7 @@ class MatchInfo:
self.name = name
self.size = size
def match_name(self) -> str:
def match_name(self) -> Optional[str]:
"""Combination of the name and compare type.
Intended for name substitution in the diff. If there is a diff,
it will be more obvious what this symbol indicates."""
@@ -64,6 +64,12 @@ class MatchInfo:
name = repr(self.name) if ctype == "STRING" else self.name
return f"{name} ({ctype})"
def offset_name(self, ofs: int) -> Optional[str]:
    """Return a display name for a location `ofs` past this symbol.

    The result has the form "<name>+<ofs> (OFFSET)". Returns None when
    the symbol has no name.
    """
    return None if self.name is None else f"{self.name}+{ofs} (OFFSET)"
def matchinfo_factory(_, row):
    """Row factory that turns a result row into a MatchInfo.

    The first argument is ignored; the values of `row` are passed
    positionally to the MatchInfo constructor.
    """
    info = MatchInfo(*row)
    return info
@@ -135,7 +141,32 @@ class CompareDb:
cur.row_factory = matchinfo_factory
return cur.fetchone()
def get_by_orig(self, addr: int) -> Optional[MatchInfo]:
def _get_closest_orig(self, addr: int) -> Optional[int]:
    """Return the highest `orig_addr` in `symbols` that is not above `addr`.

    The lookup is the SQL aggregate max(orig_addr) over rows where
    addr >= orig_addr. Returns None when the query yields no row or the
    aggregate comes back empty (no symbol at or below `addr`).
    """
    query = (
        "SELECT max(orig_addr) FROM `symbols`\n"
        "WHERE ? >= orig_addr\n"
        "LIMIT 1\n"
    )
    row = self._db.execute(query, (addr,)).fetchone()
    if row is None:
        return None
    return row[0]
def _get_closest_recomp(self, addr: int) -> Optional[int]:
    """Return the highest `recomp_addr` in `symbols` that is not above `addr`.

    The lookup is the SQL aggregate max(recomp_addr) over rows where
    addr >= recomp_addr. Returns None when the query yields no row or
    the aggregate comes back empty (no symbol at or below `addr`).
    """
    query = (
        "SELECT max(recomp_addr) FROM `symbols`\n"
        "WHERE ? >= recomp_addr\n"
        "LIMIT 1\n"
    )
    found = self._db.execute(query, (addr,)).fetchone()
    if found is None:
        return None
    return found[0]
def get_by_orig(self, addr: int, exact: bool = True) -> Optional[MatchInfo]:
if not exact and not self._orig_used(addr):
addr = self._get_closest_orig(addr)
if addr is None:
return None
cur = self._db.execute(
"""SELECT * FROM `match_info`
WHERE orig_addr = ?
@@ -145,7 +176,12 @@ class CompareDb:
cur.row_factory = matchinfo_factory
return cur.fetchone()
def get_by_recomp(self, addr: int) -> Optional[MatchInfo]:
def get_by_recomp(self, addr: int, exact: bool = True) -> Optional[MatchInfo]:
if not exact and not self._recomp_used(addr):
addr = self._get_closest_recomp(addr)
if addr is None:
return None
cur = self._db.execute(
"""SELECT * FROM `match_info`
WHERE recomp_addr = ?