Roadmap tool to compare binary structure (#479)

2025-12-09 23:53:02 +00:00 · 2024-01-22 10:15:12 -05:00
parent 05bc94f030
commit a65eb9a4e0
7 changed files with 320 additions and 3 deletions
--- a/tools/isledecomp/isledecomp/bin.py
+++ b/tools/isledecomp/isledecomp/bin.py
@@ -1,6 +1,6 @@
 import logging
 import struct
-from typing import List, Optional
+from typing import List, Optional, Tuple
 from dataclasses import dataclass
 from collections import namedtuple

@@ -365,6 +365,14 @@ class Bin:
        into an absolute vaddr."""
        return self.get_section_offset_by_index(section) + offset

+    def get_relative_addr(self, addr: int) -> Tuple[int, int]:
+        """Convert an absolute address into a (1-based section, offset) pair."""
+        for i, section in enumerate(self.sections):
+            if section.contains_vaddr(addr):
+                return (i + 1, addr - section.virtual_address)
+
+        return (0, 0)  # addr is not inside any section
+
    def get_raw_addr(self, vaddr: int) -> int:
        """Returns the raw offset in the PE binary for the given virtual address."""
        self._set_section_for_vaddr(vaddr)
--- a/tools/isledecomp/isledecomp/compare/core.py
+++ b/tools/isledecomp/isledecomp/compare/core.py
@@ -409,6 +409,9 @@ class Compare:

    ## Public API

+    def get_all(self) -> List[MatchInfo]:
+        return self._db.get_all()  # delegates to the db; no type filter applied here
+
    def get_functions(self) -> List[MatchInfo]:
        return self._db.get_matches_by_type(SymbolType.FUNCTION)

--- a/tools/isledecomp/isledecomp/compare/db.py
+++ b/tools/isledecomp/isledecomp/compare/db.py
@@ -82,6 +82,17 @@ class CompareDb:

        return [string for (string,) in cur.fetchall()]

+    def get_all(self) -> List[MatchInfo]:
+        cur = self._db.execute(  # every row of symbols; NULL orig_addr sorts last
+            """SELECT compare_type, orig_addr, recomp_addr, name, size
+            FROM `symbols`
+            ORDER BY orig_addr NULLS LAST
+            """,
+        )
+        cur.row_factory = matchinfo_factory  # presumably builds MatchInfo rows
+
+        return cur.fetchall()
+
    def get_matches(self) -> Optional[MatchInfo]:
        cur = self._db.execute(
            """SELECT compare_type, orig_addr, recomp_addr, name, size
--- a/tools/isledecomp/isledecomp/cvdump/parser.py
+++ b/tools/isledecomp/isledecomp/cvdump/parser.py
@@ -39,6 +39,9 @@ _gdata32_regex = re.compile(
    r"S_GDATA32: \[(?P<section>\w{4}):(?P<offset>\w{8})\], Type:\s*(?P<type>\S+), (?P<name>.+)"
 )

+# e.g. 0003 "CMakeFiles/isle.dir/ISLE/res/isle.rc.res"
+# e.g. 0004 "C:\work\lego-island\isle\3rdparty\smartheap\SHLW32MT.LIB" "check.obj"
+_module_regex = re.compile(r"(?P<id>\w{4})(?: \"(?P<lib>.+?)\")?(?: \"(?P<obj>.+?)\")")

 # User functions only
 LinesEntry = namedtuple("LinesEntry", "filename line_no section offset")
@@ -52,13 +55,16 @@ PublicsEntry = namedtuple("PublicsEntry", "type section offset flags name")
 SymbolsEntry = namedtuple("SymbolsEntry", "type section offset size name")

 # (Estimated) size of any symbol
-SizeRefEntry = namedtuple("SizeRefEntry", "section offset size")
+SizeRefEntry = namedtuple("SizeRefEntry", "module section offset size")

 # global variables
 GdataEntry = namedtuple("GdataEntry", "section offset type name")

+ModuleEntry = namedtuple("ModuleEntry", "id lib obj")
+

 class CvdumpParser:
+    # pylint: disable=too-many-instance-attributes
    def __init__(self) -> None:
        self._section: str = ""
        self._lines_function: Tuple[str, int] = ("", 0)
@@ -68,6 +74,7 @@ class CvdumpParser:
        self.symbols = []
        self.sizerefs = []
        self.globals = []
+        self.modules = []

    def _lines_section(self, line: str):
        """Parsing entries from the LINES section. We only care about the pairs of
@@ -144,12 +151,26 @@ class CvdumpParser:
        if (match := _section_contrib_regex.match(line)) is not None:
            self.sizerefs.append(
                SizeRefEntry(
+                    module=int(match.group("module"), 16),
                    section=int(match.group("section"), 16),
                    offset=int(match.group("offset"), 16),
                    size=int(match.group("size"), 16),
                )
            )

+    def _modules_section(self, line: str):
+        """Record the object file (and lib file, if used) linked into the binary.
+        The id (parsed as hex) is cross-referenced in SECTION CONTRIBUTIONS; lib
+        is None for lines with one quoted path. Non-matching lines are ignored."""
+        if (match := _module_regex.match(line)) is not None:
+            self.modules.append(
+                ModuleEntry(
+                    id=int(match.group("id"), 16),
+                    lib=match.group("lib"),
+                    obj=match.group("obj"),
+                )
+            )
+
    def read_line(self, line: str):
        # Blank lines are there to help the reader; they have no context significance
        if line.strip() == "":
@@ -174,6 +195,9 @@ class CvdumpParser:
        elif self._section == "GLOBALS":
            self._globals_section(line)

+        elif self._section == "MODULES":
+            self._modules_section(line)
+
    def read_lines(self, lines: Iterable[str]):
        for line in lines:
            self.read_line(line)
--- a/tools/isledecomp/isledecomp/cvdump/runner.py
+++ b/tools/isledecomp/isledecomp/cvdump/runner.py
@@ -13,6 +13,7 @@ class DumpOpt(Enum):
    GLOBALS = 2
    PUBLICS = 3
    SECTION_CONTRIB = 4
+    MODULES = 5


 cvdump_opt_map = {
@@ -21,6 +22,7 @@ cvdump_opt_map = {
    DumpOpt.GLOBALS: "-g",
    DumpOpt.PUBLICS: "-p",
    DumpOpt.SECTION_CONTRIB: "-seccontrib",
+    DumpOpt.MODULES: "-m",
 }


@@ -49,6 +51,10 @@ class Cvdump:
        self._options.add(DumpOpt.SECTION_CONTRIB)
        return self

+    def modules(self):
+        self._options.add(DumpOpt.MODULES)  # "-m" in cvdump_opt_map
+        return self  # returning self allows chained option calls
+
    def cmd_line(self) -> List[str]:
        cvdump_exe = lib_path_join("cvdump.exe")
        flags = [cvdump_opt_map[opt] for opt in self._options]