reccmp: New diff option (#563)

2025-10-23 16:34:06 +00:00 · 2024-02-15 03:33:40 -05:00
parent 271df035fd
commit 8aa9d9a8b3
2 changed files with 283 additions and 46 deletions
--- a/tools/isledecomp/isledecomp/utils.py
+++ b/tools/isledecomp/isledecomp/utils.py
@@ -1,5 +1,7 @@
 import os
 import sys
+from datetime import datetime
+import logging
 import colorama


@@ -27,5 +29,217 @@ def print_diff(udiff, plain):
    return has_diff


+def get_percent_color(value: float) -> str:
+    """Choose the colorama foreground color code for a match ratio:
+    green for exactly 1.0, yellow for anything above 0.8, red otherwise."""
+    if value == 1.0:
+        color = colorama.Fore.GREEN
+    elif value > 0.8:
+        color = colorama.Fore.YELLOW
+    else:
+        color = colorama.Fore.RED
+
+    return color
+
+
+def percent_string(
+    ratio: float, is_effective: bool = False, is_plain: bool = False
+) -> str:
+    """Format the given ratio as a percentage with two decimal places.
+    An effective match (is_effective) gets a trailing asterisk.
+    With is_plain, the text is returned without any colorama ANSI codes."""
+
+    text = "{:.2f}%".format(ratio * 100)
+    star = "*" if is_effective else ""
+
+    # Plain output is just the number and the optional marker.
+    if is_plain:
+        return text + star
+
+    # Colored output: the number takes the color of its ratio, the asterisk
+    # (if any) is always red, and the style is reset at the end.
+    return (
+        get_percent_color(ratio)
+        + text
+        + (colorama.Fore.RED if is_effective else "")
+        + star
+        + colorama.Style.RESET_ALL
+    )
+
+
+def diff_json_display(show_both_addrs: bool = False, is_plain: bool = False):
+    """Build and return the function that renders one line of the diff
+    report, honoring the reccmp display preferences given here."""
+
+    def describe(entry, missing_text):
+        # Text for one side of the "old -> new" pair: the placeholder if the
+        # entry is absent, "stub" for stubs, otherwise the match percentage.
+        if entry is None:
+            return missing_text
+
+        if entry.get("stub", False):
+            return "stub"
+
+        return percent_string(
+            entry["matching"], entry.get("effective", False), is_plain
+        )
+
+    def formatter(orig_addr, saved, new) -> str:
+        new_pct = describe(new, "gone")
+        old_pct = describe(saved, "new")
+
+        # The original address is the key, so a rename is not of interest:
+        # show the current name when there is one and only fall back
+        # to the saved name otherwise.
+        name = ""
+        recomp_addr = "n/a"
+
+        if new is not None:
+            name = new.get("name", "")
+            recomp_addr = new.get("recomp", "n/a")
+
+        if saved is not None and name == "":
+            name = saved.get("name", "")
+
+        addr_string = (
+            f"{orig_addr} / {recomp_addr:10}" if show_both_addrs else orig_addr
+        )
+
+        # The colorama ANSI codes count towards string length, so lining
+        # the columns up with f-string padding would take some effort.
+        # Keep the line free-form instead.
+        return f"{addr_string} - {name} ({old_pct} -> {new_pct})"
+
+    return formatter
+
+
+def diff_json(
+    saved_data,
+    new_data,
+    orig_file: str,
+    show_both_addrs: bool = False,
+    is_plain: bool = False,
+):
+    """Using a saved copy of the diff summary and the current data, print a
+    report showing which functions/symbols have changed match percentage.
+
+    saved_data is the saved summary: a dict with "file" (compared against the
+    lowercased base name of orig_file), "data" (the list of saved entries)
+    and, optionally, "timestamp". new_data is the list of current entries.
+    Entries are dicts matched up by their "address" value; "matching", "stub"
+    and "effective" decide which category an entry falls in.
+
+    If the saved summary was generated for a different file, an error is
+    logged and nothing is printed. show_both_addrs and is_plain are passed
+    on to diff_json_display. Returns None."""
+
+    # Don't try to diff a report generated for a different binary file
+    base_file = os.path.basename(orig_file).lower()
+    saved_file = saved_data.get("file")
+
+    if saved_file != base_file:
+        logging.getLogger().error(
+            "Diff report for '%s' does not match current file '%s'",
+            saved_file,
+            base_file,
+        )
+        return
+
+    if "timestamp" in saved_data:
+        # Drop the microseconds so the elapsed time prints as whole seconds.
+        now = datetime.now().replace(microsecond=0)
+        then = datetime.fromtimestamp(saved_data["timestamp"]).replace(microsecond=0)
+        when = then.strftime("%B %d %Y, %H:%M:%S")
+
+        print(f"Saved diff report generated {when} ({now - then} ago)")
+        print()
+
+    # Convert to dict, using orig_addr as key
+    saved_invert = {obj["address"]: obj for obj in saved_data["data"]}
+    new_invert = {obj["address"]: obj for obj in new_data}
+
+    all_addrs = set(saved_invert).union(new_invert)
+
+    # Put all the information in one place so we can decide how each item
+    # changed. Sorting here fixes the print order inside every category.
+    combined = {
+        addr: (saved_invert.get(addr), new_invert.get(addr))
+        for addr in sorted(all_addrs)
+    }
+
+    # Entries present in both the saved file and the current data.
+    # Only these can be compared by match percentage.
+    in_both = {
+        key: (saved, new)
+        for key, (saved, new) in combined.items()
+        if saved is not None and new is not None
+    }
+
+    def is_stub(entry):
+        return entry.get("stub", False)
+
+    # The criteria for diff judgement is in these dict comprehensions:
+    # Any function not in the saved file
+    new_functions = {
+        key: (saved, new) for key, (saved, new) in combined.items() if saved is None
+    }
+
+    # Any function now missing from the current data
+    # or a non-stub -> stub conversion
+    dropped_functions = {
+        key: (saved, new)
+        for key, (saved, new) in combined.items()
+        if new is None
+        or (saved is not None and is_stub(new) and not is_stub(saved))
+    }
+
+    # TODO: move these two into functions if the assessment gets more complex
+    # Any function with increased match percentage
+    # or stub -> non-stub conversion
+    improved_functions = {
+        key: (saved, new)
+        for key, (saved, new) in in_both.items()
+        if new["matching"] > saved["matching"]
+        or (not is_stub(new) and is_stub(saved))
+    }
+
+    # Any non-stub function with decreased match percentage
+    degraded_functions = {
+        key: (saved, new)
+        for key, (saved, new) in in_both.items()
+        if new["matching"] < saved["matching"]
+        and not is_stub(saved)
+        and not is_stub(new)
+    }
+
+    # Any function at 100% both before and now
+    # whose "effective" match flag has changed
+    entropy_functions = {
+        key: (saved, new)
+        for key, (saved, new) in in_both.items()
+        if new["matching"] == 1.0
+        and saved["matching"] == 1.0
+        and new.get("effective", False) != saved.get("effective", False)
+    }
+
+    get_diff_str = diff_json_display(show_both_addrs, is_plain)
+
+    for diff_name, diff_dict in [
+        ("New", new_functions),
+        ("Increased", improved_functions),
+        ("Decreased", degraded_functions),
+        ("Dropped", dropped_functions),
+        ("Compiler entropy", entropy_functions),
+    ]:
+        # Leave out the heading of any category with nothing to report.
+        if not diff_dict:
+            continue
+
+        print(f"{diff_name} ({len(diff_dict)}):")
+
+        for addr, (saved, new) in diff_dict.items():
+            print(get_diff_str(addr, saved, new))
+
+        print()
+
+
 def get_file_in_script_dir(fn):
     """Return the path of fn placed in the directory of the running script,
     where the script is taken from the absolute path of sys.argv[0]."""
     script_path = os.path.abspath(sys.argv[0])
     script_dir = os.path.dirname(script_path)
     return os.path.join(script_dir, fn)