mirror of
https://github.com/isledecomp/isle.git
synced 2025-10-23 16:34:06 +00:00
reccmp: New diff option (#563)
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
import logging
|
||||
import colorama
|
||||
|
||||
|
||||
@@ -27,5 +29,217 @@ def print_diff(udiff, plain):
|
||||
return has_diff
|
||||
|
||||
|
||||
def get_percent_color(value: float) -> str:
    """Pick the colorama foreground escape code for a match ratio.

    A ratio of exactly 1.0 maps to green, anything above 0.8 maps to
    yellow, and every other value maps to red."""
    if value == 1.0:
        color = colorama.Fore.GREEN
    elif value > 0.8:
        color = colorama.Fore.YELLOW
    else:
        color = colorama.Fore.RED

    return color
|
||||
|
||||
|
||||
def percent_string(
    ratio: float, is_effective: bool = False, is_plain: bool = False
) -> str:
    """Render a ratio as a percentage with two decimal places.

    When is_effective is set (an effective match), an asterisk is appended.
    When is_plain is set, the text is returned without any colorama ANSI
    codes; otherwise the percentage is colored by get_percent_color, the
    asterisk (if any) is colored red, and the style is reset at the end."""

    pct = f"{(ratio * 100):.2f}%"
    star = "*" if is_effective else ""

    # Plain output never touches colorama.
    if is_plain:
        return pct + star

    star_color = colorama.Fore.RED if is_effective else ""
    pct_color = get_percent_color(ratio)
    return f"{pct_color}{pct}{star_color}{star}{colorama.Style.RESET_ALL}"
|
||||
|
||||
|
||||
def diff_json_display(show_both_addrs: bool = False, is_plain: bool = False):
    """Build the function used to render one line of the diff report,
    honoring the reccmp display preferences.

    show_both_addrs adds the recomp address next to the original address.
    is_plain is forwarded to percent_string to suppress ANSI color codes.
    The returned callable takes (orig_addr, saved, new), where saved and new
    are the report entries for that address or None, and returns a string."""

    def match_label(entry) -> str:
        # A stub is shown by name rather than by percentage.
        if entry.get("stub", False):
            return "stub"

        return percent_string(
            entry["matching"], entry.get("effective", False), is_plain
        )

    def formatter(orig_addr, saved, new) -> str:
        if new is None:
            # Nothing in the current data for this address.
            after = "gone"
            name = ""
            recomp_addr = "n/a"
        else:
            after = match_label(new)
            # The original address is the key, so a rename is not of
            # interest here: use the current name whenever there is one.
            name = new.get("name", "")
            recomp_addr = new.get("recomp", "n/a")

        if saved is None:
            # Nothing in the saved report for this address.
            before = "new"
        else:
            before = match_label(saved)
            # Fall back to the saved name only if no current name was found.
            if name == "":
                name = saved.get("name", "")

        location = orig_addr
        if show_both_addrs:
            location = f"{orig_addr} / {recomp_addr:10}"

        # The colorama ANSI codes count towards string length, so aligning
        # the columns with f-string widths would take extra effort.
        return f"{location} - {name} ({before} -> {after})"

    return formatter
|
||||
|
||||
|
||||
def diff_json(
    saved_data,
    new_data,
    orig_file: str,
    show_both_addrs: bool = False,
    is_plain: bool = False,
):
    """Compare a saved diff summary against the current data and print a
    report of the functions/symbols whose match status changed.

    saved_data is the saved report: a mapping read for its "file", optional
    "timestamp" and "data" keys. new_data is the current list of entries.
    Entries are matched up by their "address" value. If the saved report
    names a different file than orig_file, an error is logged and nothing
    is printed. Returns None in every case."""

    # Refuse to compare against a report generated for a different binary.
    current_file = os.path.basename(orig_file).lower()
    saved_file = saved_data.get("file")

    if saved_file != current_file:
        logging.getLogger().error(
            "Diff report for '%s' does not match current file '%s'",
            saved_file,
            current_file,
        )
        return

    if "timestamp" in saved_data:
        now = datetime.now().replace(microsecond=0)
        then = datetime.fromtimestamp(saved_data["timestamp"]).replace(microsecond=0)

        header = [
            "Saved diff report generated",
            then.strftime("%B %d %Y, %H:%M:%S"),
            f"({str(now - then)} ago)",
        ]
        print(" ".join(header))

        print()

    # Index both sides by original address.
    saved_by_addr = {entry["address"]: entry for entry in saved_data["data"]}
    new_by_addr = {entry["address"]: entry for entry in new_data}

    # One (saved, new) pair per address seen on either side, in sorted order.
    combined = {
        addr: (saved_by_addr.get(addr), new_by_addr.get(addr))
        for addr in sorted(saved_by_addr.keys() | new_by_addr.keys())
    }

    new_functions = {}
    dropped_functions = {}
    improved_functions = {}
    degraded_functions = {}
    entropy_functions = {}

    # The criteria for diff judgement are all in this loop.
    for addr, pair in combined.items():
        saved, new = pair

        # New: not present in the saved report.
        if saved is None:
            new_functions[addr] = pair
            continue

        # Dropped: present in the saved report but missing from the current data.
        if new is None:
            dropped_functions[addr] = pair
            continue

        new_ratio = new["matching"]
        old_ratio = saved["matching"]
        is_stub = new.get("stub", False)
        was_stub = saved.get("stub", False)

        # Dropped also covers a non-stub -> stub conversion.
        if is_stub and not was_stub:
            dropped_functions[addr] = pair

        # Increased: higher match percentage, or a stub -> non-stub conversion.
        if new_ratio > old_ratio or (was_stub and not is_stub):
            improved_functions[addr] = pair

        # Decreased: lower match percentage where neither side is a stub.
        if new_ratio < old_ratio and not was_stub and not is_stub:
            degraded_functions[addr] = pair

        # Compiler entropy: 100% on both sides, but the "effective" flag changed.
        if (
            new_ratio == 1.0
            and old_ratio == 1.0
            and new.get("effective", False) != saved.get("effective", False)
        ):
            entropy_functions[addr] = pair

    render = diff_json_display(show_both_addrs, is_plain)

    sections = (
        ("New", new_functions),
        ("Increased", improved_functions),
        ("Decreased", degraded_functions),
        ("Dropped", dropped_functions),
        ("Compiler entropy", entropy_functions),
    )

    for title, entries in sections:
        # Leave out categories that have nothing to report.
        if not entries:
            continue

        print(f"{title} ({len(entries)}):")

        for addr, (saved, new) in entries.items():
            print(render(addr, saved, new))

        print()
|
||||
|
||||
|
||||
def get_file_in_script_dir(fn):
    """Return the path of fn inside the directory that holds the running
    script, where the script path is taken from sys.argv[0]."""
    script_path = os.path.abspath(sys.argv[0])
    script_dir = os.path.dirname(script_path)
    return os.path.join(script_dir, fn)
|
||||
|
Reference in New Issue
Block a user