mirror of
https://github.com/isledecomp/isle.git
synced 2025-10-26 09:54:18 +00:00
reccmp: HTML refactor and diff address display (#581)
* reccmp: HTML refactor and diff address display * Restore the @@ range indicator
This commit is contained in:
@@ -192,11 +192,13 @@ class ParseAsm:
|
||||
def parse_asm(self, data: bytes, start_addr: Optional[int] = 0) -> List[str]:
|
||||
asm = []
|
||||
|
||||
for inst in disassembler.disasm_lite(data, start_addr):
|
||||
for raw_inst in disassembler.disasm_lite(data, start_addr):
|
||||
# Use heuristics to disregard some differences that aren't representative
|
||||
# of the accuracy of a function (e.g. global offsets)
|
||||
result = self.sanitize(DisasmLiteInst(*inst))
|
||||
inst = DisasmLiteInst(*raw_inst)
|
||||
result = self.sanitize(inst)
|
||||
|
||||
# mnemonic + " " + op_str
|
||||
asm.append(" ".join(result))
|
||||
asm.append((hex(inst.address), " ".join(result)))
|
||||
|
||||
return asm
|
||||
|
||||
@@ -12,6 +12,7 @@ from isledecomp.dir import walk_source_dir
|
||||
from isledecomp.types import SymbolType
|
||||
from isledecomp.compare.asm import ParseAsm, can_resolve_register_differences
|
||||
from .db import CompareDb, MatchInfo
|
||||
from .diff import combined_diff
|
||||
from .lines import LinesDb
|
||||
|
||||
|
||||
@@ -307,8 +308,12 @@ class Compare:
|
||||
float_lookup=recomp_float,
|
||||
)
|
||||
|
||||
orig_asm = orig_parse.parse_asm(orig_raw, match.orig_addr)
|
||||
recomp_asm = recomp_parse.parse_asm(recomp_raw, match.recomp_addr)
|
||||
orig_combined = orig_parse.parse_asm(orig_raw, match.orig_addr)
|
||||
recomp_combined = recomp_parse.parse_asm(recomp_raw, match.recomp_addr)
|
||||
|
||||
# Detach addresses from asm lines for the text diff.
|
||||
orig_asm = [x[1] for x in orig_combined]
|
||||
recomp_asm = [x[1] for x in recomp_combined]
|
||||
|
||||
diff = difflib.SequenceMatcher(None, orig_asm, recomp_asm)
|
||||
ratio = diff.ratio()
|
||||
@@ -317,7 +322,9 @@ class Compare:
|
||||
# Check whether we can resolve register swaps which are actually
|
||||
# perfect matches modulo compiler entropy.
|
||||
is_effective_match = can_resolve_register_differences(orig_asm, recomp_asm)
|
||||
unified_diff = difflib.unified_diff(orig_asm, recomp_asm, n=10)
|
||||
unified_diff = combined_diff(
|
||||
diff, orig_combined, recomp_combined, context_size=10
|
||||
)
|
||||
else:
|
||||
is_effective_match = False
|
||||
unified_diff = []
|
||||
@@ -352,9 +359,7 @@ class Compare:
|
||||
[t for (t,) in struct.iter_unpack("<L", recomp_table)],
|
||||
)
|
||||
|
||||
def match_text(
|
||||
i: int, m: Optional[MatchInfo], raw_addr: Optional[int] = None
|
||||
) -> str:
|
||||
def match_text(m: Optional[MatchInfo], raw_addr: Optional[int] = None) -> str:
|
||||
"""Format the function reference at this vtable index as text.
|
||||
If we have not identified this function, we have the option to
|
||||
display the raw address. This is only worth doing for the original addr
|
||||
@@ -363,19 +368,18 @@ class Compare:
|
||||
should override the given function from the superclass, but we have not
|
||||
implemented this yet.
|
||||
"""
|
||||
index = f"vtable0x{i*4:02x}"
|
||||
|
||||
if m is not None:
|
||||
orig = hex(m.orig_addr) if m.orig_addr is not None else "no orig"
|
||||
recomp = (
|
||||
hex(m.recomp_addr) if m.recomp_addr is not None else "no recomp"
|
||||
)
|
||||
return f"{index:>12} : ({orig:10} / {recomp:10}) : {m.name}"
|
||||
return f"({orig} / {recomp}) : {m.name}"
|
||||
|
||||
if raw_addr is not None:
|
||||
return f"{index:>12} : 0x{raw_addr:x} from orig not annotated."
|
||||
return f"0x{raw_addr:x} from orig not annotated."
|
||||
|
||||
return f"{index:>12} : (no match)"
|
||||
return "(no match)"
|
||||
|
||||
orig_text = []
|
||||
recomp_text = []
|
||||
@@ -395,14 +399,22 @@ class Compare:
|
||||
ratio += 1
|
||||
|
||||
n_entries += 1
|
||||
orig_text.append(match_text(i, orig, raw_orig))
|
||||
recomp_text.append(match_text(i, recomp))
|
||||
index = f"vtable0x{i*4:02x}"
|
||||
orig_text.append((index, match_text(orig, raw_orig)))
|
||||
recomp_text.append((index, match_text(recomp)))
|
||||
|
||||
ratio = ratio / float(n_entries) if n_entries > 0 else 0
|
||||
|
||||
# n=100: Show the entire table if there is a diff to display.
|
||||
# Otherwise it would be confusing if the table got cut off.
|
||||
unified_diff = difflib.unified_diff(orig_text, recomp_text, n=100)
|
||||
|
||||
sm = difflib.SequenceMatcher(
|
||||
None,
|
||||
[x[1] for x in orig_text],
|
||||
[x[1] for x in recomp_text],
|
||||
)
|
||||
|
||||
unified_diff = combined_diff(sm, orig_text, recomp_text, context_size=100)
|
||||
|
||||
return DiffReport(
|
||||
match_type=SymbolType.VTABLE,
|
||||
|
||||
81
tools/isledecomp/isledecomp/compare/diff.py
Normal file
81
tools/isledecomp/isledecomp/compare/diff.py
Normal file
@@ -0,0 +1,81 @@
|
||||
from difflib import SequenceMatcher
|
||||
from typing import Dict, List, Tuple
|
||||
|
||||
# Each input line is an (address, text) pair. The address slot may be None
# for lines that have no address (the code below checks for this).
CombinedDiffInput = List[Tuple[str, str]]
# Each output entry is (hunk header, subgroups). A subgroup is a dict holding
# either {"both": [(orig_addr, text, recomp_addr), ...]} for matching lines or
# {"orig": [(addr, text), ...], "recomp": [(addr, text), ...]} for differences.
CombinedDiffOutput = List[Tuple[str, List[Dict[str, List[Tuple[str, ...]]]]]]


def _address_range(addrs) -> str:
    """Return "first,last" for the addresses seen on one side of a diff group.

    Returns an empty string when no address was seen, so the caller can still
    build a hunk header instead of failing on an empty sequence.
    """
    if not addrs:
        return ""

    # NOTE: addresses are compared as strings, exactly as given by the caller.
    # This yields the numeric first/last only when the strings share a width.
    ordered = sorted(addrs)
    return f"{ordered[0]},{ordered[-1]}"


def combined_diff(
    diff: SequenceMatcher,
    orig_combined: CombinedDiffInput,
    recomp_combined: CombinedDiffInput,
    context_size: int = 3,
) -> CombinedDiffOutput:
    """Build a unified-style diff that keeps the address of every line.

    We want to diff the original and recomp assembly. The "combined" input has
    two components per line: the address of the instruction and the text.
    The text alone has already been diffed; that is the SequenceMatcher object.
    Its opcodes describe how to turn "Text A" into "Text B" using list indices
    into the diffed sequences, so the same indices are used here to slice the
    combined lists and recover the address for each line.

    This is almost the same procedure as difflib.unified_diff, except that the
    already generated SequenceMatcher is reused.

    Args:
        diff: SequenceMatcher built from the text column of both inputs.
        orig_combined: (address, text) pairs for the original.
        recomp_combined: (address, text) pairs for the recompilation.
        context_size: number of unchanged context lines around each change.

    Returns:
        One (hunk header, subgroups) tuple per diff group. The result is empty
        when the matcher reports no differences.
    """

    unified_diff = []

    for group in diff.get_grouped_opcodes(context_size):
        subgroups = []

        # Collect every address seen in this group to build the "@@" hunk
        # header. Not every line has an address, so reading only the first and
        # last line of the group could leave the range incomplete.
        orig_addrs = set()
        recomp_addrs = set()

        for code, i1, i2, j1, j2 in group:
            orig_slice = orig_combined[i1:i2]
            recomp_slice = recomp_combined[j1:j2]

            orig_addrs.update(addr for addr, _ in orig_slice if addr is not None)
            recomp_addrs.update(
                addr for addr, _ in recomp_slice if addr is not None
            )

            if code == "equal":
                # Equal sections have the same length on both sides. The text
                # is needed from only one side, but the address from both.
                both = [
                    (orig_addr, text, recomp_addr)
                    for (orig_addr, text), (recomp_addr, _) in zip(
                        orig_slice, recomp_slice
                    )
                ]
                subgroups.append({"both": both})
            else:
                subgroups.append({"orig": orig_slice, "recomp": recomp_slice})

        # A group can have no addresses at all on one side, e.g. when one
        # input is empty and the group is a pure insert or delete. Emit the
        # best header available rather than raising IndexError.
        orig_range = _address_range(orig_addrs)
        recomp_range = _address_range(recomp_addrs)
        diff_slug = f"@@ -{orig_range} +{recomp_range} @@"

        unified_diff.append((diff_slug, subgroups))

    return unified_diff
|
||||
Reference in New Issue
Block a user