mirror of
				https://github.com/isledecomp/isle.git
				synced 2025-10-26 01:44:19 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			266 lines
		
	
	
		
			8.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			266 lines
		
	
	
		
			8.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """For all addresses matched by code annotations or recomp pdb,
 | |
| report how "far off" the recomp symbol is from its proper place
 | |
| in the original binary."""
 | |
| 
 | |
| import os
 | |
| import argparse
 | |
| import logging
 | |
| from typing import List, Optional
 | |
| from collections import namedtuple
 | |
| from isledecomp import Bin as IsleBin
 | |
| from isledecomp.cvdump import Cvdump
 | |
| from isledecomp.compare import Compare as IsleCompare
 | |
| from isledecomp.types import SymbolType
 | |
| 
 | |
# Ignore all compare-db messages.
# A NullHandler is attached to the "isledecomp.compare" logger so records
# emitted there have a handler that discards them; this file configures no
# other logging.
logging.getLogger("isledecomp.compare").addHandler(logging.NullHandler())
 | |
| 
 | |
| 
 | |
def or_blank(value) -> str:
    """Render ``value`` as text for report output.

    ``None`` becomes an empty string; anything else is passed through
    ``str()``.
    """
    if value is None:
        return ""
    return str(value)
 | |
| 
 | |
| 
 | |
class ModuleMap:
    """Look up the contributing module for an address in the recomp binary.

    Loads the module list and the section contributions from the pdb (via
    cvdump) and keeps just enough of it to answer "which module does this
    recomp address belong to?".
    """

    def __init__(self, pdb, binfile) -> None:
        """Read module and section-contribution data for ``pdb``.

        ``binfile`` converts each contribution's (section, offset) pair into
        an absolute address. Contributions whose section ``binfile`` does not
        report as valid are dropped.
        """
        cvdump = Cvdump(pdb).section_contributions().modules().run()

        # module id -> (lib, obj)
        self.module_lookup = {m.id: (m.lib, m.obj) for m in cvdump.modules}

        # One (absolute start address, size, module id) entry per contribution.
        self.section_contrib = [
            (
                binfile.get_abs_addr(sizeref.section, sizeref.offset),
                sizeref.size,
                sizeref.module,
            )
            for sizeref in cvdump.sizerefs
            if binfile.is_valid_section(sizeref.section)
        ]

    def get_module(self, addr: int) -> Optional[tuple]:
        """Return the ``(lib, obj)`` pair of the module that contains ``addr``.

        The contributions are scanned in order. A contribution whose range
        covers ``addr`` but whose module id is not in ``module_lookup`` is
        skipped and the scan continues. Returns ``None`` when nothing matches.
        """
        for start, size, module_id in self.section_contrib:
            # Half-open range: [start, start + size)
            if start <= addr < start + size:
                if (module := self.module_lookup.get(module_id)) is not None:
                    return module

        return None
 | |
| 
 | |
| 
 | |
def print_sections(sections):
    """Print a table describing each entry of ``sections``.

    One row per section: its name (decoded as ASCII, trailing NUL bytes
    removed), then virtual address, virtual size and raw data size, all in
    hexadecimal. A blank line is printed after the table.
    """
    print("    name |    start |   v.size | raw size")
    print("---------|----------|----------|----------")
    for section in sections:
        label = section.name.decode("ascii").rstrip("\x00")
        columns = (
            f"{label:>8}",
            f"{section.virtual_address:8x}",
            f"{section.virtual_size:8x}",
            f"{section.size_of_raw_data:8x}",
        )
        print(" | ".join(columns))
    print()
 | |
| 
 | |
| 
 | |
def match_type_abbreviation(mtype: Optional[SymbolType]) -> str:
    """Abbreviate a SymbolType for the report's type column.

    Returns the first three characters of the member's lowercased name, or
    an empty string when ``mtype`` is None.
    """
    return "" if mtype is None else mtype.name.lower()[:3]
 | |
| 
 | |
| 
 | |
# One report line. Any field other than sym_type may be None when the value
# is unavailable for the match.
RoadmapRow = namedtuple(
    "RoadmapRow",
    (
        "orig_sect_ofs",  # "ssss:oooooooo" section:offset text, original binary
        "recomp_sect_ofs",  # same format, recompiled binary
        "orig_addr",  # address in the original binary
        "recomp_addr",  # address in the recompiled binary
        "displacement",  # recomp offset minus orig offset (same section only)
        "sym_type",  # three-letter type abbreviation
        "size",
        "name",
        "module",
    ),
)
 | |
| 
 | |
| 
 | |
def print_text_report(results: List[RoadmapRow]):
    """Print every row, including the recomp section:offset column.

    Columns, separated by two spaces: original section:offset, recomp
    section:offset, displacement (right-aligned), symbol type, size, name.
    None values are shown as blanks.
    """
    for entry in results:
        columns = (
            f"{or_blank(entry.orig_sect_ofs):14}",
            f"{or_blank(entry.recomp_sect_ofs):14}",
            f"{or_blank(entry.displacement):>8}",
            f"{entry.sym_type:3}",
            f"{or_blank(entry.size):6}",
            or_blank(entry.name),
        )
        print("  ".join(columns))
 | |
| 
 | |
| 
 | |
def print_diff_report(results: List[RoadmapRow]):
    """Print a report suitable for diffing between runs.

    Only rows that have both an original and a recomp address are printed.
    The recomp section:offset column is left out because recomp addresses
    change from build to build.
    """
    for entry in results:
        if entry.orig_addr is not None and entry.recomp_addr is not None:
            columns = (
                f"{or_blank(entry.orig_sect_ofs):14}",
                f"{or_blank(entry.displacement):>8}",
                f"{entry.sym_type:3}",
                f"{or_blank(entry.size):6}",
                or_blank(entry.name),
            )
            print("  ".join(columns))
 | |
| 
 | |
| 
 | |
def export_to_csv(csv_file: str, results: List[RoadmapRow]):
    """Write ``results`` to ``csv_file`` as CSV, preceded by a header line.

    None values are written as empty fields. Fields are emitted through
    ``csv.writer`` so that a value containing a comma, a double quote or a
    line break (e.g. a templated symbol name or a repr()'d string) is quoted
    instead of spilling into extra columns.
    """
    # Local import: the csv module is only needed on this export path.
    import csv

    header = (
        "orig_sect_ofs",
        "recomp_sect_ofs",
        "orig_addr",
        "recomp_addr",
        "displacement",
        "row_type",
        "size",
        "name",
        "module",
    )

    with open(csv_file, "w+", encoding="utf-8") as f:
        # Keep "\n" as the row terminator so rows that need no quoting are
        # written exactly as before; the file object still applies the
        # platform's usual newline translation.
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(header)
        writer.writerows(map(or_blank, row) for row in results)
 | |
| 
 | |
| 
 | |
def parse_args() -> argparse.Namespace:
    """Parse the command line and validate the given paths.

    Unrecognized arguments are ignored. If any of the three input files or
    the source directory is missing, the parser reports the error and exits.
    """
    parser = argparse.ArgumentParser(
        description="Show all addresses from original and recomp."
    )

    positionals = (
        ("original", "original-binary", "The original binary"),
        ("recompiled", "recompiled-binary", "The recompiled binary"),
        ("pdb", "recompiled-pdb", "The PDB of the recompiled binary"),
        ("decomp_dir", "decomp-dir", "The decompiled source tree"),
    )
    for dest, metavar, help_text in positionals:
        parser.add_argument(dest, metavar=metavar, help=help_text)

    parser.add_argument("--csv", metavar="<file>", help="If set, export to CSV")
    parser.add_argument(
        "--verbose", "-v", action="store_true", help="Show recomp addresses in output"
    )

    args, _unknown = parser.parse_known_args()

    # Checked in order; parser.error() exits on the first failure.
    checks = (
        (
            os.path.isfile,
            args.original,
            f"Original binary {args.original} does not exist",
        ),
        (
            os.path.isfile,
            args.recompiled,
            f"Recompiled binary {args.recompiled} does not exist",
        ),
        (
            os.path.isfile,
            args.pdb,
            f"Symbols PDB {args.pdb} does not exist",
        ),
        (
            os.path.isdir,
            args.decomp_dir,
            f"Source directory {args.decomp_dir} does not exist",
        ),
    )
    for exists, path, message in checks:
        if not exists(path):
            parser.error(message)

    return args
 | |
| 
 | |
| 
 | |
def main():
    """Build the roadmap rows for every match and output them.

    Output goes to a CSV file when --csv is given; otherwise it is printed,
    as the full text report (with section tables) under --verbose or as the
    diff-friendly report by default.
    """
    args = parse_args()

    with IsleBin(args.original, find_str=True) as orig_bin, IsleBin(
        args.recompiled
    ) as recomp_bin:
        engine = IsleCompare(orig_bin, recomp_bin, args.pdb, args.decomp_dir)
        module_map = ModuleMap(args.pdb, recomp_bin)

        def is_same_section(orig: int, recomp: int) -> bool:
            """Tell whether two 1-based section numbers name the same section.

            Section names are compared rather than indices because
            LEGO1.dll adds extra sections for some reason. (Smacker library?)
            """
            try:
                return (
                    orig_bin.sections[orig - 1].name
                    == recomp_bin.sections[recomp - 1].name
                )
            except IndexError:
                return False

        def to_roadmap_row(match):
            """Convert one match from the compare engine into a RoadmapRow."""
            has_orig = match.orig_addr is not None
            has_recomp = match.recomp_addr is not None

            # Only the obj half of the (lib, obj) pair is reported.
            module_name = None
            if has_recomp:
                module_ref = module_map.get_module(match.recomp_addr)
                if module_ref is not None:
                    _lib, module_name = module_ref

            row_type = match_type_abbreviation(match.compare_type)

            # Strings are shown as their repr.
            if match.compare_type == SymbolType.STRING:
                name = repr(match.name)
            else:
                name = match.name

            orig_sect = orig_ofs = orig_sect_ofs = None
            if has_orig:
                orig_sect, orig_ofs = orig_bin.get_relative_addr(match.orig_addr)
                orig_sect_ofs = f"{orig_sect:04}:{orig_ofs:08x}"

            recomp_sect = recomp_ofs = recomp_sect_ofs = None
            if has_recomp:
                recomp_sect, recomp_ofs = recomp_bin.get_relative_addr(
                    match.recomp_addr
                )
                recomp_sect_ofs = f"{recomp_sect:04}:{recomp_ofs:08x}"

            # Displacement is only meaningful within the same section.
            displacement = None
            if (
                orig_sect is not None
                and recomp_sect is not None
                and is_same_section(orig_sect, recomp_sect)
            ):
                displacement = recomp_ofs - orig_ofs

            return RoadmapRow(
                orig_sect_ofs=orig_sect_ofs,
                recomp_sect_ofs=recomp_sect_ofs,
                orig_addr=match.orig_addr if has_orig else None,
                recomp_addr=match.recomp_addr if has_recomp else None,
                displacement=displacement,
                sym_type=row_type,
                size=match.size,
                name=name,
                module=module_name,
            )

        results = [to_roadmap_row(match) for match in engine.get_all()]

        if args.csv is not None:
            export_to_csv(args.csv, results)
        elif args.verbose:
            print("ORIG sections:")
            print_sections(orig_bin.sections)

            print("RECOMP sections:")
            print_sections(recomp_bin.sections)

            print_text_report(results)
        else:
            print_diff_report(results)
 | |
| 
 | |
| 
 | |
# Run the report only when this file is executed as a script.
if __name__ == "__main__":
    main()
 | 
