mirror of
https://github.com/isledecomp/isle.git
synced 2025-10-24 17:04:17 +00:00
roadmap: Suggest order of modules (#507)
* roadmap: Suggest order of modules * Include more modules in the list * Sort by avg address with outliers removed * Mark order-adjusted modules. Show library order. * Use bisect for performance * Use average address for library order * Bugfix for get_module
This commit is contained in:
@@ -5,7 +5,9 @@ in the original binary."""
|
||||
import os
|
||||
import argparse
|
||||
import logging
|
||||
from typing import List, Optional
|
||||
import statistics
|
||||
import bisect
|
||||
from typing import Iterator, List, Optional, Tuple
|
||||
from collections import namedtuple
|
||||
from isledecomp import Bin as IsleBin
|
||||
from isledecomp.cvdump import Cvdump
|
||||
@@ -28,6 +30,7 @@ class ModuleMap:
|
||||
def __init__(self, pdb, binfile) -> None:
|
||||
cvdump = Cvdump(pdb).section_contributions().modules().run()
|
||||
self.module_lookup = {m.id: (m.lib, m.obj) for m in cvdump.modules}
|
||||
self.library_lookup = {m.obj: m.lib for m in cvdump.modules}
|
||||
self.section_contrib = [
|
||||
(
|
||||
binfile.get_abs_addr(sizeref.section, sizeref.offset),
|
||||
@@ -38,11 +41,37 @@ class ModuleMap:
|
||||
if binfile.is_valid_section(sizeref.section)
|
||||
]
|
||||
|
||||
# For bisect performance enhancement
|
||||
self.contrib_starts = [start for (start, _, __) in self.section_contrib]
|
||||
|
||||
def get_lib_for_module(self, module: str) -> Optional[str]:
|
||||
return self.library_lookup.get(module)
|
||||
|
||||
def get_all_cmake_modules(self) -> List[str]:
|
||||
return [
|
||||
obj
|
||||
for (_, (__, obj)) in self.module_lookup.items()
|
||||
if obj.startswith("CMakeFiles")
|
||||
]
|
||||
|
||||
def get_module(self, addr: int) -> Optional[str]:
|
||||
for start, size, module_id in self.section_contrib:
|
||||
if start <= addr < start + size:
|
||||
if (module := self.module_lookup.get(module_id)) is not None:
|
||||
return module
|
||||
i = bisect.bisect_left(self.contrib_starts, addr)
|
||||
# If the addr matches the section contribution start, we are in the
|
||||
# right spot. Otherwise, we need to subtract one here.
|
||||
# We don't want the insertion point given by bisect, but the
|
||||
# section contribution that contains the address.
|
||||
|
||||
(potential_start, _, __) = self.section_contrib[i]
|
||||
if potential_start != addr:
|
||||
i -= 1
|
||||
|
||||
# Safety catch: clamp to range of indices from section_contrib.
|
||||
i = max(0, min(i, len(self.section_contrib) - 1))
|
||||
|
||||
(start, size, module_id) = self.section_contrib[i]
|
||||
if start <= addr < start + size:
|
||||
if (module := self.module_lookup.get(module_id)) is not None:
|
||||
return module
|
||||
|
||||
return None
|
||||
|
||||
@@ -58,6 +87,9 @@ def print_sections(sections):
|
||||
print()
|
||||
|
||||
|
||||
ALLOWED_TYPE_ABBREVIATIONS = ["fun", "dat", "poi", "str", "vta"]
|
||||
|
||||
|
||||
def match_type_abbreviation(mtype: Optional[SymbolType]) -> str:
|
||||
"""Return abbreviation of the given SymbolType name"""
|
||||
if mtype is None:
|
||||
@@ -66,6 +98,42 @@ def match_type_abbreviation(mtype: Optional[SymbolType]) -> str:
|
||||
return mtype.name.lower()[:3]
|
||||
|
||||
|
||||
def get_cmakefiles_prefix(module: str) -> str:
|
||||
"""For the given .obj, get the "CMakeFiles/something.dir/" prefix.
|
||||
For lack of a better option, this is the library for this module."""
|
||||
if module.startswith("CMakeFiles"):
|
||||
return "/".join(module.split("/", 2)[:2]) + "/"
|
||||
|
||||
return module
|
||||
|
||||
|
||||
def truncate_module_name(prefix: str, module: str) -> str:
|
||||
"""Remove the CMakeFiles prefix and the .obj suffix for the given module.
|
||||
Input: CMakeFiles/lego1.dir/, CMakeFiles/lego1.dir/LEGO1/define.cpp.obj
|
||||
Output: LEGO1/define.cpp"""
|
||||
|
||||
if module.startswith(prefix):
|
||||
module = module[len(prefix) :]
|
||||
|
||||
if module.endswith(".obj"):
|
||||
module = module[:-4]
|
||||
|
||||
return module
|
||||
|
||||
|
||||
def avg_remove_outliers(entries: List[int]) -> int:
|
||||
"""Compute the average from this list of entries (addresses)
|
||||
after removing outlier values."""
|
||||
|
||||
if len(entries) == 1:
|
||||
return entries[0]
|
||||
|
||||
avg = statistics.mean(entries)
|
||||
sd = statistics.pstdev(entries)
|
||||
|
||||
return int(statistics.mean([e for e in entries if abs(e - avg) <= 2 * sd]))
|
||||
|
||||
|
||||
RoadmapRow = namedtuple(
|
||||
"RoadmapRow",
|
||||
[
|
||||
@@ -82,6 +150,144 @@ RoadmapRow = namedtuple(
|
||||
)
|
||||
|
||||
|
||||
class DeltaCollector:
|
||||
"""Reads each row of the results and aggregates information about the
|
||||
placement of each module."""
|
||||
|
||||
def __init__(self, match_type: str = "fun") -> None:
|
||||
# The displacement for each symbol from each module
|
||||
self.disp_map = {}
|
||||
|
||||
# Each address for each module
|
||||
self.addresses = {}
|
||||
|
||||
# The earliest address for each module
|
||||
self.earliest = {}
|
||||
|
||||
# String abbreviation for which symbol type we are checking
|
||||
self.match_type = "fun"
|
||||
|
||||
match_type = str(match_type).strip().lower()[:3]
|
||||
if match_type in ALLOWED_TYPE_ABBREVIATIONS:
|
||||
self.match_type = match_type
|
||||
|
||||
def read_row(self, row: RoadmapRow):
|
||||
if row.module is None:
|
||||
return
|
||||
|
||||
if row.sym_type != self.match_type:
|
||||
return
|
||||
|
||||
if row.orig_addr is not None:
|
||||
if row.module not in self.addresses:
|
||||
self.addresses[row.module] = []
|
||||
|
||||
self.addresses[row.module].append(row.orig_addr)
|
||||
|
||||
if row.orig_addr < self.earliest.get(row.module, 0xFFFFFFFFF):
|
||||
self.earliest[row.module] = row.orig_addr
|
||||
|
||||
if row.displacement is not None:
|
||||
if row.module not in self.disp_map:
|
||||
self.disp_map[row.module] = []
|
||||
|
||||
self.disp_map[row.module].append(row.displacement)
|
||||
|
||||
def iter_sorted(self) -> Iterator[Tuple[int, int]]:
|
||||
"""Compute the average address for each module, then generate them
|
||||
in ascending order."""
|
||||
avg_address = {
|
||||
mod: avg_remove_outliers(values) for mod, values in self.addresses.items()
|
||||
}
|
||||
for mod, avg in sorted(avg_address.items(), key=lambda x: x[1]):
|
||||
yield (avg, mod)
|
||||
|
||||
|
||||
def suggest_order(results: List[RoadmapRow], module_map: ModuleMap, match_type: str):
|
||||
"""Suggest the order of modules for CMakeLists.txt"""
|
||||
|
||||
dc = DeltaCollector(match_type)
|
||||
for row in results:
|
||||
dc.read_row(row)
|
||||
|
||||
# First, show the order of .obj files for the "CMake Modules"
|
||||
# Meaning: the modules where the .obj file begins with "CMakeFiles".
|
||||
# These are the libraries where we directly control the order.
|
||||
# The library name (from cvdump) doesn't make it obvious that these are
|
||||
# our libraries so we derive the name based on the CMakeFiles prefix.
|
||||
leftover_modules = set(module_map.get_all_cmake_modules())
|
||||
|
||||
# A little convoluted, but we want to take the first two tokens
|
||||
# of the string with '/' as the delimiter.
|
||||
# i.e. CMakeFiles/isle.dir/
|
||||
# The idea is to print exactly what appears in CMakeLists.txt.
|
||||
cmake_prefixes = sorted(set(get_cmakefiles_prefix(mod) for mod in leftover_modules))
|
||||
|
||||
# Save this off because we'll use it again later.
|
||||
computed_order = list(dc.iter_sorted())
|
||||
|
||||
for prefix in cmake_prefixes:
|
||||
print(prefix)
|
||||
|
||||
last_earliest = 0
|
||||
# Show modules ordered by the computed average of addresses
|
||||
for _, module in computed_order:
|
||||
if not module.startswith(prefix):
|
||||
continue
|
||||
|
||||
leftover_modules.remove(module)
|
||||
|
||||
avg_displacement = None
|
||||
displacements = dc.disp_map.get(module)
|
||||
if displacements is not None and len(displacements) > 0:
|
||||
avg_displacement = int(statistics.mean(displacements))
|
||||
|
||||
# Call attention to any modules where ordering by earliest
|
||||
# address is different from the computed order we display.
|
||||
earliest = dc.earliest.get(module)
|
||||
ooo_mark = "*" if earliest < last_earliest else " "
|
||||
last_earliest = earliest
|
||||
|
||||
code_file = truncate_module_name(prefix, module)
|
||||
print(f"0x{earliest:08x}{ooo_mark} {avg_displacement:10} {code_file}")
|
||||
|
||||
# These modules are included in the final binary (in some form) but
|
||||
# don't contribute any symbols of the type we are checking.
|
||||
# n.b. There could still be other modules that are part of
|
||||
# CMakeLists.txt but are not included in the pdb for whatever reason.
|
||||
# In other words: don't take the list we provide as the final word on
|
||||
# what should or should not be included.
|
||||
# This is merely a suggestion of the order.
|
||||
for module in leftover_modules:
|
||||
if not module.startswith(prefix):
|
||||
continue
|
||||
|
||||
# aligned with previous print
|
||||
code_file = truncate_module_name(prefix, module)
|
||||
print(f" no suggestion {code_file}")
|
||||
|
||||
print()
|
||||
|
||||
# Now display the order of all libaries in the final file.
|
||||
library_order = {}
|
||||
|
||||
for start, module in computed_order:
|
||||
lib = module_map.get_lib_for_module(module)
|
||||
if lib is None:
|
||||
lib = get_cmakefiles_prefix(module)
|
||||
|
||||
if start < library_order.get(lib, 0xFFFFFFFFF):
|
||||
library_order[lib] = start
|
||||
|
||||
print("Library order (average address shown):")
|
||||
for lib, start in sorted(library_order.items(), key=lambda x: x[1]):
|
||||
# Strip off any OS path for brevity
|
||||
if not lib.startswith("CMakeFiles"):
|
||||
lib = os.path.basename(lib)
|
||||
|
||||
print(f"{lib:40} {start:08x}")
|
||||
|
||||
|
||||
def print_text_report(results: List[RoadmapRow]):
|
||||
"""Print the result with original and recomp addresses."""
|
||||
for row in results:
|
||||
@@ -150,6 +356,13 @@ def parse_args() -> argparse.Namespace:
|
||||
parser.add_argument(
|
||||
"--verbose", "-v", action="store_true", help="Show recomp addresses in output"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--order",
|
||||
const="fun",
|
||||
nargs="?",
|
||||
type=str,
|
||||
help="Show suggested order of modules (using the specified symbol type)",
|
||||
)
|
||||
|
||||
(args, _) = parser.parse_known_args()
|
||||
|
||||
@@ -245,6 +458,10 @@ def main():
|
||||
|
||||
results = list(map(to_roadmap_row, engine.get_all()))
|
||||
|
||||
if args.order is not None:
|
||||
suggest_order(results, module_map, args.order)
|
||||
return
|
||||
|
||||
if args.csv is None:
|
||||
if args.verbose:
|
||||
print("ORIG sections:")
|
||||
|
Reference in New Issue
Block a user