BETA10: reccmp support and Ghidra imports (#1091)

* Implement core functionality (before linter)

* run linter + formatter

* Review: remove redundant code

* Implement end of range check for vtables

---------

Co-authored-by: jonschz <jonschz@users.noreply.github.com>
This commit is contained in:
jonschz
2024-08-29 20:54:23 +02:00
committed by GitHub
parent ce3fc57025
commit b898d98515
11 changed files with 254 additions and 97 deletions

View File

@@ -21,6 +21,7 @@
# Disable spurious warnings in vscode / pylance
# pyright: reportMissingModuleSource=false
from enum import Enum
import importlib
from dataclasses import dataclass, field
import logging.handlers
@@ -64,6 +65,25 @@ class Globals:
statistics: Statistics = field(default_factory=Statistics)
class SupportedModules(Enum):
    """Original binaries that the import script can handle."""

    LEGO1 = 1
    BETA10 = 2

    def orig_filename(self) -> str:
        """Return the file name of the original binary for this module."""
        # Compare against the class attribute instead of `self.LEGO1`: looking up
        # one enum member through another emits a DeprecationWarning on Python 3.11.
        if self is SupportedModules.LEGO1:
            return "LEGO1.DLL"
        return "BETA10.DLL"

    def recomp_filename_without_extension(self) -> str:
        """Return the base name (no extension) of the recompiled binary."""
        # Currently the same for every module; kept as a method so that it can
        # vary per module if more of them are supported later.
        return "LEGO1"

    def build_dir_name(self) -> str:
        """Return the name of the build directory used for this module."""
        if self is SupportedModules.BETA10:
            return "build_debug"
        return "build"
# hard-coded settings that we don't want to be prompted for in Ghidra every time
GLOBALS = Globals(
verbose=False,
@@ -133,7 +153,7 @@ def import_function_into_ghidra(
# Find the Ghidra function at that address
ghidra_address = getAddressFactory().getAddress(hex_original_address)
# pylint: disable=possibly-used-before-assignment
function_importer = PdbFunctionImporter(api, pdb_function, type_importer)
function_importer = PdbFunctionImporter.build(api, pdb_function, type_importer)
ghidra_function = getFunctionAt(ghidra_address)
if ghidra_function is None:
@@ -208,11 +228,29 @@ def log_and_track_failure(
def main():
if GLOBALS.running_from_ghidra:
origfile_name = getProgramFile().getName()
if origfile_name == "LEGO1.DLL":
module = SupportedModules.LEGO1
elif origfile_name in ["LEGO1D.DLL", "BETA10.DLL"]:
module = SupportedModules.BETA10
else:
raise Lego1Exception(
f"Unsupported file name in import script: {origfile_name}"
)
else:
module = SupportedModules.LEGO1
logger.info("Importing file: %s", module.orig_filename())
repo_root = get_repository_root()
origfile_path = repo_root.joinpath("LEGO1.DLL")
build_path = repo_root.joinpath("build")
recompiledfile_path = build_path.joinpath("LEGO1.DLL")
pdb_path = build_path.joinpath("LEGO1.pdb")
origfile_path = repo_root.joinpath("legobin").joinpath(module.orig_filename())
build_directory = repo_root.joinpath(module.build_dir_name())
recompiledfile_name = f"{module.recomp_filename_without_extension()}.DLL"
recompiledfile_path = build_directory.joinpath(recompiledfile_name)
pdbfile_name = f"{module.recomp_filename_without_extension()}.PDB"
pdbfile_path = build_directory.joinpath(pdbfile_name)
if not GLOBALS.verbose:
logging.getLogger("isledecomp.bin").setLevel(logging.WARNING)
@@ -225,7 +263,9 @@ def main():
with Bin(str(origfile_path), find_str=True) as origfile, Bin(
str(recompiledfile_path)
) as recompfile:
isle_compare = IsleCompare(origfile, recompfile, str(pdb_path), str(repo_root))
isle_compare = IsleCompare(
origfile, recompfile, str(pdbfile_path), str(repo_root)
)
logger.info("Comparison complete.")

View File

@@ -5,6 +5,7 @@
import logging
from typing import Optional
from abc import ABC, abstractmethod
from ghidra.program.model.listing import Function, Parameter
from ghidra.program.flatapi import FlatProgramAPI
@@ -24,6 +25,7 @@ from lego_util.pdb_extraction import (
)
from lego_util.ghidra_helper import (
add_data_type_or_reuse_existing,
create_ghidra_namespace,
get_or_add_pointer_type,
get_ghidra_namespace,
sanitize_name,
@@ -32,12 +34,10 @@ from lego_util.ghidra_helper import (
from lego_util.exceptions import StackOffsetMismatchError, Lego1Exception
from lego_util.type_importer import PdbTypeImporter
logger = logging.getLogger(__name__)
# pylint: disable=too-many-instance-attributes
class PdbFunctionImporter:
class PdbFunctionImporter(ABC):
"""A representation of a function from the PDB with each type replaced by a Ghidra type instance."""
def __init__(
@@ -48,20 +48,79 @@ class PdbFunctionImporter:
):
self.api = api
self.match_info = func.match_info
self.signature = func.signature
self.is_stub = func.is_stub
self.type_importer = type_importer
if self.signature.class_type is not None:
# Import the base class so the namespace exists
self.type_importer.import_pdb_type_into_ghidra(self.signature.class_type)
assert self.match_info.name is not None
colon_split = sanitize_name(self.match_info.name).split("::")
self.name = colon_split.pop()
namespace_hierachy = colon_split
self.namespace = get_ghidra_namespace(api, namespace_hierachy)
self.namespace = self._do_get_namespace(namespace_hierachy)
def _do_get_namespace(self, namespace_hierarchy: list[str]):
return get_ghidra_namespace(self.api, namespace_hierarchy)
def get_full_name(self) -> str:
return f"{self.namespace.getName()}::{self.name}"
@staticmethod
def build(api: FlatProgramAPI, func: PdbFunction, type_importer: "PdbTypeImporter"):
return (
ThunkPdbFunctionImport(api, func, type_importer)
if func.signature is None
else FullPdbFunctionImporter(api, func, type_importer)
)
@abstractmethod
def matches_ghidra_function(self, ghidra_function: Function) -> bool:
...
@abstractmethod
def overwrite_ghidra_function(self, ghidra_function: Function):
...
class ThunkPdbFunctionImport(PdbFunctionImporter):
    """Importer for thunk functions such as vtordisp or debug-build thunks.

    Only the function's name (together with its namespace) is written to Ghidra.
    """

    def _do_get_namespace(self, namespace_hierarchy: list[str]):
        """Create the namespace, since the return type is not imported here."""
        return create_ghidra_namespace(self.api, namespace_hierarchy)

    def matches_ghidra_function(self, ghidra_function: Function) -> bool:
        """Report whether the Ghidra function already has this name and namespace."""
        names_agree = self.name == ghidra_function.getName(False)
        namespaces_agree = self.namespace == ghidra_function.getParentNamespace()

        logger.debug("Matches: namespace=%s name=%s", namespaces_agree, names_agree)

        return names_agree and namespaces_agree

    def overwrite_ghidra_function(self, ghidra_function: Function):
        """Rename the Ghidra function and move it into this importer's namespace."""
        ghidra_function.setName(self.name, SourceType.USER_DEFINED)
        ghidra_function.setParentNamespace(self.namespace)
# pylint: disable=too-many-instance-attributes
class FullPdbFunctionImporter(PdbFunctionImporter):
"""For importing functions into Ghidra where all information are available."""
def __init__(
self,
api: FlatProgramAPI,
func: PdbFunction,
type_importer: "PdbTypeImporter",
):
super().__init__(api, func, type_importer)
assert func.signature is not None
self.signature = func.signature
self.is_stub = func.is_stub
if self.signature.class_type is not None:
# Import the base class so the namespace exists
self.type_importer.import_pdb_type_into_ghidra(self.signature.class_type)
self.return_type = type_importer.import_pdb_type_into_ghidra(
self.signature.return_type
@@ -75,17 +134,6 @@ class PdbFunctionImporter:
for (index, type_name) in enumerate(self.signature.arglist)
]
@property
def call_type(self):
return self.signature.call_type
@property
def stack_symbols(self):
return self.signature.stack_symbols
def get_full_name(self) -> str:
return f"{self.namespace.getName()}::{self.name}"
def matches_ghidra_function(self, ghidra_function: Function) -> bool:
"""Checks whether this function declaration already matches the description in Ghidra"""
name_match = self.name == ghidra_function.getName(False)
@@ -235,7 +283,7 @@ class PdbFunctionImporter:
ghidra_function.setName(self.name, SourceType.USER_DEFINED)
ghidra_function.setParentNamespace(self.namespace)
ghidra_function.setReturnType(self.return_type, SourceType.USER_DEFINED)
ghidra_function.setCallingConvention(self.call_type)
ghidra_function.setCallingConvention(self.signature.call_type)
if self.is_stub:
logger.debug(
@@ -306,7 +354,7 @@ class PdbFunctionImporter:
return next(
(
symbol
for symbol in self.stack_symbols
for symbol in self.signature.stack_symbols
if isinstance(symbol, CppStackSymbol)
and symbol.stack_offset == stack_offset
),
@@ -319,7 +367,7 @@ class PdbFunctionImporter:
return next(
(
symbol
for symbol in self.stack_symbols
for symbol in self.signature.stack_symbols
if isinstance(symbol, CppRegisterSymbol) and symbol.register == register
),
None,

View File

@@ -1,6 +1,7 @@
"""A collection of helper functions for the interaction with Ghidra."""
import logging
import re
from lego_util.exceptions import (
ClassOrNamespaceNotFoundInGhidraError,
@@ -80,25 +81,42 @@ def create_ghidra_namespace(
return namespace
# These appear in debug builds
THUNK_OF_RE = re.compile(r"^Thunk of '(.*)'$")
def sanitize_name(name: str) -> str:
"""
Takes a full class or function name and replaces characters not accepted by Ghidra.
Applies mostly to templates and names like `vbase destructor`.
Applies mostly to templates, names like `vbase destructor`, and thunks in debug build.
"""
new_class_name = (
if (match := THUNK_OF_RE.fullmatch(name)) is not None:
is_thunk = True
name = match.group(1)
else:
is_thunk = False
# Replace characters forbidden in Ghidra
new_name = (
name.replace("<", "[")
.replace(">", "]")
.replace("*", "#")
.replace(" ", "_")
.replace("`", "'")
)
if "<" in name:
new_class_name = "_template_" + new_class_name
if new_class_name != name:
logger.warning(
"Class or function name contains characters forbidden by Ghidra, changing from '%s' to '%s'",
if "<" in name:
new_name = "_template_" + new_name
if is_thunk:
split = new_name.split("::")
split[-1] = "_thunk_" + split[-1]
new_name = "::".join(split)
if new_name != name:
logger.info(
"Changed class or function name from '%s' to '%s' to avoid Ghidra issues",
name,
new_class_name,
new_name,
)
return new_class_name
return new_name

View File

@@ -1,4 +1,4 @@
from typing import TypeVar
from typing import TypeVar, Any
import ghidra
# pylint: disable=invalid-name,unused-argument
@@ -17,3 +17,4 @@ def getFunctionAt(
# Type stub only (body is `...`): takes an entry-point Address and a name,
# and is annotated as returning a Ghidra Function.
def createFunction(
entryPoint: ghidra.program.model.address.Address, name: str
) -> ghidra.program.model.listing.Function: ...
# Type stub only (body is `...`): annotated as `Any` because the real return
# value is a Java object rather than a Python type.
def getProgramFile() -> Any: ... # actually java.io.File

View File

@@ -3,6 +3,7 @@ import re
from typing import Any, Optional
import logging
from isledecomp.bin import InvalidVirtualAddressError
from isledecomp.cvdump.symbols import SymbolsEntry
from isledecomp.compare import Compare as IsleCompare
from isledecomp.compare.db import MatchInfo
@@ -43,7 +44,7 @@ class FunctionSignature:
@dataclass
class PdbFunction:
match_info: MatchInfo
signature: FunctionSignature
signature: Optional[FunctionSignature]
is_stub: bool
@@ -74,9 +75,7 @@ class PdbFunctionExtractor:
def get_func_signature(self, fn: SymbolsEntry) -> Optional[FunctionSignature]:
function_type_str = fn.func_type
if function_type_str == "T_NOTYPE(0000)":
logger.debug(
"Skipping a NOTYPE (synthetic or template + synthetic): %s", fn.name
)
logger.debug("Treating NOTYPE function as thunk: %s", fn.name)
return None
# get corresponding function type
@@ -145,8 +144,6 @@ class PdbFunctionExtractor:
assert match_info.orig_addr is not None
match_options = self.compare.get_match_options(match_info.orig_addr)
assert match_options is not None
if match_options.get("skip", False):
return None
function_data = next(
(
@@ -156,11 +153,19 @@ class PdbFunctionExtractor:
),
None,
)
if not function_data:
logger.error(
"Did not find function in nodes, skipping: %s", match_info.name
)
return None
if function_data is None:
try:
# this can be either a thunk (which we want) or an external function
# (which we don't want), so we tell them apart based on the validity of their address.
self.compare.orig_bin.get_relative_addr(match_info.orig_addr)
return PdbFunction(match_info, None, False)
except InvalidVirtualAddressError:
logger.debug(
"Skipping external function %s (address 0x%x not in original binary)",
match_info.name,
match_info.orig_addr,
)
return None
function_symbol = function_data.symbol_entry
if function_symbol is None:
@@ -171,8 +176,6 @@ class PdbFunctionExtractor:
return None
function_signature = self.get_func_signature(function_symbol)
if function_signature is None:
return None
is_stub = match_options.get("stub", False)