mirror of
https://github.com/isledecomp/isle.git
synced 2025-10-23 08:24:16 +00:00
Ghidra Import: Support virtual inheritance (#1071)
* Implement/fix Ghidra imports for multiple and virtual inheritance

  Unfortunately, the handling in Ghidra is still far from perfect. This is a good place to start, though.
* Support offsets in vbase pointers
* Support `this adjust`
* minor stylistic improvement
* Improvements to documentation

---------

Co-authored-by: jonschz <jonschz@users.noreply.github.com>
This commit is contained in:
@@ -10,6 +10,12 @@ from ghidra.program.model.listing import Function, Parameter
|
||||
from ghidra.program.flatapi import FlatProgramAPI
|
||||
from ghidra.program.model.listing import ParameterImpl
|
||||
from ghidra.program.model.symbol import SourceType
|
||||
from ghidra.program.model.data import (
|
||||
TypeDef,
|
||||
TypedefDataType,
|
||||
Pointer,
|
||||
ComponentOffsetSettingsDefinition,
|
||||
)
|
||||
|
||||
from lego_util.pdb_extraction import (
|
||||
PdbFunction,
|
||||
@@ -17,12 +23,13 @@ from lego_util.pdb_extraction import (
|
||||
CppStackSymbol,
|
||||
)
|
||||
from lego_util.ghidra_helper import (
|
||||
add_pointer_type,
|
||||
add_data_type_or_reuse_existing,
|
||||
get_or_add_pointer_type,
|
||||
get_ghidra_namespace,
|
||||
sanitize_name,
|
||||
)
|
||||
|
||||
from lego_util.exceptions import StackOffsetMismatchError
|
||||
from lego_util.exceptions import StackOffsetMismatchError, Lego1Exception
|
||||
from lego_util.type_importer import PdbTypeImporter
|
||||
|
||||
|
||||
@@ -91,7 +98,10 @@ class PdbFunctionImporter:
|
||||
if (
|
||||
(not return_type_match)
|
||||
and (self.return_type.getLength() > 4)
|
||||
and (add_pointer_type(self.api, self.return_type) == ghidra_return_type)
|
||||
and (
|
||||
get_or_add_pointer_type(self.api, self.return_type)
|
||||
== ghidra_return_type
|
||||
)
|
||||
and any(
|
||||
param
|
||||
for param in ghidra_function.getParameters()
|
||||
@@ -103,19 +113,22 @@ class PdbFunctionImporter:
|
||||
)
|
||||
return_type_match = True
|
||||
|
||||
# match arguments: decide if thiscall or not
|
||||
# match arguments: decide if thiscall or not, and whether the `this` type matches
|
||||
thiscall_matches = (
|
||||
self.signature.call_type == ghidra_function.getCallingConventionName()
|
||||
)
|
||||
|
||||
ghidra_params_without_this = list(ghidra_function.getParameters())
|
||||
|
||||
if thiscall_matches and self.signature.call_type == "__thiscall":
|
||||
this_argument = ghidra_params_without_this.pop(0)
|
||||
thiscall_matches = self._this_type_match(this_argument)
|
||||
|
||||
if self.is_stub:
|
||||
# We do not import the argument list for stubs, so it should be excluded in matches
|
||||
args_match = True
|
||||
elif thiscall_matches:
|
||||
if self.signature.call_type == "__thiscall":
|
||||
args_match = self._matches_thiscall_parameters(ghidra_function)
|
||||
else:
|
||||
args_match = self._matches_non_thiscall_parameters(ghidra_function)
|
||||
args_match = self._parameter_lists_match(ghidra_params_without_this)
|
||||
else:
|
||||
args_match = False
|
||||
|
||||
@@ -136,16 +149,22 @@ class PdbFunctionImporter:
|
||||
and args_match
|
||||
)
|
||||
|
||||
def _matches_non_thiscall_parameters(self, ghidra_function: Function) -> bool:
|
||||
return self._parameter_lists_match(ghidra_function.getParameters())
|
||||
def _this_type_match(self, this_parameter: Parameter) -> bool:
|
||||
if this_parameter.getName() != "this":
|
||||
logger.info("Expected first argument to be `this` in __thiscall")
|
||||
return False
|
||||
|
||||
def _matches_thiscall_parameters(self, ghidra_function: Function) -> bool:
|
||||
ghidra_params = list(ghidra_function.getParameters())
|
||||
if self.signature.this_adjust != 0:
|
||||
# In this case, the `this` argument should be custom defined
|
||||
if not isinstance(this_parameter.getDataType(), TypeDef):
|
||||
logger.info(
|
||||
"`this` argument is not a typedef while `this adjust` = %d",
|
||||
self.signature.this_adjust,
|
||||
)
|
||||
return False
|
||||
# We are not checking for the _correct_ `this` type here, which we could do in the future
|
||||
|
||||
# remove the `this` argument which we don't generate ourselves
|
||||
ghidra_params.pop(0)
|
||||
|
||||
return self._parameter_lists_match(ghidra_params)
|
||||
return True
|
||||
|
||||
def _parameter_lists_match(self, ghidra_params: "list[Parameter]") -> bool:
|
||||
# Remove return storage pointer from comparison if present.
|
||||
@@ -194,6 +213,25 @@ class PdbFunctionImporter:
|
||||
|
||||
def overwrite_ghidra_function(self, ghidra_function: Function):
|
||||
"""Replace the function declaration in Ghidra by the one derived from C++."""
|
||||
|
||||
if ghidra_function.hasCustomVariableStorage():
|
||||
# Unfortunately, calling `ghidra_function.setCustomVariableStorage(False)`
|
||||
# leads to two `this` parameters. Therefore, we first need to remove all `this` parameters
|
||||
# and then re-generate a new one
|
||||
ghidra_function.replaceParameters(
|
||||
Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS, # this implicitly sets custom variable storage to False
|
||||
True,
|
||||
SourceType.USER_DEFINED,
|
||||
[
|
||||
param
|
||||
for param in ghidra_function.getParameters()
|
||||
if param.getName() != "this"
|
||||
],
|
||||
)
|
||||
|
||||
if ghidra_function.hasCustomVariableStorage():
|
||||
raise Lego1Exception("Failed to disable custom variable storage.")
|
||||
|
||||
ghidra_function.setName(self.name, SourceType.USER_DEFINED)
|
||||
ghidra_function.setParentNamespace(self.namespace)
|
||||
ghidra_function.setReturnType(self.return_type, SourceType.USER_DEFINED)
|
||||
@@ -203,16 +241,18 @@ class PdbFunctionImporter:
|
||||
logger.debug(
|
||||
"%s is a stub, skipping parameter import", self.get_full_name()
|
||||
)
|
||||
return
|
||||
else:
|
||||
ghidra_function.replaceParameters(
|
||||
Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS,
|
||||
True, # force
|
||||
SourceType.USER_DEFINED,
|
||||
self.arguments,
|
||||
)
|
||||
self._import_parameter_names(ghidra_function)
|
||||
|
||||
ghidra_function.replaceParameters(
|
||||
Function.FunctionUpdateType.DYNAMIC_STORAGE_ALL_PARAMS,
|
||||
True, # force
|
||||
SourceType.USER_DEFINED,
|
||||
self.arguments,
|
||||
)
|
||||
|
||||
self._import_parameter_names(ghidra_function)
|
||||
# Special handling for `this adjust` and virtual inheritance
|
||||
if self.signature.this_adjust != 0:
|
||||
self._set_this_adjust(ghidra_function)
|
||||
|
||||
def _import_parameter_names(self, ghidra_function: Function):
|
||||
# When we call `ghidra_function.replaceParameters`, Ghidra will generate the layout.
|
||||
@@ -284,3 +324,50 @@ class PdbFunctionImporter:
|
||||
),
|
||||
None,
|
||||
)
|
||||
|
||||
def _set_this_adjust(
    self,
    ghidra_function: Function,
):
    """
    Swap the type of `this` for an offset pointer typedef when `this adjust` is non-zero.

    The offset can only be set on a typedef of the pointer, so a typedef around the current
    pointer type of `this` is created (or an already existing one is reused) and assigned
    to the parameter. Custom variable storage is enabled first so that the auto-generated
    `this` parameter can be modified at all.

    @param ghidra_function The function whose `this` parameter gets the offset typedef.
    """

    # Necessary in order to overwrite the auto-generated `this`
    ghidra_function.setCustomVariableStorage(True)

    register_this_candidates = (
        candidate
        for candidate in ghidra_function.getParameters()
        if candidate.isRegisterVariable() and candidate.getName() == "this"
    )
    this_parameter = next(register_this_candidates, None)

    if this_parameter is None:
        # Nothing to retype; report the problem and leave the function as it is.
        logger.error(
            "Failed to find `this` parameter in a function with `this adjust = %d`",
            self.signature.this_adjust,
        )
        return

    pointer_type = this_parameter.getDataType()
    assert isinstance(pointer_type, Pointer)
    class_name = pointer_type.getDataType().getName()
    typedef_name = f"{class_name}PtrOffset0x{self.signature.this_adjust:x}"

    # The typedef lives in the same category as the pointer it wraps.
    offset_typedef = TypedefDataType(
        pointer_type.getCategoryPath(),
        typedef_name,
        pointer_type,
    )
    ComponentOffsetSettingsDefinition.DEF.setValue(
        offset_typedef.getDefaultSettings(), self.signature.this_adjust
    )
    offset_typedef = add_data_type_or_reuse_existing(self.api, offset_typedef)

    this_parameter.setDataType(offset_typedef, SourceType.USER_DEFINED)
|
||||
|
@@ -11,10 +11,8 @@ from lego_util.exceptions import (
|
||||
# Disable spurious warnings in vscode / pylance
|
||||
# pyright: reportMissingModuleSource=false
|
||||
|
||||
from ghidra.program.model.data import PointerDataType
|
||||
from ghidra.program.model.data import DataTypeConflictHandler
|
||||
from ghidra.program.flatapi import FlatProgramAPI
|
||||
from ghidra.program.model.data import DataType
|
||||
from ghidra.program.model.data import DataType, DataTypeConflictHandler, PointerDataType
|
||||
from ghidra.program.model.symbol import Namespace
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -37,9 +35,15 @@ def get_ghidra_type(api: FlatProgramAPI, type_name: str):
|
||||
raise MultipleTypesFoundInGhidraError(type_name, result)
|
||||
|
||||
|
||||
def add_pointer_type(api: FlatProgramAPI, pointee: DataType) -> DataType:
|
||||
new_data_type = PointerDataType(pointee)
|
||||
new_data_type.setCategoryPath(pointee.getCategoryPath())
|
||||
def get_or_add_pointer_type(api: FlatProgramAPI, pointee: DataType) -> DataType:
    """
    Build a pointer type to `pointee` in the pointee's category and register it.

    @param api The Ghidra API handle passed on to `add_data_type_or_reuse_existing`.
    @param pointee The data type the pointer should point to.
    @return Whatever `add_data_type_or_reuse_existing` returns for the new pointer type.
    """
    category_path = pointee.getCategoryPath()
    pointer_candidate = PointerDataType(pointee)
    pointer_candidate.setCategoryPath(category_path)
    return add_data_type_or_reuse_existing(api, pointer_candidate)
|
||||
|
||||
|
||||
def add_data_type_or_reuse_existing(
|
||||
api: FlatProgramAPI, new_data_type: DataType
|
||||
) -> DataType:
|
||||
result_data_type = (
|
||||
api.getCurrentProgram()
|
||||
.getDataTypeManager()
|
||||
@@ -47,7 +51,7 @@ def add_pointer_type(api: FlatProgramAPI, pointee: DataType) -> DataType:
|
||||
)
|
||||
if result_data_type is not new_data_type:
|
||||
logger.debug(
|
||||
"New pointer replaced by existing one. Fresh pointer: %s (class: %s)",
|
||||
"Reusing existing data type instead of new one: %s (class: %s)",
|
||||
result_data_type,
|
||||
result_data_type.__class__,
|
||||
)
|
||||
|
@@ -36,6 +36,8 @@ class FunctionSignature:
|
||||
return_type: str
|
||||
class_type: Optional[str]
|
||||
stack_symbols: list[CppStackOrRegisterSymbol]
|
||||
# if non-zero: an offset to the `this` parameter in a __thiscall
|
||||
this_adjust: int
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -119,6 +121,9 @@ class PdbFunctionExtractor:
|
||||
|
||||
call_type = self._call_type_map[function_type["call_type"]]
|
||||
|
||||
# parse as hex number, default to 0
|
||||
this_adjust = int(function_type.get("this_adjust", "0"), 16)
|
||||
|
||||
return FunctionSignature(
|
||||
original_function_symbol=fn,
|
||||
call_type=call_type,
|
||||
@@ -126,6 +131,7 @@ class PdbFunctionExtractor:
|
||||
return_type=function_type["return_type"],
|
||||
class_type=class_type,
|
||||
stack_symbols=stack_symbols,
|
||||
this_adjust=this_adjust,
|
||||
)
|
||||
|
||||
def get_function_list(self) -> list[PdbFunction]:
|
||||
|
@@ -1,5 +1,5 @@
|
||||
import logging
|
||||
from typing import Any, Callable, TypeVar
|
||||
from typing import Any, Callable, Iterator, Optional, TypeVar
|
||||
|
||||
# Disable spurious warnings in vscode / pylance
|
||||
# pyright: reportMissingModuleSource=false
|
||||
@@ -7,6 +7,7 @@ from typing import Any, Callable, TypeVar
|
||||
# pylint: disable=too-many-return-statements # a `match` would be better, but for now we are stuck with Python 3.9
|
||||
# pylint: disable=no-else-return # Not sure why this rule even is a thing, this is great for checking exhaustiveness
|
||||
|
||||
from isledecomp.cvdump.types import VirtualBasePointer
|
||||
from lego_util.exceptions import (
|
||||
ClassOrNamespaceNotFoundInGhidraError,
|
||||
TypeNotFoundError,
|
||||
@@ -15,7 +16,8 @@ from lego_util.exceptions import (
|
||||
StructModificationError,
|
||||
)
|
||||
from lego_util.ghidra_helper import (
|
||||
add_pointer_type,
|
||||
add_data_type_or_reuse_existing,
|
||||
get_or_add_pointer_type,
|
||||
create_ghidra_namespace,
|
||||
get_ghidra_namespace,
|
||||
get_ghidra_type,
|
||||
@@ -33,6 +35,8 @@ from ghidra.program.model.data import (
|
||||
EnumDataType,
|
||||
StructureDataType,
|
||||
StructureInternal,
|
||||
TypedefDataType,
|
||||
ComponentOffsetSettingsDefinition,
|
||||
)
|
||||
from ghidra.util.task import ConsoleTaskMonitor
|
||||
|
||||
@@ -56,10 +60,19 @@ class PdbTypeImporter:
|
||||
def types(self):
|
||||
return self.extraction.compare.cv.types
|
||||
|
||||
def import_pdb_type_into_ghidra(self, type_index: str) -> DataType:
|
||||
def import_pdb_type_into_ghidra(
|
||||
self, type_index: str, slim_for_vbase: bool = False
|
||||
) -> DataType:
|
||||
"""
|
||||
Recursively imports a type from the PDB into Ghidra.
|
||||
@param type_index Either a scalar type like `T_INT4(...)` or a PDB reference like `0x10ba`
|
||||
@param slim_for_vbase If true, the current invocation
|
||||
imports a superclass of some class where virtual inheritance is involved (directly or indirectly).
|
||||
This case requires special handling: Let's say we have `class C: B` and `class B: virtual A`. Then cvdump
|
||||
reports a size for B that includes both B's fields as well as the A contained at an offset within B,
|
||||
which is not the correct structure to be contained in C. Therefore, we need to create a "slim" version of B
|
||||
that fits inside C.
|
||||
This value should always be `False` when the referenced type is not (a pointer to) a class.
|
||||
"""
|
||||
type_index_lower = type_index.lower()
|
||||
if type_index_lower.startswith("t_"):
|
||||
@@ -76,14 +89,19 @@ class PdbTypeImporter:
|
||||
|
||||
# follow forward reference (class, struct, union)
|
||||
if type_pdb.get("is_forward_ref", False):
|
||||
return self._import_forward_ref_type(type_index_lower, type_pdb)
|
||||
return self._import_forward_ref_type(
|
||||
type_index_lower, type_pdb, slim_for_vbase
|
||||
)
|
||||
|
||||
if type_category == "LF_POINTER":
|
||||
return add_pointer_type(
|
||||
self.api, self.import_pdb_type_into_ghidra(type_pdb["element_type"])
|
||||
return get_or_add_pointer_type(
|
||||
self.api,
|
||||
self.import_pdb_type_into_ghidra(
|
||||
type_pdb["element_type"], slim_for_vbase
|
||||
),
|
||||
)
|
||||
elif type_category in ["LF_CLASS", "LF_STRUCTURE"]:
|
||||
return self._import_class_or_struct(type_pdb)
|
||||
return self._import_class_or_struct(type_pdb, slim_for_vbase)
|
||||
elif type_category == "LF_ARRAY":
|
||||
return self._import_array(type_pdb)
|
||||
elif type_category == "LF_ENUM":
|
||||
@@ -120,7 +138,10 @@ class PdbTypeImporter:
|
||||
return get_ghidra_type(self.api, scalar_cpp_type)
|
||||
|
||||
def _import_forward_ref_type(
|
||||
self, type_index, type_pdb: dict[str, Any]
|
||||
self,
|
||||
type_index,
|
||||
type_pdb: dict[str, Any],
|
||||
slim_for_vbase: bool = False,
|
||||
) -> DataType:
|
||||
referenced_type = type_pdb.get("udt") or type_pdb.get("modifies")
|
||||
if referenced_type is None:
|
||||
@@ -136,7 +157,7 @@ class PdbTypeImporter:
|
||||
type_index,
|
||||
referenced_type,
|
||||
)
|
||||
return self.import_pdb_type_into_ghidra(referenced_type)
|
||||
return self.import_pdb_type_into_ghidra(referenced_type, slim_for_vbase)
|
||||
|
||||
def _import_array(self, type_pdb: dict[str, Any]) -> DataType:
|
||||
inner_type = self.import_pdb_type_into_ghidra(type_pdb["array_type"])
|
||||
@@ -182,12 +203,18 @@ class PdbTypeImporter:
|
||||
|
||||
return result
|
||||
|
||||
def _import_class_or_struct(self, type_in_pdb: dict[str, Any]) -> DataType:
|
||||
def _import_class_or_struct(
|
||||
self,
|
||||
type_in_pdb: dict[str, Any],
|
||||
slim_for_vbase: bool = False,
|
||||
) -> DataType:
|
||||
field_list_type: str = type_in_pdb["field_list_type"]
|
||||
field_list = self.types.keys[field_list_type.lower()]
|
||||
|
||||
class_size: int = type_in_pdb["size"]
|
||||
class_name_with_namespace: str = sanitize_name(type_in_pdb["name"])
|
||||
if slim_for_vbase:
|
||||
class_name_with_namespace += "_vbase_slim"
|
||||
|
||||
if class_name_with_namespace in self.handled_structs:
|
||||
logger.debug(
|
||||
@@ -205,11 +232,11 @@ class PdbTypeImporter:
|
||||
|
||||
self._get_or_create_namespace(class_name_with_namespace)
|
||||
|
||||
data_type = self._get_or_create_struct_data_type(
|
||||
new_ghidra_struct = self._get_or_create_struct_data_type(
|
||||
class_name_with_namespace, class_size
|
||||
)
|
||||
|
||||
if (old_size := data_type.getLength()) != class_size:
|
||||
if (old_size := new_ghidra_struct.getLength()) != class_size:
|
||||
logger.warning(
|
||||
"Existing class %s had incorrect size %d. Setting to %d...",
|
||||
class_name_with_namespace,
|
||||
@@ -220,39 +247,189 @@ class PdbTypeImporter:
|
||||
logger.info("Adding class data type %s", class_name_with_namespace)
|
||||
logger.debug("Class information: %s", type_in_pdb)
|
||||
|
||||
data_type.deleteAll()
|
||||
data_type.growStructure(class_size)
|
||||
components: list[dict[str, Any]] = []
|
||||
components.extend(self._get_components_from_base_classes(field_list))
|
||||
# can be missing when no new fields are declared
|
||||
components.extend(self._get_components_from_members(field_list))
|
||||
components.extend(
|
||||
self._get_components_from_vbase(
|
||||
field_list, class_name_with_namespace, new_ghidra_struct
|
||||
)
|
||||
)
|
||||
|
||||
components.sort(key=lambda c: c["offset"])
|
||||
|
||||
if slim_for_vbase:
|
||||
# Make a "slim" version: shrink the size to the fields that are actually present.
|
||||
# This makes a difference when the current class uses virtual inheritance
|
||||
assert (
|
||||
len(components) > 0
|
||||
), f"Error: {class_name_with_namespace} should not be empty. There must be at least one direct or indirect vbase pointer."
|
||||
last_component = components[-1]
|
||||
class_size = last_component["offset"] + last_component["type"].getLength()
|
||||
|
||||
self._overwrite_struct(
|
||||
class_name_with_namespace,
|
||||
new_ghidra_struct,
|
||||
class_size,
|
||||
components,
|
||||
)
|
||||
|
||||
logger.info("Finished importing class %s", class_name_with_namespace)
|
||||
|
||||
return new_ghidra_struct
|
||||
|
||||
def _get_components_from_base_classes(self, field_list) -> Iterator[dict[str, Any]]:
    """
    Yield one struct component for every entry under `super` in the field list.

    @param field_list The PDB field list of the class that is being imported.
    @return An iterator of dicts with the keys `type` (imported Ghidra type), `offset` and `name`.
    """
    for super_type, offset in field_list.get("super", {}).items():
        # If we have virtual inheritance _and_ a non-virtual base class here, we play safe and import slim version.
        # This is technically not needed if only one of the superclasses uses virtual inheritance,
        # but I am not aware of any instance.
        ghidra_type = self.import_pdb_type_into_ghidra(
            super_type, slim_for_vbase="vbase" in field_list
        )

        # Only the base class at offset 0 gets the plain name; others carry their type name.
        if offset == 0:
            component_name = "base"
        else:
            component_name = f"base_{ghidra_type.getName()}"

        yield {
            "type": ghidra_type,
            "offset": offset,
            "name": component_name,
        }
|
||||
|
||||
def _get_components_from_members(self, field_list: dict[str, Any]):
    """
    Yield a copy of every member entry with its `type` replaced by the imported Ghidra type.

    @param field_list The PDB field list of the class that is being imported.
    The `members` key may be missing or empty; in that case nothing is yielded.
    """
    for member in field_list.get("members") or []:
        ghidra_type = self.import_pdb_type_into_ghidra(member["type"])
        # Copy so the entry of the PDB field list itself stays untouched.
        component = dict(member)
        component["type"] = ghidra_type
        yield component
|
||||
|
||||
def _get_components_from_vbase(
    self,
    field_list: dict[str, Any],
    class_name_with_namespace: str,
    current_type: StructureInternal,
) -> Iterator[dict[str, Any]]:
    """
    Yield the vbase pointer component if the class has at least one direct virtual base.

    @param field_list The PDB field list of the class that is being imported.
    @param class_name_with_namespace Name of the class; passed on to `_import_vbaseptr`.
    @param current_type The Ghidra struct of the class; passed on to `_import_vbaseptr`.
    @return An iterator with zero or one component dict (`type`, `offset`, `name`).
    """
    vbasepointer: Optional[VirtualBasePointer] = field_list.get("vbase", None)

    if vbasepointer is None:
        return
    if not any(base.direct for base in vbasepointer.bases):
        return

    vbaseptr_struct = self._import_vbaseptr(
        current_type, class_name_with_namespace, vbasepointer
    )

    yield {
        "type": get_or_add_pointer_type(self.api, vbaseptr_struct),
        "offset": vbasepointer.vboffset,
        "name": "vbase_offset",
    }
|
||||
|
||||
def _import_vbaseptr(
    self,
    current_type: StructureInternal,
    class_name_with_namespace: str,
    vbasepointer: VirtualBasePointer,
) -> StructureInternal:
    """
    Create or overwrite the `<class>::VBasePtr` struct that the vbase pointer points to.

    The struct starts with a pointer to the class itself (`o_self`), followed by one
    offset pointer typedef per entry in `vbasepointer.bases`, placed at `index * 4`.

    @param current_type The Ghidra struct of the class that owns the vbase pointer.
    @param class_name_with_namespace Name of the owning class, used to name the new struct.
    @param vbasepointer The virtual base information of the class.
    @return The Ghidra struct named `<class_name_with_namespace>::VBasePtr`.
    """
    pointer_size = 4  # hard-code to 4 because of 32 bit
    struct_name = f"{class_name_with_namespace}::VBasePtr"

    self_entry = {
        "offset": 0,
        "type": get_or_add_pointer_type(self.api, current_type),
        "name": "o_self",
    }
    components = [self_entry]

    for vbase in vbasepointer.bases:
        vbase_ghidra_type = self.import_pdb_type_into_ghidra(vbase.type)
        type_name = vbase_ghidra_type.getName()

        base_pointer = get_or_add_pointer_type(self.api, vbase_ghidra_type)
        fresh_typedef = TypedefDataType(
            base_pointer.getCategoryPath(),
            f"{type_name}PtrOffset",
            base_pointer,
        )
        # Set a default value of -4 for the pointer offset. While this appears to be correct in many cases,
        # it does not always lead to the best decompile. It can be fine-tuned by hand; the next function call
        # makes sure that we don't overwrite this value on re-running the import.
        ComponentOffsetSettingsDefinition.DEF.setValue(
            fresh_typedef.getDefaultSettings(), -4
        )
        offset_typedef = add_data_type_or_reuse_existing(self.api, fresh_typedef)

        components.append(
            {
                "offset": vbase.index * pointer_size,
                "type": offset_typedef,
                "name": f"o_{type_name}",
            }
        )

    # NOTE(review): the size assumes the entries occupy consecutive pointer-sized slots
    # (i.e. `vbase.index` runs 1..n without gaps) -- confirm against cvdump output.
    size = len(components) * pointer_size

    new_ghidra_struct = self._get_or_create_struct_data_type(struct_name, size)
    self._overwrite_struct(struct_name, new_ghidra_struct, size, components)

    return new_ghidra_struct
|
||||
|
||||
def _overwrite_struct(
|
||||
self,
|
||||
class_name_with_namespace: str,
|
||||
new_ghidra_struct: StructureInternal,
|
||||
class_size: int,
|
||||
components: list[dict[str, Any]],
|
||||
):
|
||||
new_ghidra_struct.deleteAll()
|
||||
new_ghidra_struct.growStructure(class_size)
|
||||
|
||||
# this case happened e.g. for IUnknown, which linked to an (incorrect) existing library, and some other types as well.
|
||||
# Unfortunately, we don't get proper error handling for read-only types.
|
||||
# However, we really do NOT want to do this every time because the type might be self-referential and partially imported.
|
||||
if data_type.getLength() != class_size:
|
||||
data_type = self._delete_and_recreate_struct_data_type(
|
||||
class_name_with_namespace, class_size, data_type
|
||||
if new_ghidra_struct.getLength() != class_size:
|
||||
new_ghidra_struct = self._delete_and_recreate_struct_data_type(
|
||||
class_name_with_namespace, class_size, new_ghidra_struct
|
||||
)
|
||||
|
||||
# can be missing when no new fields are declared
|
||||
components: list[dict[str, Any]] = field_list.get("members") or []
|
||||
|
||||
super_type = field_list.get("super")
|
||||
if super_type is not None:
|
||||
components.insert(0, {"type": super_type, "offset": 0, "name": "base"})
|
||||
|
||||
for component in components:
|
||||
ghidra_type = self.import_pdb_type_into_ghidra(component["type"])
|
||||
logger.debug("Adding component to class: %s", component)
|
||||
offset: int = component["offset"]
|
||||
logger.debug(
|
||||
"Adding component %s to class: %s", component, class_name_with_namespace
|
||||
)
|
||||
|
||||
try:
|
||||
# for better logs
|
||||
data_type.replaceAtOffset(
|
||||
component["offset"], ghidra_type, -1, component["name"], None
|
||||
# Make sure there is room for the new structure and that we have no collision.
|
||||
existing_type = new_ghidra_struct.getComponentAt(offset)
|
||||
assert (
|
||||
existing_type is not None
|
||||
), f"Struct collision: Offset {offset} in {class_name_with_namespace} is overlapped by another component"
|
||||
|
||||
if existing_type.getDataType().getName() != "undefined":
|
||||
# collision of structs beginning in the same place -> likely due to unions
|
||||
logger.warning(
|
||||
"Struct collision: Offset %d of %s already has a field (likely an inline union)",
|
||||
offset,
|
||||
class_name_with_namespace,
|
||||
)
|
||||
|
||||
new_ghidra_struct.replaceAtOffset(
|
||||
offset,
|
||||
component["type"],
|
||||
-1, # set to -1 for fixed-size components
|
||||
component["name"], # name
|
||||
None, # comment
|
||||
)
|
||||
except Exception as e:
|
||||
raise StructModificationError(type_in_pdb) from e
|
||||
|
||||
logger.info("Finished importing class %s", class_name_with_namespace)
|
||||
|
||||
return data_type
|
||||
raise StructModificationError(class_name_with_namespace) from e
|
||||
|
||||
def _get_or_create_namespace(self, class_name_with_namespace: str):
|
||||
colon_split = class_name_with_namespace.split("::")
|
||||
|
Reference in New Issue
Block a user