mirror of
				https://github.com/isledecomp/isle.git
				synced 2025-10-26 01:44:19 +00:00 
			
		
		
		
	 f26c30974a
			
		
	
	f26c30974a
	
	
	
		
			
			* Add draft for Ghidra function import script * feature: Basic PDB analysis [skip ci] This is a draft with a lot of open questions left. Please do not merge * Refactor: Introduce submodules and reload remedy * refactor types and make them Python 3.9 compatible * run black * WIP: save progress * fix types and small type safety violations * fix another Python 3.9 syntax incompatibility * Implement struct imports [skip ci] - This code is still in dire need of refactoring and tests - There are only single-digit issues left, and 2600 functions can be imported - The biggest remaining error is mismatched stacks * Refactor, implement enums, fix lots of bugs * fix Python 3.9 issue * refactor: address review comments Not sure why VS Code suddenly decides to remove some empty spaces, but they don't make sense anyway * add unit tests for new type parsers, fix linter issue * refactor: db access from pdb_extraction.py * Fix stack layout offset error * fix: Undo incorrect reference change * Fix CI issue * Improve READMEs (fix typos, add information) --------- Co-authored-by: jonschz <jonschz@users.noreply.github.com>
		
			
				
	
	
		
			167 lines
		
	
	
		
			5.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			167 lines
		
	
	
		
			5.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from dataclasses import dataclass
 | |
| import re
 | |
| from typing import Any, Optional
 | |
| import logging
 | |
| 
 | |
| from isledecomp.cvdump.symbols import SymbolsEntry
 | |
| from isledecomp.compare import Compare as IsleCompare
 | |
| from isledecomp.compare.db import MatchInfo
 | |
| 
 | |
| logger = logging.getLogger(__file__)
 | |
| 
 | |
| 
 | |
| @dataclass
 | |
| class CppStackOrRegisterSymbol:
 | |
|     name: str
 | |
|     data_type: str
 | |
| 
 | |
| 
 | |
| @dataclass
 | |
| class CppStackSymbol(CppStackOrRegisterSymbol):
 | |
|     stack_offset: int
 | |
|     """Should have a value iff `symbol_type=='S_BPREL32'."""
 | |
| 
 | |
| 
 | |
| @dataclass
 | |
| class CppRegisterSymbol(CppStackOrRegisterSymbol):
 | |
|     register: str
 | |
|     """Should have a value iff `symbol_type=='S_REGISTER'.` Should always be set/converted to lowercase."""
 | |
| 
 | |
| 
 | |
| @dataclass
 | |
| class FunctionSignature:
 | |
|     original_function_symbol: SymbolsEntry
 | |
|     call_type: str
 | |
|     arglist: list[str]
 | |
|     return_type: str
 | |
|     class_type: Optional[str]
 | |
|     stack_symbols: list[CppStackOrRegisterSymbol]
 | |
| 
 | |
| 
 | |
| class PdbFunctionExtractor:
 | |
|     """
 | |
|     Extracts all information on a given function from the parsed PDB
 | |
|     and prepares the data for the import in Ghidra.
 | |
|     """
 | |
| 
 | |
|     def __init__(self, compare: IsleCompare):
 | |
|         self.compare = compare
 | |
| 
 | |
|     scalar_type_regex = re.compile(r"t_(?P<typename>\w+)(?:\((?P<type_id>\d+)\))?")
 | |
| 
 | |
|     _call_type_map = {
 | |
|         "ThisCall": "__thiscall",
 | |
|         "C Near": "__thiscall",
 | |
|         "STD Near": "__stdcall",
 | |
|     }
 | |
| 
 | |
|     def _get_cvdump_type(self, type_name: Optional[str]) -> Optional[dict[str, Any]]:
 | |
|         return (
 | |
|             None
 | |
|             if type_name is None
 | |
|             else self.compare.cv.types.keys.get(type_name.lower())
 | |
|         )
 | |
| 
 | |
|     def get_func_signature(self, fn: SymbolsEntry) -> Optional[FunctionSignature]:
 | |
|         function_type_str = fn.func_type
 | |
|         if function_type_str == "T_NOTYPE(0000)":
 | |
|             logger.debug(
 | |
|                 "Skipping a NOTYPE (synthetic or template + synthetic): %s", fn.name
 | |
|             )
 | |
|             return None
 | |
| 
 | |
|         # get corresponding function type
 | |
| 
 | |
|         function_type = self.compare.cv.types.keys.get(function_type_str.lower())
 | |
|         if function_type is None:
 | |
|             logger.error(
 | |
|                 "Could not find function type %s for function %s", fn.func_type, fn.name
 | |
|             )
 | |
|             return None
 | |
| 
 | |
|         class_type = function_type.get("class_type")
 | |
| 
 | |
|         arg_list_type = self._get_cvdump_type(function_type.get("arg_list_type"))
 | |
|         assert arg_list_type is not None
 | |
|         arg_list_pdb_types = arg_list_type.get("args", [])
 | |
|         assert arg_list_type["argcount"] == len(arg_list_pdb_types)
 | |
| 
 | |
|         stack_symbols: list[CppStackOrRegisterSymbol] = []
 | |
| 
 | |
|         # for some unexplained reason, the reported stack is offset by 4 when this flag is set
 | |
|         stack_offset_delta = -4 if fn.frame_pointer_present else 0
 | |
| 
 | |
|         for symbol in fn.stack_symbols:
 | |
|             if symbol.symbol_type == "S_REGISTER":
 | |
|                 stack_symbols.append(
 | |
|                     CppRegisterSymbol(
 | |
|                         symbol.name,
 | |
|                         symbol.data_type,
 | |
|                         symbol.location,
 | |
|                     )
 | |
|                 )
 | |
|             elif symbol.symbol_type == "S_BPREL32":
 | |
|                 stack_offset = int(symbol.location[1:-1], 16)
 | |
|                 stack_symbols.append(
 | |
|                     CppStackSymbol(
 | |
|                         symbol.name,
 | |
|                         symbol.data_type,
 | |
|                         stack_offset + stack_offset_delta,
 | |
|                     )
 | |
|                 )
 | |
| 
 | |
|         call_type = self._call_type_map[function_type["call_type"]]
 | |
| 
 | |
|         return FunctionSignature(
 | |
|             original_function_symbol=fn,
 | |
|             call_type=call_type,
 | |
|             arglist=arg_list_pdb_types,
 | |
|             return_type=function_type["return_type"],
 | |
|             class_type=class_type,
 | |
|             stack_symbols=stack_symbols,
 | |
|         )
 | |
| 
 | |
|     def get_function_list(self) -> list[tuple[MatchInfo, FunctionSignature]]:
 | |
|         handled = (
 | |
|             self.handle_matched_function(match)
 | |
|             for match in self.compare.get_functions()
 | |
|         )
 | |
|         return [signature for signature in handled if signature is not None]
 | |
| 
 | |
|     def handle_matched_function(
 | |
|         self, match_info: MatchInfo
 | |
|     ) -> Optional[tuple[MatchInfo, FunctionSignature]]:
 | |
|         assert match_info.orig_addr is not None
 | |
|         match_options = self.compare.get_match_options(match_info.orig_addr)
 | |
|         assert match_options is not None
 | |
|         if match_options.get("skip", False) or match_options.get("stub", False):
 | |
|             return None
 | |
| 
 | |
|         function_data = next(
 | |
|             (
 | |
|                 y
 | |
|                 for y in self.compare.cvdump_analysis.nodes
 | |
|                 if y.addr == match_info.recomp_addr
 | |
|             ),
 | |
|             None,
 | |
|         )
 | |
|         if not function_data:
 | |
|             logger.error(
 | |
|                 "Did not find function in nodes, skipping: %s", match_info.name
 | |
|             )
 | |
|             return None
 | |
| 
 | |
|         function_symbol = function_data.symbol_entry
 | |
|         if function_symbol is None:
 | |
|             logger.debug(
 | |
|                 "Could not find function symbol (likely a PUBLICS entry): %s",
 | |
|                 match_info.name,
 | |
|             )
 | |
|             return None
 | |
| 
 | |
|         function_signature = self.get_func_signature(function_symbol)
 | |
|         if function_signature is None:
 | |
|             return None
 | |
| 
 | |
|         return match_info, function_signature
 |