mirror of
https://github.com/isledecomp/isle.git
synced 2025-10-23 00:14:22 +00:00
Add Ghidra function import script (#909)
* Add draft for Ghidra function import script
* feature: Basic PDB analysis [skip ci]
  This is a draft with a lot of open questions left. Please do not merge
* Refactor: Introduce submodules and reload remedy
* refactor types and make them Python 3.9 compatible
* run black
* WIP: save progress
* fix types and small type safety violations
* fix another Python 3.9 syntax incompatibility
* Implement struct imports [skip ci]
  - This code is still in dire need of refactoring and tests
  - There are only single-digit issues left, and 2600 functions can be imported
  - The biggest remaining error is mismatched stacks
* Refactor, implement enums, fix lots of bugs
* fix Python 3.9 issue
* refactor: address review comments
  Not sure why VS Code suddenly decides to remove some empty spaces, but they don't make sense anyway
* add unit tests for new type parsers, fix linter issue
* refactor: db access from pdb_extraction.py
* Fix stack layout offset error
* fix: Undo incorrect reference change
* Fix CI issue
* Improve READMEs (fix typos, add information)
---------
Co-authored-by: jonschz <jonschz@users.noreply.github.com>
This commit is contained in:
@@ -4,7 +4,7 @@ import difflib
|
||||
import struct
|
||||
import uuid
|
||||
from dataclasses import dataclass
|
||||
from typing import Callable, Iterable, List, Optional
|
||||
from typing import Any, Callable, Iterable, List, Optional
|
||||
from isledecomp.bin import Bin as IsleBin, InvalidVirtualAddressError
|
||||
from isledecomp.cvdump.demangler import demangle_string_const
|
||||
from isledecomp.cvdump import Cvdump, CvdumpAnalysis
|
||||
@@ -90,7 +90,7 @@ class Compare:
|
||||
|
||||
def _load_cvdump(self):
|
||||
logger.info("Parsing %s ...", self.pdb_file)
|
||||
cv = (
|
||||
self.cv = (
|
||||
Cvdump(self.pdb_file)
|
||||
.lines()
|
||||
.globals()
|
||||
@@ -100,9 +100,9 @@ class Compare:
|
||||
.types()
|
||||
.run()
|
||||
)
|
||||
res = CvdumpAnalysis(cv)
|
||||
self.cvdump_analysis = CvdumpAnalysis(self.cv)
|
||||
|
||||
for sym in res.nodes:
|
||||
for sym in self.cvdump_analysis.nodes:
|
||||
# Skip nodes where we have almost no information.
|
||||
# These probably came from SECTION CONTRIBUTIONS.
|
||||
if sym.name() is None and sym.node_type is None:
|
||||
@@ -116,6 +116,7 @@ class Compare:
|
||||
continue
|
||||
|
||||
addr = self.recomp_bin.get_abs_addr(sym.section, sym.offset)
|
||||
sym.addr = addr
|
||||
|
||||
# If this symbol is the final one in its section, we were not able to
|
||||
# estimate its size because we didn't have the total size of that section.
|
||||
@@ -165,7 +166,10 @@ class Compare:
|
||||
addr, sym.node_type, sym.name(), sym.decorated_name, sym.size()
|
||||
)
|
||||
|
||||
for (section, offset), (filename, line_no) in res.verified_lines.items():
|
||||
for (section, offset), (
|
||||
filename,
|
||||
line_no,
|
||||
) in self.cvdump_analysis.verified_lines.items():
|
||||
addr = self.recomp_bin.get_abs_addr(section, offset)
|
||||
self._lines_db.add_line(filename, line_no, addr)
|
||||
|
||||
@@ -736,6 +740,9 @@ class Compare:
|
||||
def get_variables(self) -> List[MatchInfo]:
    """Return the matches that the database reports for the DATA symbol type."""
    data_matches = self._db.get_matches_by_type(SymbolType.DATA)
    return data_matches
|
||||
|
||||
def get_match_options(self, addr: int) -> Optional[dict[str, Any]]:
    """Return the match options for `addr`.

    This is a thin pass-through: the lookup is delegated to
    `self._db.get_match_options` and its result is returned unchanged.
    """
    options = self._db.get_match_options(addr)
    return options
|
||||
|
||||
def compare_address(self, addr: int) -> Optional[DiffReport]:
|
||||
match = self._db.get_one_match(addr)
|
||||
if match is None:
|
||||
|
@@ -2,7 +2,7 @@
|
||||
addresses/symbols that we want to compare between the original and recompiled binaries."""
|
||||
import sqlite3
|
||||
import logging
|
||||
from typing import List, Optional
|
||||
from typing import Any, List, Optional
|
||||
from isledecomp.types import SymbolType
|
||||
from isledecomp.cvdump.demangler import get_vtordisp_name
|
||||
|
||||
@@ -335,7 +335,7 @@ class CompareDb:
|
||||
def skip_compare(self, orig: int):
    """Set the boolean "skip" option on the entry with address `orig`."""
    option_name = "skip"
    self._set_opt_bool(orig, option_name)
|
||||
|
||||
def get_match_options(self, addr: int) -> Optional[dict]:
|
||||
def get_match_options(self, addr: int) -> Optional[dict[str, Any]]:
|
||||
cur = self._db.execute(
|
||||
"""SELECT name, value FROM `match_options` WHERE addr = ?""", (addr,)
|
||||
)
|
||||
|
@@ -1,3 +1,4 @@
|
||||
from .symbols import SymbolsEntry
|
||||
from .analysis import CvdumpAnalysis
|
||||
from .parser import CvdumpParser
|
||||
from .runner import Cvdump
|
||||
|
@@ -1,5 +1,7 @@
|
||||
"""For collating the results from parsing cvdump.exe into a more directly useful format."""
|
||||
|
||||
from typing import Dict, List, Tuple, Optional
|
||||
from isledecomp.cvdump import SymbolsEntry
|
||||
from isledecomp.types import SymbolType
|
||||
from .parser import CvdumpParser
|
||||
from .demangler import demangle_string_const, demangle_vtable
|
||||
@@ -31,6 +33,8 @@ class CvdumpNode:
|
||||
# Size as reported by SECTION CONTRIBUTIONS section. Not guaranteed to be
|
||||
# accurate.
|
||||
section_contribution: Optional[int] = None
|
||||
addr: Optional[int] = None
|
||||
symbol_entry: Optional[SymbolsEntry] = None
|
||||
|
||||
def __init__(self, section: int, offset: int) -> None:
|
||||
self.section = section
|
||||
@@ -87,13 +91,12 @@ class CvdumpAnalysis:
|
||||
"""Collects the results from CvdumpParser into a list of nodes (i.e. symbols).
|
||||
These can then be analyzed by a downstream tool."""
|
||||
|
||||
nodes = List[CvdumpNode]
|
||||
verified_lines = Dict[Tuple[str, str], Tuple[str, str]]
|
||||
verified_lines: Dict[Tuple[str, str], Tuple[str, str]]
|
||||
|
||||
def __init__(self, parser: CvdumpParser):
|
||||
"""Read in as much information as we have from the parser.
|
||||
The more sections we have, the better our information will be."""
|
||||
node_dict = {}
|
||||
node_dict: Dict[Tuple[int, int], CvdumpNode] = {}
|
||||
|
||||
# PUBLICS is our roadmap for everything that follows.
|
||||
for pub in parser.publics:
|
||||
@@ -158,8 +161,11 @@ class CvdumpAnalysis:
|
||||
node_dict[key].friendly_name = sym.name
|
||||
node_dict[key].confirmed_size = sym.size
|
||||
node_dict[key].node_type = SymbolType.FUNCTION
|
||||
node_dict[key].symbol_entry = sym
|
||||
|
||||
self.nodes = [v for _, v in dict(sorted(node_dict.items())).items()]
|
||||
self.nodes: List[CvdumpNode] = [
|
||||
v for _, v in dict(sorted(node_dict.items())).items()
|
||||
]
|
||||
self._estimate_size()
|
||||
|
||||
def _estimate_size(self):
|
||||
|
@@ -2,6 +2,7 @@ import re
|
||||
from typing import Iterable, Tuple
|
||||
from collections import namedtuple
|
||||
from .types import CvdumpTypesParser
|
||||
from .symbols import CvdumpSymbolsParser
|
||||
|
||||
# e.g. `*** PUBLICS`
|
||||
_section_change_regex = re.compile(r"\*\*\* (?P<section>[A-Z/ ]{2,})")
|
||||
@@ -20,11 +21,6 @@ _publics_line_regex = re.compile(
|
||||
r"^(?P<type>\w+): \[(?P<section>\w{4}):(?P<offset>\w{8})], Flags: (?P<flags>\w{8}), (?P<name>\S+)"
|
||||
)
|
||||
|
||||
# e.g. `(00008C) S_GPROC32: [0001:00034E90], Cb: 00000007, Type: 0x1024, ViewROI::IntrinsicImportance`
|
||||
_symbol_line_regex = re.compile(
|
||||
r"\(\w+\) (?P<type>\S+): \[(?P<section>\w{4}):(?P<offset>\w{8})\], Cb: (?P<size>\w+), Type:\s+\S+, (?P<name>.+)"
|
||||
)
|
||||
|
||||
# e.g. ` Debug start: 00000008, Debug end: 0000016E`
|
||||
_gproc_debug_regex = re.compile(
|
||||
r"\s*Debug start: (?P<start>\w{8}), Debug end: (?P<end>\w{8})"
|
||||
@@ -52,9 +48,6 @@ LinesEntry = namedtuple("LinesEntry", "filename line_no section offset")
|
||||
# only place you can find the C symbols (library functions, smacker, etc)
|
||||
PublicsEntry = namedtuple("PublicsEntry", "type section offset flags name")
|
||||
|
||||
# S_GPROC32 = functions
|
||||
SymbolsEntry = namedtuple("SymbolsEntry", "type section offset size name")
|
||||
|
||||
# (Estimated) size of any symbol
|
||||
SizeRefEntry = namedtuple("SizeRefEntry", "module section offset size")
|
||||
|
||||
@@ -72,12 +65,16 @@ class CvdumpParser:
|
||||
|
||||
self.lines = {}
|
||||
self.publics = []
|
||||
self.symbols = []
|
||||
self.sizerefs = []
|
||||
self.globals = []
|
||||
self.modules = []
|
||||
|
||||
self.types = CvdumpTypesParser()
|
||||
self.symbols_parser = CvdumpSymbolsParser()
|
||||
|
||||
@property
def symbols(self):
    """Expose the symbol entries collected by `self.symbols_parser`."""
    symbols_parser = self.symbols_parser
    return symbols_parser.symbols
|
||||
|
||||
def _lines_section(self, line: str):
|
||||
"""Parsing entries from the LINES section. We only care about the pairs of
|
||||
@@ -127,20 +124,6 @@ class CvdumpParser:
|
||||
)
|
||||
)
|
||||
|
||||
def _symbols_section(self, line: str):
|
||||
"""We are interested in S_GPROC32 symbols only."""
|
||||
if (match := _symbol_line_regex.match(line)) is not None:
|
||||
if match.group("type") == "S_GPROC32":
|
||||
self.symbols.append(
|
||||
SymbolsEntry(
|
||||
type=match.group("type"),
|
||||
section=int(match.group("section"), 16),
|
||||
offset=int(match.group("offset"), 16),
|
||||
size=int(match.group("size"), 16),
|
||||
name=match.group("name"),
|
||||
)
|
||||
)
|
||||
|
||||
def _section_contributions(self, line: str):
|
||||
"""Gives the size of elements across all sections of the binary.
|
||||
This is the easiest way to get the data size for .data and .rdata
|
||||
@@ -177,7 +160,7 @@ class CvdumpParser:
|
||||
self.types.read_line(line)
|
||||
|
||||
elif self._section == "SYMBOLS":
|
||||
self._symbols_section(line)
|
||||
self.symbols_parser.read_line(line)
|
||||
|
||||
elif self._section == "LINES":
|
||||
self._lines_section(line)
|
||||
|
153
tools/isledecomp/isledecomp/cvdump/symbols.py
Normal file
153
tools/isledecomp/isledecomp/cvdump/symbols.py
Normal file
@@ -0,0 +1,153 @@
|
||||
from dataclasses import dataclass, field
|
||||
import logging
|
||||
import re
|
||||
from re import Match
|
||||
from typing import NamedTuple, Optional
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class StackOrRegisterSymbol(NamedTuple):
    """One stack or register symbol line from the SYMBOLS section.

    The symbols parser builds these for the symbol types it treats as
    stack/register symbols and appends them to the function currently being
    parsed.
    """

    # Symbol type exactly as it appears in the cvdump line.
    symbol_type: str
    # First field of the line's second part (e.g. `esi`).
    location: str
    """Should always be set/converted to lowercase."""
    # Value of the `Type:` field.
    data_type: str
    # Everything after the type field.
    name: str
|
||||
|
||||
|
||||
# S_GPROC32 = functions
@dataclass
class SymbolsEntry:
    # pylint: disable=too-many-instance-attributes
    """A function symbol from the SYMBOLS section, plus details read from the lines that follow it."""

    # Symbol type string as found in the cvdump line.
    type: str
    # Section number and offset, both parsed from hexadecimal `[ssss:oooooooo]`.
    section: int
    offset: int
    # Value of the `Cb:` field, parsed from hexadecimal.
    size: int
    # Value of the `Type:` field, kept as a string.
    func_type: str
    name: str
    # Stack/register symbols appended by the parser while this function is current.
    stack_symbols: list[StackOrRegisterSymbol] = field(default_factory=list)
    # Set to True when a `Flags: Frame Ptr Present` line is read for this function.
    frame_pointer_present: bool = False
    addr: Optional[int] = None  # Absolute address. Will be set later, if at all
|
||||
|
||||
|
||||
class CvdumpSymbolsParser:
    """Parser for cvdump output, SYMBOLS section.

    Feed the section to `read_line` one line at a time. Each valid `S_GPROC32`
    line creates a `SymbolsEntry` that is appended to `self.symbols` and becomes
    the current function. Stack/register symbols and the frame pointer flag read
    afterwards are recorded on that entry until an `S_END` line resets it.
    """

    # Parses the first part, e.g. `(00008C) S_GPROC32`, and splits off the second
    # part after the colon (if it exists). There are three cases:
    # - no colon, e.g. `(000350) S_END`
    # - colon but no data, e.g. `(000370) S_COMPILE:`
    # - colon and data, e.g. `(000304) S_REGISTER: esi, Type: 0x1E14, this`
    # The optional colon is nested inside the pattern on purpose: a top-level
    # `|` alternative would also match lines that consist of a lone `:`.
    _symbol_line_generic_regex = re.compile(
        r"\(\w+\)\s+(?P<symbol_type>[^\s:]+)(?::(?:\s+(?P<second_part>\S.*))?)?"
    )

    # Parses the second part of a function symbol, e.g.
    # `[0001:00034E90], Cb: 00000007, Type: 0x1024, ViewROI::IntrinsicImportance`
    _symbol_line_function_regex = re.compile(
        r"\[(?P<section>\w{4}):(?P<offset>\w{8})\], Cb: (?P<size>\w+), Type:\s+(?P<func_type>[^\s,]+), (?P<name>.+)"
    )

    # Parses the second part of a stack or register symbol, e.g.
    # `esi, Type: 0x1E14, this`
    _stack_register_symbol_regex = re.compile(
        r"(?P<location>\S+), Type:\s+(?P<data_type>[\w()]+), (?P<name>.+)$"
    )

    _debug_start_end_regex = re.compile(
        r"^\s*Debug start: (?P<debug_start>\w+), Debug end: (?P<debug_end>\w+)$"
    )

    _parent_end_next_regex = re.compile(
        r"\s*Parent: (?P<parent_addr>\w+), End: (?P<end_addr>\w+), Next: (?P<next_addr>\w+)$"
    )

    _flags_frame_pointer_regex = re.compile(r"\s*Flags: Frame Ptr Present$")

    _register_stack_symbols = ["S_BPREL32", "S_REGISTER"]

    # List the unhandled types so we can check exhaustiveness
    _unhandled_symbols = [
        "S_COMPILE",
        "S_OBJNAME",
        "S_THUNK32",
        "S_LABEL32",
        "S_LDATA32",
        "S_LPROC32",
        "S_UDT",
    ]

    def __init__(self):
        self.symbols: list[SymbolsEntry] = []
        self.current_function: Optional[SymbolsEntry] = None

    def read_line(self, line: str):
        """Process a single line of the SYMBOLS section.

        Symbol lines are dispatched to `_parse_generic_case`. Known detail lines
        are either ignored or recorded on the current function. Anything else is
        logged at debug level.
        """
        if (match := self._symbol_line_generic_regex.match(line)) is not None:
            self._parse_generic_case(line, match)
        elif self._parent_end_next_regex.match(line) is not None:
            # We do not need this info at the moment, might be useful in the future
            pass
        elif self._debug_start_end_regex.match(line) is not None:
            # We do not need this info at the moment, might be useful in the future
            pass
        elif self._flags_frame_pointer_regex.match(line) is not None:
            if self.current_function is None:
                logger.error(
                    "Found a `Flags: Frame Ptr Present` but self.current_function is None"
                )
                return
            self.current_function.frame_pointer_present = True
        else:
            # Most of these are either `** Module: [...]` or data we do not care about
            logger.debug("Unhandled line: %s", line.rstrip("\n"))

    def _parse_generic_case(self, line, line_match: Match[str]):
        """Handle a line that matched `_symbol_line_generic_regex`.

        `line` is only used for log messages; `line_match` carries the symbol
        type and the optional second part of the line.
        """
        symbol_type: str = line_match.group("symbol_type")
        second_part: Optional[str] = line_match.group("second_part")
        # Only strip the line terminator so that lines passed without one are
        # not truncated in the log output.
        printable_line = line.rstrip("\n")

        if symbol_type == "S_GPROC32":
            # A function symbol without any data is malformed input; report it
            # the same way as data that fails to parse.
            match = (
                None
                if second_part is None
                else self._symbol_line_function_regex.match(second_part)
            )
            if match is None:
                logger.error("Invalid function symbol: %s", printable_line)
                return
            self.current_function = SymbolsEntry(
                type=symbol_type,
                section=int(match.group("section"), 16),
                offset=int(match.group("offset"), 16),
                size=int(match.group("size"), 16),
                func_type=match.group("func_type"),
                name=match.group("name"),
            )
            self.symbols.append(self.current_function)

        elif symbol_type in self._register_stack_symbols:
            if self.current_function is None:
                logger.error(
                    "Found stack/register outside of function: %s", printable_line
                )
                return
            match = (
                None
                if second_part is None
                else self._stack_register_symbol_regex.match(second_part)
            )
            if match is None:
                logger.error("Invalid stack/register symbol: %s", printable_line)
                return

            new_symbol = StackOrRegisterSymbol(
                symbol_type=symbol_type,
                location=match.group("location").lower(),
                data_type=match.group("data_type"),
                name=match.group("name"),
            )
            self.current_function.stack_symbols.append(new_symbol)

        elif symbol_type == "S_END":
            self.current_function = None
        elif symbol_type in self._unhandled_symbols:
            return
        else:
            logger.error("Unhandled symbol type: %s", printable_line)
|
@@ -1,5 +1,9 @@
|
||||
import re
|
||||
from typing import Dict, List, NamedTuple, Optional
|
||||
import logging
|
||||
from typing import Any, Dict, List, NamedTuple, Optional
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CvdumpTypeError(Exception):
|
||||
@@ -42,7 +46,7 @@ class ScalarType(NamedTuple):
|
||||
|
||||
class TypeInfo(NamedTuple):
|
||||
key: str
|
||||
size: int
|
||||
size: Optional[int]
|
||||
name: Optional[str] = None
|
||||
members: Optional[List[FieldListItem]] = None
|
||||
|
||||
@@ -156,6 +160,10 @@ class CvdumpTypesParser:
|
||||
# LF_FIELDLIST member name (2/2)
|
||||
MEMBER_RE = re.compile(r"^\s+member name = '(?P<name>.*)'$")
|
||||
|
||||
LF_FIELDLIST_ENUMERATE = re.compile(
|
||||
r"^\s+list\[\d+\] = LF_ENUMERATE,.*value = (?P<value>\d+), name = '(?P<name>[^']+)'$"
|
||||
)
|
||||
|
||||
# LF_ARRAY element type
|
||||
ARRAY_ELEMENT_RE = re.compile(r"^\s+Element type = (?P<type>.*)")
|
||||
|
||||
@@ -169,12 +177,53 @@ class CvdumpTypesParser:
|
||||
|
||||
# LF_CLASS/LF_STRUCTURE name and other info
|
||||
CLASS_NAME_RE = re.compile(
|
||||
r"^\s+Size = (?P<size>\d+), class name = (?P<name>.+), UDT\((?P<udt>0x\w+)\)"
|
||||
r"^\s+Size = (?P<size>\d+), class name = (?P<name>(?:[^,]|,\S)+)(?:, UDT\((?P<udt>0x\w+)\))?"
|
||||
)
|
||||
|
||||
# LF_MODIFIER, type being modified
|
||||
MODIFIES_RE = re.compile(r".*modifies type (?P<type>.*)$")
|
||||
|
||||
# LF_ARGLIST number of entries
|
||||
LF_ARGLIST_ARGCOUNT = re.compile(r".*argument count = (?P<argcount>\d+)$")
|
||||
|
||||
# LF_ARGLIST list entry
|
||||
LF_ARGLIST_ENTRY = re.compile(
|
||||
r"^\s+list\[(?P<index>\d+)\] = (?P<arg_type>[\w()]+)$"
|
||||
)
|
||||
|
||||
# LF_POINTER element
|
||||
LF_POINTER_ELEMENT = re.compile(r"^\s+Element type : (?P<element_type>.+)$")
|
||||
|
||||
# LF_MFUNCTION attribute key-value pairs
|
||||
LF_MFUNCTION_ATTRIBUTES = [
|
||||
re.compile(r"\s*Return type = (?P<return_type>[\w()]+)$"),
|
||||
re.compile(r"\s*Class type = (?P<class_type>[\w()]+)$"),
|
||||
re.compile(r"\s*This type = (?P<this_type>[\w()]+)$"),
|
||||
# Call type may contain whitespace
|
||||
re.compile(r"\s*Call type = (?P<call_type>[\w()\s]+)$"),
|
||||
re.compile(r"\s*Parms = (?P<num_params>[\w()]+)$"), # LF_MFUNCTION only
|
||||
re.compile(r"\s*# Parms = (?P<num_params>[\w()]+)$"), # LF_PROCEDURE only
|
||||
re.compile(r"\s*Arg list type = (?P<arg_list_type>[\w()]+)$"),
|
||||
re.compile(
|
||||
r"\s*This adjust = (?P<this_adjust>[\w()]+)$"
|
||||
), # TODO: figure out the meaning
|
||||
re.compile(
|
||||
r"\s*Func attr = (?P<func_attr>[\w()]+)$"
|
||||
), # Only for completeness, is always `none`
|
||||
]
|
||||
|
||||
LF_ENUM_ATTRIBUTES = [
|
||||
re.compile(r"^\s*# members = (?P<num_members>\d+)$"),
|
||||
re.compile(r"^\s*enum name = (?P<name>.+)$"),
|
||||
]
|
||||
LF_ENUM_TYPES = re.compile(
|
||||
r"^\s*type = (?P<underlying_type>\S+) field list type (?P<field_type>0x\w{4})$"
|
||||
)
|
||||
LF_ENUM_UDT = re.compile(r"^\s*UDT\((?P<udt>0x\w+)\)$")
|
||||
LF_UNION_LINE = re.compile(
|
||||
r"^.*field list type (?P<field_type>0x\w+),.*Size = (?P<size>\d+)\s*,class name = (?P<name>(?:[^,]|,\S)+),\s.*UDT\((?P<udt>0x\w+)\)$"
|
||||
)
|
||||
|
||||
MODES_OF_INTEREST = {
|
||||
"LF_ARRAY",
|
||||
"LF_CLASS",
|
||||
@@ -183,12 +232,16 @@ class CvdumpTypesParser:
|
||||
"LF_MODIFIER",
|
||||
"LF_POINTER",
|
||||
"LF_STRUCTURE",
|
||||
"LF_ARGLIST",
|
||||
"LF_MFUNCTION",
|
||||
"LF_PROCEDURE",
|
||||
"LF_UNION",
|
||||
}
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.mode: Optional[str] = None
|
||||
self.last_key = ""
|
||||
self.keys = {}
|
||||
self.keys: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
def _new_type(self):
|
||||
"""Prepare a new dict for the type we just parsed.
|
||||
@@ -211,13 +264,20 @@ class CvdumpTypesParser:
|
||||
obj = self.keys[self.last_key]
|
||||
obj["members"][-1]["name"] = name
|
||||
|
||||
def _get_field_list(self, type_obj: Dict) -> List[FieldListItem]:
|
||||
def _add_variant(self, name: str, value: int):
    """Record a `{"name": ..., "value": ...}` variant on the type stored under `self.last_key`.

    The "variants" list is created on first use.
    """
    current_type = self.keys[self.last_key]
    current_type.setdefault("variants", []).append({"name": name, "value": value})
|
||||
|
||||
def _get_field_list(self, type_obj: Dict[str, Any]) -> List[FieldListItem]:
|
||||
"""Return the field list for the given LF_CLASS/LF_STRUCTURE reference"""
|
||||
|
||||
if type_obj.get("type") == "LF_FIELDLIST":
|
||||
field_obj = type_obj
|
||||
else:
|
||||
field_list_type = type_obj.get("field_list_type")
|
||||
field_list_type = type_obj["field_list_type"]
|
||||
field_obj = self.keys[field_list_type]
|
||||
|
||||
members: List[FieldListItem] = []
|
||||
@@ -253,6 +313,9 @@ class CvdumpTypesParser:
|
||||
raise CvdumpIntegrityError("No array element type")
|
||||
|
||||
array_element_size = self.get(array_type).size
|
||||
assert (
|
||||
array_element_size is not None
|
||||
), "Encountered an array whose type has no size"
|
||||
|
||||
n_elements = type_obj["size"] // array_element_size
|
||||
|
||||
@@ -285,7 +348,10 @@ class CvdumpTypesParser:
|
||||
|
||||
# These type references are just a wrapper around a scalar
|
||||
if obj.get("type") == "LF_ENUM":
|
||||
return self.get("T_INT4")
|
||||
underlying_type = obj.get("underlying_type")
|
||||
if underlying_type is None:
|
||||
raise CvdumpKeyError(f"Missing 'underlying_type' in {obj}")
|
||||
return self.get(underlying_type)
|
||||
|
||||
if obj.get("type") == "LF_POINTER":
|
||||
return self.get("T_32PVOID")
|
||||
@@ -350,6 +416,9 @@ class CvdumpTypesParser:
|
||||
|
||||
obj = self.get(type_key)
|
||||
total_size = obj.size
|
||||
assert (
|
||||
total_size is not None
|
||||
), "Called get_scalar_gapless() on a type without size"
|
||||
|
||||
scalars = self.get_scalars(type_key)
|
||||
|
||||
@@ -383,6 +452,11 @@ class CvdumpTypesParser:
|
||||
return member_list_to_struct_string(members)
|
||||
|
||||
def read_line(self, line: str):
|
||||
if line.endswith("\n"):
|
||||
line = line[:-1]
|
||||
if len(line) == 0:
|
||||
return
|
||||
|
||||
if (match := self.INDEX_RE.match(line)) is not None:
|
||||
type_ = match.group(2)
|
||||
if type_ not in self.MODES_OF_INTEREST:
|
||||
@@ -393,6 +467,12 @@ class CvdumpTypesParser:
|
||||
self.last_key = match.group(1)
|
||||
self.mode = type_
|
||||
self._new_type()
|
||||
|
||||
if type_ == "LF_ARGLIST":
|
||||
submatch = self.LF_ARGLIST_ARGCOUNT.match(line)
|
||||
assert submatch is not None
|
||||
self.keys[self.last_key]["argcount"] = int(submatch.group("argcount"))
|
||||
# TODO: This should be validated in another pass
|
||||
return
|
||||
|
||||
if self.mode is None:
|
||||
@@ -413,41 +493,170 @@ class CvdumpTypesParser:
|
||||
self._set("size", int(match.group("length")))
|
||||
|
||||
elif self.mode == "LF_FIELDLIST":
|
||||
# If this class has a vtable, create a mock member at offset 0
|
||||
if (match := self.VTABLE_RE.match(line)) is not None:
|
||||
# For our purposes, any pointer type will do
|
||||
self._add_member(0, "T_32PVOID")
|
||||
self._set_member_name("vftable")
|
||||
self.read_fieldlist_line(line)
|
||||
|
||||
# Superclass is set here in the fieldlist rather than in LF_CLASS
|
||||
elif (match := self.SUPERCLASS_RE.match(line)) is not None:
|
||||
self._set("super", normalize_type_id(match.group("type")))
|
||||
elif self.mode == "LF_ARGLIST":
|
||||
self.read_arglist_line(line)
|
||||
|
||||
# Member offset and type given on the first of two lines.
|
||||
elif (match := self.LIST_RE.match(line)) is not None:
|
||||
self._add_member(
|
||||
int(match.group("offset")), normalize_type_id(match.group("type"))
|
||||
)
|
||||
elif self.mode in ["LF_MFUNCTION", "LF_PROCEDURE"]:
|
||||
self.read_mfunction_line(line)
|
||||
|
||||
# Name of the member read on the second of two lines.
|
||||
elif (match := self.MEMBER_RE.match(line)) is not None:
|
||||
self._set_member_name(match.group("name"))
|
||||
elif self.mode in ["LF_CLASS", "LF_STRUCTURE"]:
|
||||
self.read_class_or_struct_line(line)
|
||||
|
||||
else: # LF_CLASS or LF_STRUCTURE
|
||||
# Match the reference to the associated LF_FIELDLIST
|
||||
if (match := self.CLASS_FIELD_RE.match(line)) is not None:
|
||||
if match.group("field_type") == "0x0000":
|
||||
# Not redundant. UDT might not match the key.
|
||||
# These cases get reported as UDT mismatch.
|
||||
self._set("is_forward_ref", True)
|
||||
else:
|
||||
field_list_type = normalize_type_id(match.group("field_type"))
|
||||
self._set("field_list_type", field_list_type)
|
||||
elif self.mode == "LF_POINTER":
|
||||
self.read_pointer_line(line)
|
||||
|
||||
elif self.mode == "LF_ENUM":
|
||||
self.read_enum_line(line)
|
||||
|
||||
elif self.mode == "LF_UNION":
|
||||
self.read_union_line(line)
|
||||
|
||||
else:
|
||||
# Check for exhaustiveness
|
||||
logger.error("Unhandled data in mode: %s", self.mode)
|
||||
|
||||
def read_fieldlist_line(self, line: str):
    """Handle one line that belongs to an LF_FIELDLIST record.

    The line is tried against the vtable, superclass, member (two-line form)
    and enumerate patterns in that order. The first pattern that matches
    decides what is recorded, and lines matching none of them are ignored.
    """
    # If this class has a vtable, create a mock member at offset 0.
    # For our purposes, any pointer type will do.
    if self.VTABLE_RE.match(line) is not None:
        self._add_member(0, "T_32PVOID")
        self._set_member_name("vftable")
        return

    # Superclass is set here in the fieldlist rather than in LF_CLASS
    superclass = self.SUPERCLASS_RE.match(line)
    if superclass is not None:
        self._set("super", normalize_type_id(superclass.group("type")))
        return

    # Member offset and type given on the first of two lines.
    member_head = self.LIST_RE.match(line)
    if member_head is not None:
        offset = int(member_head.group("offset"))
        self._add_member(offset, normalize_type_id(member_head.group("type")))
        return

    # Name of the member read on the second of two lines.
    member_name = self.MEMBER_RE.match(line)
    if member_name is not None:
        self._set_member_name(member_name.group("name"))
        return

    # Enum variants are listed with their value and name on a single line.
    variant = self.LF_FIELDLIST_ENUMERATE.match(line)
    if variant is not None:
        self._add_variant(variant.group("name"), int(variant.group("value")))
|
||||
|
||||
def read_class_or_struct_line(self, line: str):
    """Handle one line that belongs to an LF_CLASS or LF_STRUCTURE record.

    Records the field list reference (or the forward-reference marker) and the
    name, optional UDT and size of the type. Lines that match none of the
    expected forms are logged as errors.
    """
    # Match the reference to the associated LF_FIELDLIST
    if (match := self.CLASS_FIELD_RE.match(line)) is not None:
        if match.group("field_type") == "0x0000":
            # Not redundant. UDT might not match the key.
            # These cases get reported as UDT mismatch.
            self._set("is_forward_ref", True)
        else:
            field_list_type = normalize_type_id(match.group("field_type"))
            self._set("field_list_type", field_list_type)

    elif line.lstrip().startswith("Derivation list type"):
        # We do not care about the second line, but we still match it so we see an error
        # when another line fails to match
        pass
    elif (match := self.CLASS_NAME_RE.match(line)) is not None:
        # Last line has the vital information.
        # If this is a FORWARD REF, we need to follow the UDT pointer
        # to get the actual class details.
        self._set("name", match.group("name"))
        udt = match.group("udt")
        if udt is not None:
            self._set("udt", normalize_type_id(udt))
        self._set("size", int(match.group("size")))
    else:
        # Strip only a line terminator: slicing off the last character would
        # truncate lines that arrive without one.
        logger.error("Unmatched line in class: %s", line.rstrip("\n"))
|
||||
|
||||
def read_arglist_line(self, line: str):
    """Handle one `list[i] = <type>` line of an LF_ARGLIST record.

    The argument type is appended to the "args" list of the type stored under
    `self.last_key`. Lines that do not match are logged as errors.

    Raises:
        AssertionError: if the index on the line is not the next list position.
    """
    if (match := self.LF_ARGLIST_ENTRY.match(line)) is not None:
        obj = self.keys[self.last_key]
        arglist: list = obj.setdefault("args", [])
        assert int(match.group("index")) == len(
            arglist
        ), "Argument list out of sync"
        arglist.append(match.group("arg_type"))
    else:
        # Strip only a line terminator: slicing off the last character would
        # truncate lines that arrive without one.
        logger.error("Unmatched line in arglist: %s", line.rstrip("\n"))
|
||||
|
||||
def read_pointer_line(self, line):
    """Handle one line that belongs to an LF_POINTER record.

    Only the element type is recorded. The other known attribute lines are
    recognized by their prefix and ignored; anything else is logged as an error.
    """
    if (match := self.LF_POINTER_ELEMENT.match(line)) is not None:
        self._set("element_type", match.group("element_type"))
        return

    # We don't parse these lines, but we still want to check for exhaustiveness
    # in case we missed some relevant data
    known_prefixes = ("Pointer", "const Pointer", "L-value", "volatile")
    if not line.strip().startswith(known_prefixes):
        # Strip only a line terminator: slicing off the last character would
        # truncate lines that arrive without one.
        logger.error("Unrecognized pointer attribute: %s", line.rstrip("\n"))
|
||||
|
||||
def read_mfunction_line(self, line: str):
    """Parse the comma-separated attributes on one LF_MFUNCTION/LF_PROCEDURE line.

    The layout is not consistent, so we want to be as robust as possible here.
    - Example 1:
        Return type = T_LONG(0012), Call type = C Near
        Func attr = none
    - Example 2:
        Return type = T_CHAR(0010), Class type = 0x101A, This type = 0x101B,
        Call type = ThisCall, Func attr = none

    Every attribute that `parse_function_attribute` recognizes is merged into
    the dict of the type stored under `self.last_key`.
    """
    obj = self.keys[self.last_key]

    for pair in line.split(","):
        # A trailing comma leaves an empty or whitespace-only segment behind.
        # `"".isspace()` is False, so test the stripped segment instead of
        # reporting the leftover as an unknown attribute.
        if not pair.strip():
            continue
        obj.update(self.parse_function_attribute(pair))
|
||||
|
||||
def parse_function_attribute(self, pair: str) -> dict[str, str]:
    """Parse a single `key = value` attribute of a function type.

    Returns the named groups of the first pattern in `LF_MFUNCTION_ATTRIBUTES`
    that matches `pair`. If none matches, an error is logged and an empty dict
    is returned.
    """
    # The generator is lazy, so patterns after the first hit are never tried.
    attempts = (regex.match(pair) for regex in self.LF_MFUNCTION_ATTRIBUTES)
    hit = next((found for found in attempts if found is not None), None)
    if hit is None:
        logger.error("Unknown attribute in function: %s", pair)
        return {}
    return hit.groupdict()
|
||||
|
||||
def read_enum_line(self, line: str):
    """Parse the attributes on one line of an LF_ENUM record.

    Every attribute that `parse_enum_attribute` recognizes is merged into the
    dict of the type stored under `self.last_key`.
    """
    obj = self.keys[self.last_key]

    # We need special comma handling because commas may appear in the name.
    # Splitting by "," yields the wrong result.
    for pair in line.split(", "):
        if pair.endswith(","):
            pair = pair[:-1]
        # A trailing separator leaves an empty or whitespace-only segment
        # behind. `"".isspace()` is False, so test the stripped segment instead
        # of reporting the leftover as an unknown attribute.
        if not pair.strip():
            continue
        obj.update(self.parse_enum_attribute(pair))
|
||||
|
||||
def parse_enum_attribute(self, attribute: str) -> dict[str, Any]:
    """Parse a single attribute of an LF_ENUM record.

    Returns the key/value pairs to merge into the enum's type dict. If the
    attribute is not recognized, an error is logged and an empty dict is
    returned.
    """
    # Plain `key = value` attributes.
    for attribute_regex in self.LF_ENUM_ATTRIBUTES:
        found = attribute_regex.match(attribute)
        if found is not None:
            return found.groupdict()

    # Bare flags map directly onto a boolean entry.
    flag_key = {"NESTED": "is_nested", "FORWARD REF": "is_forward_ref"}.get(attribute)
    if flag_key is not None:
        return {flag_key: True}

    if attribute.startswith("UDT"):
        udt_match = self.LF_ENUM_UDT.match(attribute)
        assert udt_match is not None
        return {"udt": normalize_type_id(udt_match.group("udt"))}

    # Underlying type and field list type share one attribute.
    types_match = self.LF_ENUM_TYPES.match(attribute)
    if types_match is None:
        logger.error("Unknown attribute in enum: %s", attribute)
        return {}

    result = types_match.groupdict()
    result["underlying_type"] = normalize_type_id(result["underlying_type"])
    return result
|
||||
|
||||
def read_union_line(self, line: str):
    """This is a rather barebones handler, only parsing the size

    Besides the size, the name, the UDT and the forward-reference marker of
    the union are recorded.

    Raises:
        AssertionError: if the line does not have the expected LF_UNION layout.
    """
    union_match = self.LF_UNION_LINE.match(line)
    if union_match is None:
        raise AssertionError(f"Unhandled in union: {line}")

    name, field_type, size, udt = union_match.group(
        "name", "field_type", "size", "udt"
    )

    self._set("name", name)
    # A field list type of zero marks a forward reference.
    if field_type == "0x0000":
        self._set("is_forward_ref", True)

    self._set("size", int(size))
    self._set("udt", normalize_type_id(udt))
|
||||
|
@@ -9,6 +9,21 @@ from isledecomp.cvdump.types import (
|
||||
)
|
||||
|
||||
TEST_LINES = """
|
||||
0x1018 : Length = 18, Leaf = 0x1201 LF_ARGLIST argument count = 3
|
||||
list[0] = 0x100D
|
||||
list[1] = 0x1016
|
||||
list[2] = 0x1017
|
||||
|
||||
0x1019 : Length = 14, Leaf = 0x1008 LF_PROCEDURE
|
||||
Return type = T_LONG(0012), Call type = C Near
|
||||
Func attr = none
|
||||
# Parms = 3, Arg list type = 0x1018
|
||||
|
||||
0x101e : Length = 26, Leaf = 0x1009 LF_MFUNCTION
|
||||
Return type = T_CHAR(0010), Class type = 0x101A, This type = 0x101B,
|
||||
Call type = ThisCall, Func attr = none
|
||||
Parms = 2, Arg list type = 0x101d, This adjust = 0
|
||||
|
||||
0x1028 : Length = 10, Leaf = 0x1001 LF_MODIFIER
|
||||
const, modifies type T_REAL32(0040)
|
||||
|
||||
@@ -47,16 +62,16 @@ TEST_LINES = """
|
||||
Element type = T_UCHAR(0020)
|
||||
Index type = T_SHORT(0011)
|
||||
length = 8
|
||||
Name =
|
||||
Name =
|
||||
|
||||
0x10ea : Length = 14, Leaf = 0x1503 LF_ARRAY
|
||||
Element type = 0x1028
|
||||
Index type = T_SHORT(0011)
|
||||
length = 12
|
||||
Name =
|
||||
Name =
|
||||
|
||||
0x11f0 : Length = 30, Leaf = 0x1504 LF_CLASS
|
||||
# members = 0, field list type 0x0000, FORWARD REF,
|
||||
# members = 0, field list type 0x0000, FORWARD REF,
|
||||
Derivation list type 0x0000, VT shape type 0x0000
|
||||
Size = 0, class name = MxRect32, UDT(0x00001214)
|
||||
|
||||
@@ -98,22 +113,22 @@ TEST_LINES = """
|
||||
member name = 'm_bottom'
|
||||
|
||||
0x1214 : Length = 30, Leaf = 0x1504 LF_CLASS
|
||||
# members = 34, field list type 0x1213, CONSTRUCTOR, OVERLOAD,
|
||||
# members = 34, field list type 0x1213, CONSTRUCTOR, OVERLOAD,
|
||||
Derivation list type 0x0000, VT shape type 0x0000
|
||||
Size = 16, class name = MxRect32, UDT(0x00001214)
|
||||
|
||||
0x1220 : Length = 30, Leaf = 0x1504 LF_CLASS
|
||||
# members = 0, field list type 0x0000, FORWARD REF,
|
||||
# members = 0, field list type 0x0000, FORWARD REF,
|
||||
Derivation list type 0x0000, VT shape type 0x0000
|
||||
Size = 0, class name = MxCore, UDT(0x00004060)
|
||||
|
||||
0x14db : Length = 30, Leaf = 0x1504 LF_CLASS
|
||||
# members = 0, field list type 0x0000, FORWARD REF,
|
||||
# members = 0, field list type 0x0000, FORWARD REF,
|
||||
Derivation list type 0x0000, VT shape type 0x0000
|
||||
Size = 0, class name = MxString, UDT(0x00004db6)
|
||||
|
||||
0x19b0 : Length = 34, Leaf = 0x1505 LF_STRUCTURE
|
||||
# members = 0, field list type 0x0000, FORWARD REF,
|
||||
# members = 0, field list type 0x0000, FORWARD REF,
|
||||
Derivation list type 0x0000, VT shape type 0x0000
|
||||
Size = 0, class name = ROIColorAlias, UDT(0x00002a76)
|
||||
|
||||
@@ -123,6 +138,12 @@ TEST_LINES = """
|
||||
length = 440
|
||||
Name =
|
||||
|
||||
0x2339 : Length = 26, Leaf = 0x1506 LF_UNION
|
||||
# members = 0, field list type 0x0000, FORWARD REF, Size = 0 ,class name = FlagBitfield, UDT(0x00002e85)
|
||||
|
||||
0x2e85 : Length = 26, Leaf = 0x1506 LF_UNION
|
||||
# members = 8, field list type 0x2e84, Size = 1 ,class name = FlagBitfield, UDT(0x00002e85)
|
||||
|
||||
0x2a75 : Length = 98, Leaf = 0x1203 LF_FIELDLIST
|
||||
list[0] = LF_MEMBER, public, type = T_32PRCHAR(0470), offset = 0
|
||||
member name = 'm_name'
|
||||
@@ -136,18 +157,18 @@ TEST_LINES = """
|
||||
member name = 'm_unk0x10'
|
||||
|
||||
0x2a76 : Length = 34, Leaf = 0x1505 LF_STRUCTURE
|
||||
# members = 5, field list type 0x2a75,
|
||||
# members = 5, field list type 0x2a75,
|
||||
Derivation list type 0x0000, VT shape type 0x0000
|
||||
Size = 20, class name = ROIColorAlias, UDT(0x00002a76)
|
||||
|
||||
0x22d4 : Length = 154, Leaf = 0x1203 LF_FIELDLIST
|
||||
list[0] = LF_VFUNCTAB, type = 0x20FC
|
||||
list[1] = LF_METHOD, count = 3, list = 0x22D0, name = 'MxVariable'
|
||||
list[2] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F0F,
|
||||
list[2] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F0F,
|
||||
vfptr offset = 0, name = 'GetValue'
|
||||
list[3] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F10,
|
||||
list[3] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F10,
|
||||
vfptr offset = 4, name = 'SetValue'
|
||||
list[4] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F11,
|
||||
list[4] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F11,
|
||||
vfptr offset = 8, name = '~MxVariable'
|
||||
list[5] = LF_ONEMETHOD, public, VANILLA, index = 0x22D3, name = 'GetKey'
|
||||
list[6] = LF_MEMBER, protected, type = 0x14DB, offset = 4
|
||||
@@ -156,10 +177,15 @@ TEST_LINES = """
|
||||
member name = 'm_value'
|
||||
|
||||
0x22d5 : Length = 34, Leaf = 0x1504 LF_CLASS
|
||||
# members = 10, field list type 0x22d4, CONSTRUCTOR,
|
||||
# members = 10, field list type 0x22d4, CONSTRUCTOR,
|
||||
Derivation list type 0x0000, VT shape type 0x20fb
|
||||
Size = 36, class name = MxVariable, UDT(0x00004041)
|
||||
|
||||
0x3c45 : Length = 50, Leaf = 0x1203 LF_FIELDLIST
|
||||
list[0] = LF_ENUMERATE, public, value = 1, name = 'c_read'
|
||||
list[1] = LF_ENUMERATE, public, value = 2, name = 'c_write'
|
||||
list[2] = LF_ENUMERATE, public, value = 4, name = 'c_text'
|
||||
|
||||
0x3cc2 : Length = 38, Leaf = 0x1507 LF_ENUM
|
||||
# members = 64, type = T_INT4(0074) field list type 0x3cc1
|
||||
NESTED, enum name = JukeBox::JukeBoxScript, UDT(0x00003cc2)
|
||||
@@ -171,22 +197,22 @@ NESTED, enum name = JukeBox::JukeBoxScript, UDT(0x00003cc2)
|
||||
0x405f : Length = 158, Leaf = 0x1203 LF_FIELDLIST
|
||||
list[0] = LF_VFUNCTAB, type = 0x2090
|
||||
list[1] = LF_ONEMETHOD, public, VANILLA, index = 0x176A, name = 'MxCore'
|
||||
list[2] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x176A,
|
||||
list[2] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x176A,
|
||||
vfptr offset = 0, name = '~MxCore'
|
||||
list[3] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x176B,
|
||||
list[3] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x176B,
|
||||
vfptr offset = 4, name = 'Notify'
|
||||
list[4] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x2087,
|
||||
list[4] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x2087,
|
||||
vfptr offset = 8, name = 'Tickle'
|
||||
list[5] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x202F,
|
||||
list[5] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x202F,
|
||||
vfptr offset = 12, name = 'ClassName'
|
||||
list[6] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x2030,
|
||||
list[6] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x2030,
|
||||
vfptr offset = 16, name = 'IsA'
|
||||
list[7] = LF_ONEMETHOD, public, VANILLA, index = 0x2091, name = 'GetId'
|
||||
list[8] = LF_MEMBER, private, type = T_UINT4(0075), offset = 4
|
||||
member name = 'm_id'
|
||||
|
||||
0x4060 : Length = 30, Leaf = 0x1504 LF_CLASS
|
||||
# members = 9, field list type 0x405f, CONSTRUCTOR,
|
||||
# members = 9, field list type 0x405f, CONSTRUCTOR,
|
||||
Derivation list type 0x0000, VT shape type 0x1266
|
||||
Size = 8, class name = MxCore, UDT(0x00004060)
|
||||
|
||||
@@ -194,7 +220,7 @@ NESTED, enum name = JukeBox::JukeBoxScript, UDT(0x00003cc2)
|
||||
Element type = 0x3CC2
|
||||
Index type = T_SHORT(0011)
|
||||
length = 24
|
||||
Name =
|
||||
Name =
|
||||
|
||||
0x432f : Length = 14, Leaf = 0x1503 LF_ARRAY
|
||||
Element type = T_INT4(0074)
|
||||
@@ -220,7 +246,7 @@ NESTED, enum name = JukeBox::JukeBoxScript, UDT(0x00003cc2)
|
||||
member name = 'm_length'
|
||||
|
||||
0x4db6 : Length = 30, Leaf = 0x1504 LF_CLASS
|
||||
# members = 16, field list type 0x4db5, CONSTRUCTOR, OVERLOAD,
|
||||
# members = 16, field list type 0x4db5, CONSTRUCTOR, OVERLOAD,
|
||||
Derivation list type 0x0000, VT shape type 0x1266
|
||||
Size = 16, class name = MxString, UDT(0x00004db6)
|
||||
"""
|
||||
@@ -235,7 +261,7 @@ def types_parser_fixture():
|
||||
return parser
|
||||
|
||||
|
||||
def test_basic_parsing(parser):
|
||||
def test_basic_parsing(parser: CvdumpTypesParser):
|
||||
obj = parser.keys["0x4db6"]
|
||||
assert obj["type"] == "LF_CLASS"
|
||||
assert obj["name"] == "MxString"
|
||||
@@ -244,7 +270,7 @@ def test_basic_parsing(parser):
|
||||
assert len(parser.keys["0x4db5"]["members"]) == 2
|
||||
|
||||
|
||||
def test_scalar_types(parser):
|
||||
def test_scalar_types(parser: CvdumpTypesParser):
|
||||
"""Full tests on the scalar_* methods are in another file.
|
||||
Here we are just testing the passthrough of the "T_" types."""
|
||||
assert parser.get("T_CHAR").name is None
|
||||
@@ -254,7 +280,7 @@ def test_scalar_types(parser):
|
||||
assert parser.get("T_32PVOID").size == 4
|
||||
|
||||
|
||||
def test_resolve_forward_ref(parser):
|
||||
def test_resolve_forward_ref(parser: CvdumpTypesParser):
|
||||
# Non-forward ref
|
||||
assert parser.get("0x22d5").name == "MxVariable"
|
||||
# Forward ref
|
||||
@@ -262,7 +288,7 @@ def test_resolve_forward_ref(parser):
|
||||
assert parser.get("0x14db").size == 16
|
||||
|
||||
|
||||
def test_members(parser):
|
||||
def test_members(parser: CvdumpTypesParser):
|
||||
"""Return the list of items to compare for a given complex type.
|
||||
If the class has a superclass, add those members too."""
|
||||
# MxCore field list
|
||||
@@ -284,7 +310,7 @@ def test_members(parser):
|
||||
]
|
||||
|
||||
|
||||
def test_members_recursive(parser):
|
||||
def test_members_recursive(parser: CvdumpTypesParser):
|
||||
"""Make sure that we unwrap the dependency tree correctly."""
|
||||
# MxVariable field list
|
||||
assert parser.get_scalars("0x22d4") == [
|
||||
@@ -300,7 +326,7 @@ def test_members_recursive(parser):
|
||||
]
|
||||
|
||||
|
||||
def test_struct(parser):
|
||||
def test_struct(parser: CvdumpTypesParser):
|
||||
"""Basic test for converting type into struct.unpack format string."""
|
||||
# MxCore: vftable and uint32. The vftable pointer is read as uint32.
|
||||
assert parser.get_format_string("0x4060") == "<LL"
|
||||
@@ -312,7 +338,7 @@ def test_struct(parser):
|
||||
assert parser.get_format_string("0x1214") == "<llll"
|
||||
|
||||
|
||||
def test_struct_padding(parser):
|
||||
def test_struct_padding(parser: CvdumpTypesParser):
|
||||
"""For data comparison purposes, make sure we have no gaps in the
|
||||
list of scalar types. Any gap is filled by an unsigned char."""
|
||||
|
||||
@@ -326,7 +352,7 @@ def test_struct_padding(parser):
|
||||
assert len(parser.get_scalars_gapless("0x22d5")) == 13
|
||||
|
||||
|
||||
def test_struct_format_string(parser):
|
||||
def test_struct_format_string(parser: CvdumpTypesParser):
|
||||
"""Generate the struct.unpack format string using the
|
||||
list of scalars with padding filled in."""
|
||||
# MxString, padded to 16 bytes.
|
||||
@@ -336,7 +362,7 @@ def test_struct_format_string(parser):
|
||||
assert parser.get_format_string("0x22d5") == "<LLLLHBBLLLHBB"
|
||||
|
||||
|
||||
def test_array(parser):
|
||||
def test_array(parser: CvdumpTypesParser):
|
||||
"""LF_ARRAY members are created dynamically based on the
|
||||
total array size and the size of one element."""
|
||||
# unsigned char[8]
|
||||
@@ -360,7 +386,7 @@ def test_array(parser):
|
||||
]
|
||||
|
||||
|
||||
def test_2d_array(parser):
|
||||
def test_2d_array(parser: CvdumpTypesParser):
|
||||
"""Make sure 2d array elements are named as we expect."""
|
||||
# float[4][4]
|
||||
float_array = parser.get_scalars("0x103c")
|
||||
@@ -371,7 +397,7 @@ def test_2d_array(parser):
|
||||
assert float_array[-1] == (60, "[3][3]", "T_REAL32")
|
||||
|
||||
|
||||
def test_enum(parser):
|
||||
def test_enum(parser: CvdumpTypesParser):
|
||||
"""LF_ENUM should equal 4-byte int"""
|
||||
assert parser.get("0x3cc2").size == 4
|
||||
assert parser.get_scalars("0x3cc2") == [(0, None, "T_INT4")]
|
||||
@@ -382,7 +408,7 @@ def test_enum(parser):
|
||||
assert enum_array[0].size == 4
|
||||
|
||||
|
||||
def test_lf_pointer(parser):
|
||||
def test_lf_pointer(parser: CvdumpTypesParser):
|
||||
"""LF_POINTER is just a wrapper for scalar pointer type"""
|
||||
assert parser.get("0x3fab").size == 4
|
||||
# assert parser.get("0x3fab").is_pointer is True # TODO: ?
|
||||
@@ -390,7 +416,7 @@ def test_lf_pointer(parser):
|
||||
assert parser.get_scalars("0x3fab") == [(0, None, "T_32PVOID")]
|
||||
|
||||
|
||||
def test_key_not_exist(parser):
|
||||
def test_key_not_exist(parser: CvdumpTypesParser):
|
||||
"""Accessing a non-existent type id should raise our exception"""
|
||||
with pytest.raises(CvdumpKeyError):
|
||||
parser.get("0xbeef")
|
||||
@@ -399,7 +425,7 @@ def test_key_not_exist(parser):
|
||||
parser.get_scalars("0xbeef")
|
||||
|
||||
|
||||
def test_broken_forward_ref(parser):
|
||||
def test_broken_forward_ref(parser: CvdumpTypesParser):
|
||||
"""Raise an exception if we cannot follow a forward reference"""
|
||||
# Verify forward reference on MxCore
|
||||
parser.get("0x1220")
|
||||
@@ -412,7 +438,7 @@ def test_broken_forward_ref(parser):
|
||||
parser.get("0x1220")
|
||||
|
||||
|
||||
def test_null_forward_ref(parser):
|
||||
def test_null_forward_ref(parser: CvdumpTypesParser):
|
||||
"""If the forward ref object is invalid and has no forward ref id,
|
||||
raise an exception."""
|
||||
# Test MxString forward reference
|
||||
@@ -426,7 +452,7 @@ def test_null_forward_ref(parser):
|
||||
parser.get("0x14db")
|
||||
|
||||
|
||||
def test_broken_array_element_ref(parser):
|
||||
def test_broken_array_element_ref(parser: CvdumpTypesParser):
|
||||
# Test LF_ARRAY of ROIColorAlias
|
||||
parser.get("0x19b1")
|
||||
|
||||
@@ -438,7 +464,7 @@ def test_broken_array_element_ref(parser):
|
||||
parser.get("0x19b1")
|
||||
|
||||
|
||||
def test_lf_modifier(parser):
|
||||
def test_lf_modifier(parser: CvdumpTypesParser):
|
||||
"""Is this an alias for another type?"""
|
||||
# Modifies float
|
||||
assert parser.get("0x1028").size == 4
|
||||
@@ -449,7 +475,7 @@ def test_lf_modifier(parser):
|
||||
assert mxrect == parser.get_scalars("0x11f2")
|
||||
|
||||
|
||||
def test_union_members(parser):
|
||||
def test_union_members(parser: CvdumpTypesParser):
|
||||
"""If there is a union somewhere in our dependency list, we can
|
||||
expect to see duplicated member offsets and names. This is ok for
|
||||
the TypeInfo tuple, but the list of ScalarType items should have
|
||||
@@ -457,9 +483,71 @@ def test_union_members(parser):
|
||||
|
||||
# D3DVector type with duplicated offsets
|
||||
d3dvector = parser.get("0x10e1")
|
||||
assert d3dvector.members is not None
|
||||
assert len(d3dvector.members) == 6
|
||||
assert len([m for m in d3dvector.members if m.offset == 0]) == 2
|
||||
|
||||
# Deduplicated comparison list
|
||||
vector_items = parser.get_scalars("0x10e1")
|
||||
assert len(vector_items) == 3
|
||||
|
||||
|
||||
def test_arglist(parser: CvdumpTypesParser):
    """LF_ARGLIST keeps the argument count and the listed type ids in order."""
    expected_args = ["0x100D", "0x1016", "0x1017"]

    entry = parser.keys["0x1018"]

    assert entry["argcount"] == 3
    assert entry["args"] == expected_args
|
||||
|
||||
|
||||
def test_procedure(parser: CvdumpTypesParser):
    """LF_PROCEDURE attributes are stored as the raw strings from the dump."""
    expected = {
        "type": "LF_PROCEDURE",
        "return_type": "T_LONG(0012)",
        "call_type": "C Near",
        "func_attr": "none",
        "num_params": "3",
        "arg_list_type": "0x1018",
    }

    assert parser.keys["0x1019"] == expected
|
||||
|
||||
|
||||
def test_mfunction(parser: CvdumpTypesParser):
    """LF_MFUNCTION attributes spread over three dump lines end up in one entry."""
    expected = {
        "type": "LF_MFUNCTION",
        "return_type": "T_CHAR(0010)",
        "class_type": "0x101A",
        "this_type": "0x101B",
        "call_type": "ThisCall",
        "func_attr": "none",
        "num_params": "2",
        "arg_list_type": "0x101d",
        "this_adjust": "0",
    }

    assert parser.keys["0x101e"] == expected
|
||||
|
||||
|
||||
def test_union_forward_ref(parser: CvdumpTypesParser):
    """A forward-declared LF_UNION is flagged and points at its full definition."""
    forward_decl = parser.keys["0x2339"]

    assert forward_decl["is_forward_ref"] is True
    # The dump prints UDT(0x00002e85); the stored id is the short form.
    assert forward_decl["udt"] == "0x2e85"
|
||||
|
||||
|
||||
def test_union(parser: CvdumpTypesParser):
    """A fully defined LF_UNION yields exactly type, name, size and udt."""
    expected = {
        "type": "LF_UNION",
        "name": "FlagBitfield",
        "size": 1,
        "udt": "0x2e85",
    }

    assert parser.keys["0x2e85"] == expected
|
||||
|
||||
|
||||
def test_fieldlist_enumerate(parser: CvdumpTypesParser):
    """LF_ENUMERATE items of a field list are collected as name/value variants."""
    expected_variants = [
        {"name": label, "value": number}
        for label, number in (("c_read", 1), ("c_write", 2), ("c_text", 4))
    ]

    assert parser.keys["0x3c45"] == {
        "type": "LF_FIELDLIST",
        "variants": expected_variants,
    }
|
||||
|
Reference in New Issue
Block a user