mirror of
https://github.com/isledecomp/isle.git
synced 2025-10-23 00:14:22 +00:00
Improve handling of variables for decomp parser (#376)
* Parser refactor: - Handling LIRBARY and STRING markers - Extracting global variable name for future comparison - Marking function static variables - More fluent error messages * String constants annotated with STRING * fix variable name * Should compare LIBRARY markers
This commit is contained in:
@@ -39,6 +39,14 @@ class ParserError(Enum):
|
||||
# WARN: We found a marker to be referenced by name outside of a header file.
|
||||
BYNAME_FUNCTION_IN_CPP = 109
|
||||
|
||||
# WARN: A GLOBAL marker appeared over a variable without the g_ prefix
|
||||
GLOBAL_MISSING_PREFIX = 110
|
||||
|
||||
# WARN: GLOBAL marker points at something other than variable declaration.
|
||||
# We can't match global variables based on position, but the goal here is
|
||||
# to ignore things like string literal that are not variables.
|
||||
GLOBAL_NOT_VARIABLE = 111
|
||||
|
||||
# This code or higher is an error, not a warning
|
||||
DECOMP_ERROR_START = 200
|
||||
|
||||
@@ -50,13 +58,18 @@ class ParserError(Enum):
|
||||
# For example, a GLOBAL cannot follow FUNCTION/STUB
|
||||
INCOMPATIBLE_MARKER = 201
|
||||
|
||||
# ERROR: The line following a synthetic marker was not a comment
|
||||
BAD_SYNTHETIC = 202
|
||||
# ERROR: The line following an explicit by-name marker was not a comment
|
||||
# We assume a syntax error here rather than try to use the next line
|
||||
BAD_NAMEREF = 202
|
||||
|
||||
# ERROR: This function offset comes before the previous offset from the same module
|
||||
# This hopefully gives some hint about which functions need to be rearranged.
|
||||
FUNCTION_OUT_OF_ORDER = 203
|
||||
|
||||
# ERROR: The line following an explicit by-name marker that does _not_ expect
|
||||
# a comment -- i.e. VTABLE or GLOBAL -- could not extract the name
|
||||
NO_SUITABLE_NAME = 204
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParserAlert:
|
||||
|
103
tools/isledecomp/isledecomp/parser/marker.py
Normal file
103
tools/isledecomp/isledecomp/parser/marker.py
Normal file
@@ -0,0 +1,103 @@
|
||||
import re
|
||||
from typing import Optional
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class MarkerType(Enum):
|
||||
UNKNOWN = -100
|
||||
FUNCTION = 1
|
||||
STUB = 2
|
||||
SYNTHETIC = 3
|
||||
TEMPLATE = 4
|
||||
GLOBAL = 5
|
||||
VTABLE = 6
|
||||
STRING = 7
|
||||
LIBRARY = 8
|
||||
|
||||
|
||||
markerRegex = re.compile(
|
||||
r"\s*//\s*(?P<type>\w+):\s*(?P<module>\w+)\s+(?P<offset>0x[a-f0-9]+)",
|
||||
flags=re.I,
|
||||
)
|
||||
|
||||
|
||||
markerExactRegex = re.compile(
|
||||
r"\s*// (?P<type>[A-Z]+): (?P<module>[A-Z0-9]+) (?P<offset>0x[a-f0-9]+)$"
|
||||
)
|
||||
|
||||
|
||||
class DecompMarker:
|
||||
def __init__(self, marker_type: str, module: str, offset: int) -> None:
|
||||
try:
|
||||
self._type = MarkerType[marker_type.upper()]
|
||||
except KeyError:
|
||||
self._type = MarkerType.UNKNOWN
|
||||
|
||||
# Convert to upper here. A lot of other analysis depends on this name
|
||||
# being consistent and predictable. If the name is _not_ capitalized
|
||||
# we will emit a syntax error.
|
||||
self._module: str = module.upper()
|
||||
self._offset: int = offset
|
||||
|
||||
@property
|
||||
def type(self) -> MarkerType:
|
||||
return self._type
|
||||
|
||||
@property
|
||||
def module(self) -> str:
|
||||
return self._module
|
||||
|
||||
@property
|
||||
def offset(self) -> int:
|
||||
return self._offset
|
||||
|
||||
def is_regular_function(self) -> bool:
|
||||
"""Regular function, meaning: not an explicit byname lookup. FUNCTION
|
||||
markers can be _implicit_ byname.
|
||||
FUNCTION and STUB markers are (currently) the only heterogenous marker types that
|
||||
can be lumped together, although the reasons for doing so are a little vague."""
|
||||
return self._type in (MarkerType.FUNCTION, MarkerType.STUB)
|
||||
|
||||
def is_explicit_byname(self) -> bool:
|
||||
return self._type in (
|
||||
MarkerType.SYNTHETIC,
|
||||
MarkerType.TEMPLATE,
|
||||
MarkerType.LIBRARY,
|
||||
)
|
||||
|
||||
def is_variable(self) -> bool:
|
||||
return self._type == MarkerType.GLOBAL
|
||||
|
||||
def is_synthetic(self) -> bool:
|
||||
return self._type == MarkerType.SYNTHETIC
|
||||
|
||||
def is_template(self) -> bool:
|
||||
return self._type == MarkerType.TEMPLATE
|
||||
|
||||
def is_vtable(self) -> bool:
|
||||
return self._type == MarkerType.VTABLE
|
||||
|
||||
def is_library(self) -> bool:
|
||||
return self._type == MarkerType.LIBRARY
|
||||
|
||||
def is_string(self) -> bool:
|
||||
return self._type == MarkerType.STRING
|
||||
|
||||
def allowed_in_func(self) -> bool:
|
||||
return self._type in (MarkerType.GLOBAL, MarkerType.STRING)
|
||||
|
||||
|
||||
def match_marker(line: str) -> Optional[DecompMarker]:
|
||||
match = markerRegex.match(line)
|
||||
if match is None:
|
||||
return None
|
||||
|
||||
return DecompMarker(
|
||||
marker_type=match.group("type"),
|
||||
module=match.group("module"),
|
||||
offset=int(match.group("offset"), 16),
|
||||
)
|
||||
|
||||
|
||||
def is_marker_exact(line: str) -> bool:
|
||||
return markerExactRegex.match(line) is not None
|
@@ -1,35 +1,57 @@
|
||||
from typing import Optional
|
||||
from dataclasses import dataclass
|
||||
from .marker import MarkerType
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParserNode:
|
||||
class ParserSymbol:
|
||||
"""Exported decomp marker with all information (except the code filename) required to
|
||||
cross-reference with cvdump data."""
|
||||
|
||||
type: MarkerType
|
||||
line_number: int
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParserSymbol(ParserNode):
|
||||
module: str
|
||||
offset: int
|
||||
name: str
|
||||
|
||||
# The parser doesn't (currently) know about the code filename, but if you
|
||||
# wanted to set it here after the fact, here's the spot.
|
||||
filename: Optional[str] = None
|
||||
|
||||
def should_skip(self) -> bool:
|
||||
"""The default is to compare any symbols we have"""
|
||||
return False
|
||||
|
||||
def is_nameref(self) -> bool:
|
||||
"""All symbols default to name lookup"""
|
||||
return True
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParserFunction(ParserSymbol):
|
||||
name: str
|
||||
# We are able to detect the closing line of a function with some reliability.
|
||||
# This isn't used for anything right now, but perhaps later it will be.
|
||||
end_line: Optional[int] = None
|
||||
|
||||
# All marker types are referenced by name except FUNCTION/STUB. These can also be
|
||||
# referenced by name, but only if this flag is true.
|
||||
lookup_by_name: bool = False
|
||||
is_stub: bool = False
|
||||
is_synthetic: bool = False
|
||||
is_template: bool = False
|
||||
end_line: int = -1
|
||||
|
||||
def should_skip(self) -> bool:
|
||||
return self.type == MarkerType.STUB
|
||||
|
||||
def is_nameref(self) -> bool:
|
||||
return (
|
||||
self.type in (MarkerType.SYNTHETIC, MarkerType.TEMPLATE, MarkerType.LIBRARY)
|
||||
or self.lookup_by_name
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParserVariable(ParserSymbol):
|
||||
name: str
|
||||
size: int = -1
|
||||
is_static: bool = False
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParserVtable(ParserSymbol):
|
||||
class_name: str
|
||||
num_entries: int = -1
|
||||
pass
|
||||
|
@@ -3,15 +3,19 @@
|
||||
from typing import List, Iterable, Iterator
|
||||
from enum import Enum
|
||||
from .util import (
|
||||
DecompMarker,
|
||||
is_blank_or_comment,
|
||||
match_marker,
|
||||
is_marker_exact,
|
||||
get_class_name,
|
||||
get_variable_name,
|
||||
get_synthetic_name,
|
||||
remove_trailing_comment,
|
||||
)
|
||||
from .marker import (
|
||||
DecompMarker,
|
||||
match_marker,
|
||||
is_marker_exact,
|
||||
)
|
||||
from .node import (
|
||||
ParserSymbol,
|
||||
ParserFunction,
|
||||
ParserVariable,
|
||||
ParserVtable,
|
||||
@@ -28,44 +32,23 @@ class ReaderState(Enum):
|
||||
IN_GLOBAL = 5
|
||||
IN_FUNC_GLOBAL = 6
|
||||
IN_VTABLE = 7
|
||||
IN_SYNTHETIC = 8
|
||||
IN_LIBRARY = 9
|
||||
DONE = 100
|
||||
|
||||
|
||||
def marker_is_stub(marker: DecompMarker) -> bool:
|
||||
return marker.type.upper() == "STUB"
|
||||
|
||||
|
||||
def marker_is_variable(marker: DecompMarker) -> bool:
|
||||
return marker.type.upper() == "GLOBAL"
|
||||
|
||||
|
||||
def marker_is_synthetic(marker: DecompMarker) -> bool:
|
||||
return marker.type.upper() in ("SYNTHETIC", "TEMPLATE")
|
||||
|
||||
|
||||
def marker_is_template(marker: DecompMarker) -> bool:
|
||||
return marker.type.upper() == "TEMPLATE"
|
||||
|
||||
|
||||
def marker_is_function(marker: DecompMarker) -> bool:
|
||||
return marker.type.upper() in ("FUNCTION", "STUB")
|
||||
|
||||
|
||||
def marker_is_vtable(marker: DecompMarker) -> bool:
|
||||
return marker.type.upper() == "VTABLE"
|
||||
|
||||
|
||||
class MarkerDict:
|
||||
def __init__(self) -> None:
|
||||
self.markers: dict = {}
|
||||
|
||||
def insert(self, marker: DecompMarker) -> bool:
|
||||
"""Return True if this insert would overwrite"""
|
||||
module = marker.module.upper()
|
||||
module = marker.module
|
||||
if module in self.markers:
|
||||
return True
|
||||
|
||||
self.markers[module] = (marker.type, marker.offset)
|
||||
# TODO: type converted back to string version here instead of using enum
|
||||
self.markers[module] = (marker.type.name, marker.offset)
|
||||
return False
|
||||
|
||||
def iter(self) -> Iterator[DecompMarker]:
|
||||
@@ -82,9 +65,7 @@ class DecompParser:
|
||||
# but not right now
|
||||
def __init__(self) -> None:
|
||||
# The lists to be populated as we parse
|
||||
self.functions: List[ParserFunction] = []
|
||||
self.vtables: List[ParserVtable] = []
|
||||
self.variables: List[ParserVariable] = []
|
||||
self._symbols: List[ParserSymbol] = []
|
||||
self.alerts: List[ParserAlert] = []
|
||||
|
||||
self.line_number: int = 0
|
||||
@@ -113,9 +94,7 @@ class DecompParser:
|
||||
self.function_sig: str = ""
|
||||
|
||||
def reset(self):
|
||||
self.functions = []
|
||||
self.vtables = []
|
||||
self.variables = []
|
||||
self._symbols = []
|
||||
self.alerts = []
|
||||
|
||||
self.line_number = 0
|
||||
@@ -131,6 +110,18 @@ class DecompParser:
|
||||
self.function_start = 0
|
||||
self.function_sig = ""
|
||||
|
||||
@property
|
||||
def functions(self) -> List[ParserSymbol]:
|
||||
return [s for s in self._symbols if isinstance(s, ParserFunction)]
|
||||
|
||||
@property
|
||||
def vtables(self) -> List[ParserSymbol]:
|
||||
return [s for s in self._symbols if isinstance(s, ParserVtable)]
|
||||
|
||||
@property
|
||||
def variables(self) -> List[ParserSymbol]:
|
||||
return [s for s in self._symbols if isinstance(s, ParserVariable)]
|
||||
|
||||
def _recover(self):
|
||||
"""We hit a syntax error and need to reset temp structures"""
|
||||
self.state = ReaderState.SEARCH
|
||||
@@ -159,10 +150,17 @@ class DecompParser:
|
||||
self._syntax_warning(ParserError.DUPLICATE_MODULE)
|
||||
self.state = ReaderState.WANT_SIG
|
||||
|
||||
def _synthetic_marker(self, marker: DecompMarker):
|
||||
def _nameref_marker(self, marker: DecompMarker):
|
||||
"""Functions explicitly referenced by name are set here"""
|
||||
if self.fun_markers.insert(marker):
|
||||
self._syntax_warning(ParserError.DUPLICATE_MODULE)
|
||||
self.state = ReaderState.IN_TEMPLATE
|
||||
|
||||
if marker.is_template():
|
||||
self.state = ReaderState.IN_TEMPLATE
|
||||
elif marker.is_synthetic():
|
||||
self.state = ReaderState.IN_SYNTHETIC
|
||||
else:
|
||||
self.state = ReaderState.IN_LIBRARY
|
||||
|
||||
def _function_done(self, lookup_by_name: bool = False, unexpected: bool = False):
|
||||
end_line = self.line_number
|
||||
@@ -173,16 +171,14 @@ class DecompParser:
|
||||
end_line -= 1
|
||||
|
||||
for marker in self.fun_markers.iter():
|
||||
self.functions.append(
|
||||
self._symbols.append(
|
||||
ParserFunction(
|
||||
type=marker.type,
|
||||
line_number=self.function_start,
|
||||
module=marker.module,
|
||||
offset=marker.offset,
|
||||
lookup_by_name=lookup_by_name,
|
||||
is_stub=marker_is_stub(marker),
|
||||
is_synthetic=marker_is_synthetic(marker),
|
||||
is_template=marker_is_template(marker),
|
||||
name=self.function_sig,
|
||||
lookup_by_name=lookup_by_name,
|
||||
end_line=end_line,
|
||||
)
|
||||
)
|
||||
@@ -202,12 +198,13 @@ class DecompParser:
|
||||
class_name = self.last_line.strip()
|
||||
|
||||
for marker in self.tbl_markers.iter():
|
||||
self.vtables.append(
|
||||
self._symbols.append(
|
||||
ParserVtable(
|
||||
type=marker.type,
|
||||
line_number=self.line_number,
|
||||
module=marker.module,
|
||||
offset=marker.offset,
|
||||
class_name=class_name,
|
||||
name=class_name,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -223,14 +220,19 @@ class DecompParser:
|
||||
else:
|
||||
self.state = ReaderState.IN_GLOBAL
|
||||
|
||||
def _variable_done(self):
|
||||
def _variable_done(self, name: str):
|
||||
if not name.startswith("g_"):
|
||||
self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX)
|
||||
|
||||
for marker in self.var_markers.iter():
|
||||
self.variables.append(
|
||||
self._symbols.append(
|
||||
ParserVariable(
|
||||
type=marker.type,
|
||||
line_number=self.line_number,
|
||||
module=marker.module,
|
||||
offset=marker.offset,
|
||||
name=self.last_line.strip(),
|
||||
name=name,
|
||||
is_static=self.state == ReaderState.IN_FUNC_GLOBAL,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -246,12 +248,23 @@ class DecompParser:
|
||||
self._syntax_error(ParserError.UNEXPECTED_MARKER)
|
||||
return
|
||||
|
||||
# If we are inside a function, the only markers we accept are:
|
||||
# GLOBAL, indicating a static variable
|
||||
# STRING, indicating a literal string.
|
||||
# Otherwise we assume that the parser missed the end of the function
|
||||
# and we have moved on to something else.
|
||||
# This is unlikely to occur with well-formed code, but
|
||||
# we can recover easily by just ending the function here.
|
||||
if self.state == ReaderState.IN_FUNC and not marker.allowed_in_func():
|
||||
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
|
||||
self._function_done(unexpected=True)
|
||||
|
||||
# TODO: How uncertain are we of detecting the end of a function
|
||||
# in a clang-formatted file? For now we assume we have missed the
|
||||
# end if we detect a non-GLOBAL marker while state is IN_FUNC.
|
||||
# Maybe these cases should be syntax errors instead
|
||||
|
||||
if marker_is_function(marker):
|
||||
if marker.is_regular_function():
|
||||
if self.state in (
|
||||
ReaderState.SEARCH,
|
||||
ReaderState.WANT_SIG,
|
||||
@@ -259,29 +272,41 @@ class DecompParser:
|
||||
# We will allow multiple offsets if we have just begun
|
||||
# the code block, but not after we hit the curly brace.
|
||||
self._function_marker(marker)
|
||||
elif self.state == ReaderState.IN_FUNC:
|
||||
# We hit another offset unexpectedly.
|
||||
# We can recover easily by just ending the function here.
|
||||
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
|
||||
self._function_done(unexpected=True)
|
||||
|
||||
# Start the next function right after so we can
|
||||
# read the next line.
|
||||
self._function_marker(marker)
|
||||
else:
|
||||
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||
|
||||
elif marker_is_synthetic(marker):
|
||||
elif marker.is_template():
|
||||
if self.state in (ReaderState.SEARCH, ReaderState.IN_TEMPLATE):
|
||||
self._synthetic_marker(marker)
|
||||
elif self.state == ReaderState.IN_FUNC:
|
||||
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
|
||||
self._function_done(lookup_by_name=True, unexpected=True)
|
||||
self._synthetic_marker(marker)
|
||||
self._nameref_marker(marker)
|
||||
else:
|
||||
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||
|
||||
elif marker_is_variable(marker):
|
||||
elif marker.is_synthetic():
|
||||
if self.state in (ReaderState.SEARCH, ReaderState.IN_SYNTHETIC):
|
||||
self._nameref_marker(marker)
|
||||
else:
|
||||
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||
|
||||
elif marker.is_library():
|
||||
if self.state in (ReaderState.SEARCH, ReaderState.IN_LIBRARY):
|
||||
self._nameref_marker(marker)
|
||||
else:
|
||||
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||
|
||||
elif marker.is_string():
|
||||
# TODO: We are ignoring string markers for the moment.
|
||||
# We already have a lot of them in the codebase, though, so we'll
|
||||
# hang onto them for now in case we can use them later.
|
||||
# To match up string constants, the strategy will be:
|
||||
# 1. Use cvdump to find all string constants in the recomp
|
||||
# 2. In the original binary, look at relocated vaddrs from .rdata
|
||||
# 3. Try to match up string data from #1 with locations in #2
|
||||
|
||||
# Throw the syntax error we would throw if we were parsing these
|
||||
if self.state not in (ReaderState.SEARCH, ReaderState.IN_FUNC):
|
||||
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||
|
||||
elif marker.is_variable():
|
||||
if self.state in (
|
||||
ReaderState.SEARCH,
|
||||
ReaderState.IN_GLOBAL,
|
||||
@@ -292,13 +317,9 @@ class DecompParser:
|
||||
else:
|
||||
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||
|
||||
elif marker_is_vtable(marker):
|
||||
elif marker.is_vtable():
|
||||
if self.state in (ReaderState.SEARCH, ReaderState.IN_VTABLE):
|
||||
self._vtable_marker(marker)
|
||||
elif self.state == ReaderState.IN_FUNC:
|
||||
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
|
||||
self._function_done(unexpected=True)
|
||||
self._vtable_marker(marker)
|
||||
else:
|
||||
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||
|
||||
@@ -322,12 +343,16 @@ class DecompParser:
|
||||
return
|
||||
|
||||
line_strip = line.strip()
|
||||
if self.state == ReaderState.IN_TEMPLATE:
|
||||
# TEMPLATE functions are a special case. The signature is
|
||||
# given on the next line (in a // comment)
|
||||
if self.state in (
|
||||
ReaderState.IN_SYNTHETIC,
|
||||
ReaderState.IN_TEMPLATE,
|
||||
ReaderState.IN_LIBRARY,
|
||||
):
|
||||
# Explicit nameref functions provide the function name
|
||||
# on the next line (in a // comment)
|
||||
name = get_synthetic_name(line)
|
||||
if name is None:
|
||||
self._syntax_error(ParserError.BAD_SYNTHETIC)
|
||||
self._syntax_error(ParserError.BAD_NAMEREF)
|
||||
else:
|
||||
self.function_sig = name
|
||||
self._function_starts_here()
|
||||
@@ -384,8 +409,28 @@ class DecompParser:
|
||||
self._function_done()
|
||||
|
||||
elif self.state in (ReaderState.IN_GLOBAL, ReaderState.IN_FUNC_GLOBAL):
|
||||
if not is_blank_or_comment(line):
|
||||
self._variable_done()
|
||||
# TODO: Known problem that an error here will cause us to abandon a
|
||||
# function we have already parsed if state == IN_FUNC_GLOBAL.
|
||||
# However, we are not tolerant of _any_ syntax problems in our
|
||||
# CI actions, so the solution is to just fix the invalid marker.
|
||||
if is_blank_or_comment(line):
|
||||
self._syntax_error(ParserError.NO_SUITABLE_NAME)
|
||||
return
|
||||
|
||||
# We don't have a foolproof mechanism to tell what is and is not a variable.
|
||||
# If the GLOBAL is being declared on a `return` statement, though, this is
|
||||
# not correct. It is either a string literal (which will be handled differently)
|
||||
# or it is not the variable declaration, which is incorrect decomp syntax.
|
||||
if line.strip().startswith("return"):
|
||||
self._syntax_error(ParserError.GLOBAL_NOT_VARIABLE)
|
||||
return
|
||||
|
||||
name = get_variable_name(line)
|
||||
if name is None:
|
||||
self._syntax_error(ParserError.NO_SUITABLE_NAME)
|
||||
return
|
||||
|
||||
self._variable_done(name)
|
||||
|
||||
elif self.state == ReaderState.IN_VTABLE:
|
||||
vtable_class = get_class_name(line)
|
||||
|
@@ -1,17 +1,6 @@
|
||||
# C++ Parser utility functions and data structures
|
||||
from __future__ import annotations # python <3.10 compatibility
|
||||
import re
|
||||
from collections import namedtuple
|
||||
|
||||
DecompMarker = namedtuple("DecompMarker", ["type", "module", "offset"])
|
||||
|
||||
|
||||
markerRegex = re.compile(
|
||||
r"\s*//\s*(\w+):\s*(\w+)\s+(0x[a-f0-9]+)",
|
||||
flags=re.I,
|
||||
)
|
||||
|
||||
markerExactRegex = re.compile(r"\s*// ([A-Z]+): ([A-Z0-9]+) (0x[a-f0-9]+)$")
|
||||
from typing import Optional
|
||||
|
||||
# The goal here is to just read whatever is on the next line, so some
|
||||
# flexibility in the formatting seems OK
|
||||
@@ -23,7 +12,7 @@ templateCommentRegex = re.compile(r"\s*//\s+(.*)")
|
||||
trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")
|
||||
|
||||
|
||||
def get_synthetic_name(line: str) -> str | None:
|
||||
def get_synthetic_name(line: str) -> Optional[str]:
|
||||
"""Synthetic names appear on a single line comment on the line after the marker.
|
||||
If that's not what we have, return None"""
|
||||
template_match = templateCommentRegex.match(line)
|
||||
@@ -51,20 +40,6 @@ def is_blank_or_comment(line: str) -> bool:
|
||||
)
|
||||
|
||||
|
||||
def match_marker(line: str) -> DecompMarker | None:
|
||||
match = markerRegex.match(line)
|
||||
if match is None:
|
||||
return None
|
||||
|
||||
return DecompMarker(
|
||||
type=match.group(1), module=match.group(2), offset=int(match.group(3), 16)
|
||||
)
|
||||
|
||||
|
||||
def is_marker_exact(line: str) -> bool:
|
||||
return markerExactRegex.match(line) is not None
|
||||
|
||||
|
||||
template_class_decl_regex = re.compile(
|
||||
r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)<([\w]+)\s*(\*+)?\s*>"
|
||||
)
|
||||
@@ -73,7 +48,7 @@ template_class_decl_regex = re.compile(
|
||||
class_decl_regex = re.compile(r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)")
|
||||
|
||||
|
||||
def get_class_name(line: str) -> str | None:
|
||||
def get_class_name(line: str) -> Optional[str]:
|
||||
"""For VTABLE markers, extract the class name from the code line or comment
|
||||
where it appears."""
|
||||
|
||||
@@ -93,3 +68,21 @@ def get_class_name(line: str) -> str | None:
|
||||
return match.group(1)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
global_regex = re.compile(r"(?P<name>g_\w+)")
|
||||
less_strict_global_regex = re.compile(r"(?P<name>\w+)(?:\)\(|\[.*|\s*=.*|;)")
|
||||
|
||||
|
||||
def get_variable_name(line: str) -> Optional[str]:
|
||||
"""Grab the name of the variable annotated with the GLOBAL marker.
|
||||
Correct syntax would have the variable start with the prefix "g_"
|
||||
but we will try to match regardless."""
|
||||
|
||||
if (match := global_regex.search(line)) is not None:
|
||||
return match.group("name")
|
||||
|
||||
if (match := less_strict_global_regex.search(line)) is not None:
|
||||
return match.group("name")
|
||||
|
||||
return None
|
||||
|
@@ -115,7 +115,7 @@ def test_different_markers_same_module(parser):
|
||||
|
||||
# Use first marker declaration, don't replace
|
||||
assert len(parser.functions) == 1
|
||||
assert parser.functions[0].is_stub is False
|
||||
assert parser.functions[0].should_skip() is False
|
||||
|
||||
# Should alert to this
|
||||
assert len(parser.alerts) == 1
|
||||
@@ -193,7 +193,7 @@ def test_multiple_vtables(parser):
|
||||
)
|
||||
assert len(parser.alerts) == 0
|
||||
assert len(parser.vtables) == 2
|
||||
assert parser.vtables[0].class_name == "MxString"
|
||||
assert parser.vtables[0].name == "MxString"
|
||||
|
||||
|
||||
def test_multiple_vtables_same_module(parser):
|
||||
@@ -247,7 +247,7 @@ def test_synthetic_no_comment(parser):
|
||||
)
|
||||
assert len(parser.functions) == 0
|
||||
assert len(parser.alerts) == 1
|
||||
assert parser.alerts[0].code == ParserError.BAD_SYNTHETIC
|
||||
assert parser.alerts[0].code == ParserError.BAD_NAMEREF
|
||||
assert parser.state == ReaderState.SEARCH
|
||||
|
||||
|
||||
@@ -375,3 +375,70 @@ def test_unexpected_eof(parser):
|
||||
assert len(parser.functions) == 1
|
||||
assert len(parser.alerts) == 1
|
||||
assert parser.alerts[0].code == ParserError.UNEXPECTED_END_OF_FILE
|
||||
|
||||
|
||||
def test_global_variable_prefix(parser):
|
||||
"""Global and static variables should have the g_ prefix."""
|
||||
parser.read_lines(
|
||||
[
|
||||
"// GLOBAL: TEST 0x1234",
|
||||
'const char* g_msg = "hello";',
|
||||
]
|
||||
)
|
||||
assert len(parser.variables) == 1
|
||||
assert len(parser.alerts) == 0
|
||||
|
||||
parser.read_lines(
|
||||
[
|
||||
"// GLOBAL: TEXT 0x5555",
|
||||
"int test = 5;",
|
||||
]
|
||||
)
|
||||
assert len(parser.alerts) == 1
|
||||
assert parser.alerts[0].code == ParserError.GLOBAL_MISSING_PREFIX
|
||||
# In spite of that, we should still grab the variable name.
|
||||
assert parser.variables[1].name == "test"
|
||||
|
||||
|
||||
def test_global_nomatch(parser):
|
||||
"""We do our best to grab the variable name, even without the g_ prefix
|
||||
but this (by design) will not match everything."""
|
||||
|
||||
parser.read_lines(
|
||||
[
|
||||
"// GLOBAL: TEST 0x1234",
|
||||
"FunctionCall();",
|
||||
]
|
||||
)
|
||||
assert len(parser.variables) == 0
|
||||
assert len(parser.alerts) == 1
|
||||
assert parser.alerts[0].code == ParserError.NO_SUITABLE_NAME
|
||||
|
||||
|
||||
def test_static_variable(parser):
|
||||
"""We can detect whether a variable is a static function variable
|
||||
based on the parser's state when we detect it.
|
||||
Checking for the word `static` alone is not a good test.
|
||||
Static class variables are filed as S_GDATA32, same as regular globals.
|
||||
Only function statics are filed as S_LDATA32."""
|
||||
|
||||
parser.read_lines(
|
||||
[
|
||||
"// GLOBAL: TEST 0x1234",
|
||||
"int g_test = 1234;",
|
||||
]
|
||||
)
|
||||
assert len(parser.variables) == 1
|
||||
assert parser.variables[0].is_static is False
|
||||
|
||||
parser.read_lines(
|
||||
[
|
||||
"// FUNCTION: TEST 0x5555",
|
||||
"void test_function() {",
|
||||
"// GLOBAL: TEST 0x8888",
|
||||
"int g_internal = 0;",
|
||||
"}",
|
||||
]
|
||||
)
|
||||
assert len(parser.variables) == 2
|
||||
assert parser.variables[1].is_static is True
|
||||
|
@@ -11,9 +11,11 @@ state_change_marker_cases = [
|
||||
(_rs.SEARCH, "FUNCTION", _rs.WANT_SIG, None),
|
||||
(_rs.SEARCH, "GLOBAL", _rs.IN_GLOBAL, None),
|
||||
(_rs.SEARCH, "STUB", _rs.WANT_SIG, None),
|
||||
(_rs.SEARCH, "SYNTHETIC", _rs.IN_TEMPLATE, None),
|
||||
(_rs.SEARCH, "SYNTHETIC", _rs.IN_SYNTHETIC, None),
|
||||
(_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None),
|
||||
(_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None),
|
||||
(_rs.SEARCH, "LIBRARY", _rs.IN_LIBRARY, None),
|
||||
(_rs.SEARCH, "STRING", _rs.SEARCH, None),
|
||||
|
||||
(_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None),
|
||||
(_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
@@ -21,20 +23,26 @@ state_change_marker_cases = [
|
||||
(_rs.WANT_SIG, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.WANT_SIG, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.WANT_SIG, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.WANT_SIG, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.WANT_SIG, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
|
||||
(_rs.IN_FUNC, "FUNCTION", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
|
||||
(_rs.IN_FUNC, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
|
||||
(_rs.IN_FUNC, "STUB", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
|
||||
(_rs.IN_FUNC, "SYNTHETIC", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
|
||||
(_rs.IN_FUNC, "SYNTHETIC", _rs.IN_SYNTHETIC, _pe.MISSED_END_OF_FUNCTION),
|
||||
(_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
|
||||
(_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION),
|
||||
(_rs.IN_FUNC, "LIBRARY", _rs.IN_LIBRARY, _pe.MISSED_END_OF_FUNCTION),
|
||||
(_rs.IN_FUNC, "STRING", _rs.IN_FUNC, None),
|
||||
|
||||
(_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_TEMPLATE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_TEMPLATE, "SYNTHETIC", _rs.IN_TEMPLATE, None),
|
||||
(_rs.IN_TEMPLATE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_TEMPLATE, "TEMPLATE", _rs.IN_TEMPLATE, None),
|
||||
(_rs.IN_TEMPLATE, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_TEMPLATE, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_TEMPLATE, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
|
||||
(_rs.WANT_CURLY, "FUNCTION", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
|
||||
(_rs.WANT_CURLY, "GLOBAL", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
|
||||
@@ -42,6 +50,8 @@ state_change_marker_cases = [
|
||||
(_rs.WANT_CURLY, "SYNTHETIC", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
|
||||
(_rs.WANT_CURLY, "TEMPLATE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
|
||||
(_rs.WANT_CURLY, "VTABLE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
|
||||
(_rs.WANT_CURLY, "LIBRARY", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
|
||||
(_rs.WANT_CURLY, "STRING", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
|
||||
|
||||
(_rs.IN_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_GLOBAL, "GLOBAL", _rs.IN_GLOBAL, None),
|
||||
@@ -49,6 +59,8 @@ state_change_marker_cases = [
|
||||
(_rs.IN_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
|
||||
(_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
|
||||
@@ -56,6 +68,8 @@ state_change_marker_cases = [
|
||||
(_rs.IN_FUNC_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_FUNC_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_FUNC_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
|
||||
(_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
@@ -63,6 +77,26 @@ state_change_marker_cases = [
|
||||
(_rs.IN_VTABLE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_VTABLE, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_VTABLE, "VTABLE", _rs.IN_VTABLE, None),
|
||||
(_rs.IN_VTABLE, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_VTABLE, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
|
||||
(_rs.IN_SYNTHETIC, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_SYNTHETIC, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_SYNTHETIC, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_SYNTHETIC, "SYNTHETIC", _rs.IN_SYNTHETIC, None),
|
||||
(_rs.IN_SYNTHETIC, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_SYNTHETIC, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_SYNTHETIC, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_SYNTHETIC, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
|
||||
(_rs.IN_LIBRARY, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_LIBRARY, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_LIBRARY, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_LIBRARY, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_LIBRARY, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_LIBRARY, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
(_rs.IN_LIBRARY, "LIBRARY", _rs.IN_LIBRARY, None),
|
||||
(_rs.IN_LIBRARY, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
|
||||
]
|
||||
# fmt: on
|
||||
|
||||
@@ -105,47 +139,3 @@ def test_state_search_line(line: str):
|
||||
p.read_line(line)
|
||||
assert p.state == _rs.SEARCH
|
||||
assert len(p.alerts) == 0
|
||||
|
||||
|
||||
global_lines = [
|
||||
("// A comment", _rs.IN_GLOBAL),
|
||||
("", _rs.IN_GLOBAL),
|
||||
("\t", _rs.IN_GLOBAL),
|
||||
(" ", _rs.IN_GLOBAL),
|
||||
# TODO: no check for "likely" variable declaration so these all count
|
||||
("void function()", _rs.SEARCH),
|
||||
("int x = 123;", _rs.SEARCH),
|
||||
("just some text", _rs.SEARCH),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("line, new_state", global_lines)
|
||||
def test_state_global_line(line: str, new_state: _rs):
|
||||
p = DecompParser()
|
||||
p.read_line("// GLOBAL: TEST 0x1234")
|
||||
assert p.state == _rs.IN_GLOBAL
|
||||
p.read_line(line)
|
||||
assert p.state == new_state
|
||||
|
||||
|
||||
# mostly same as above
|
||||
in_func_global_lines = [
|
||||
("// A comment", _rs.IN_FUNC_GLOBAL),
|
||||
("", _rs.IN_FUNC_GLOBAL),
|
||||
("\t", _rs.IN_FUNC_GLOBAL),
|
||||
(" ", _rs.IN_FUNC_GLOBAL),
|
||||
# TODO: no check for "likely" variable declaration so these all count
|
||||
("void function()", _rs.IN_FUNC),
|
||||
("int x = 123;", _rs.IN_FUNC),
|
||||
("just some text", _rs.IN_FUNC),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("line, new_state", in_func_global_lines)
|
||||
def test_state_in_func_global_line(line: str, new_state: _rs):
|
||||
p = DecompParser()
|
||||
p.state = _rs.IN_FUNC
|
||||
p.read_line("// GLOBAL: TEST 0x1234")
|
||||
assert p.state == _rs.IN_FUNC_GLOBAL
|
||||
p.read_line(line)
|
||||
assert p.state == new_state
|
||||
|
@@ -1,11 +1,15 @@
|
||||
import pytest
|
||||
from isledecomp.parser.parser import MarkerDict
|
||||
from isledecomp.parser.util import (
|
||||
from isledecomp.parser.marker import (
|
||||
DecompMarker,
|
||||
is_blank_or_comment,
|
||||
MarkerType,
|
||||
match_marker,
|
||||
is_marker_exact,
|
||||
)
|
||||
from isledecomp.parser.util import (
|
||||
is_blank_or_comment,
|
||||
get_class_name,
|
||||
get_variable_name,
|
||||
)
|
||||
|
||||
|
||||
@@ -96,7 +100,7 @@ def test_marker_dict_type_replace():
|
||||
d.insert(DecompMarker("STUB", "TEST", 0x1234))
|
||||
markers = list(d.iter())
|
||||
assert len(markers) == 1
|
||||
assert markers[0].type == "FUNCTION"
|
||||
assert markers[0].type == MarkerType.FUNCTION
|
||||
|
||||
|
||||
class_name_match_cases = [
|
||||
@@ -131,3 +135,26 @@ class_name_no_match_cases = [
|
||||
@pytest.mark.parametrize("line", class_name_no_match_cases)
|
||||
def test_get_class_name_none(line: str):
|
||||
assert get_class_name(line) is None
|
||||
|
||||
|
||||
variable_name_cases = [
|
||||
# with prefix for easy access
|
||||
("char* g_test;", "g_test"),
|
||||
("g_test;", "g_test"),
|
||||
("void (*g_test)(int);", "g_test"),
|
||||
("char g_test[50];", "g_test"),
|
||||
("char g_test[50] = {1234,", "g_test"),
|
||||
("int g_test = 500;", "g_test"),
|
||||
# no prefix
|
||||
("char* hello;", "hello"),
|
||||
("hello;", "hello"),
|
||||
("void (*hello)(int);", "hello"),
|
||||
("char hello[50];", "hello"),
|
||||
("char hello[50] = {1234,", "hello"),
|
||||
("int hello = 500;", "hello"),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("line,name", variable_name_cases)
|
||||
def test_get_variable_name(line: str, name: str):
|
||||
assert get_variable_name(line) == name
|
||||
|
Reference in New Issue
Block a user