Improve handling of variables for decomp parser (#376)

* Parser refactor:
- Handling LIRBARY and STRING markers
- Extracting global variable name for future comparison
- Marking function static variables
- More fluent error messages

* String constants annotated with STRING

* fix variable name

* Should compare LIBRARY markers
This commit is contained in:
MS
2023-12-27 15:59:42 -05:00
committed by GitHub
parent dfad323880
commit bcb7bec68b
160 changed files with 586 additions and 327 deletions

View File

@@ -39,6 +39,14 @@ class ParserError(Enum):
# WARN: We found a marker to be referenced by name outside of a header file.
BYNAME_FUNCTION_IN_CPP = 109
# WARN: A GLOBAL marker appeared over a variable without the g_ prefix
GLOBAL_MISSING_PREFIX = 110
# WARN: GLOBAL marker points at something other than variable declaration.
# We can't match global variables based on position, but the goal here is
# to ignore things like string literal that are not variables.
GLOBAL_NOT_VARIABLE = 111
# This code or higher is an error, not a warning
DECOMP_ERROR_START = 200
@@ -50,13 +58,18 @@ class ParserError(Enum):
# For example, a GLOBAL cannot follow FUNCTION/STUB
INCOMPATIBLE_MARKER = 201
# ERROR: The line following a synthetic marker was not a comment
BAD_SYNTHETIC = 202
# ERROR: The line following an explicit by-name marker was not a comment
# We assume a syntax error here rather than try to use the next line
BAD_NAMEREF = 202
# ERROR: This function offset comes before the previous offset from the same module
# This hopefully gives some hint about which functions need to be rearranged.
FUNCTION_OUT_OF_ORDER = 203
# ERROR: The line following an explicit by-name marker that does _not_ expect
# a comment -- i.e. VTABLE or GLOBAL -- could not extract the name
NO_SUITABLE_NAME = 204
@dataclass
class ParserAlert:

View File

@@ -0,0 +1,103 @@
import re
from typing import Optional
from enum import Enum
class MarkerType(Enum):
UNKNOWN = -100
FUNCTION = 1
STUB = 2
SYNTHETIC = 3
TEMPLATE = 4
GLOBAL = 5
VTABLE = 6
STRING = 7
LIBRARY = 8
markerRegex = re.compile(
r"\s*//\s*(?P<type>\w+):\s*(?P<module>\w+)\s+(?P<offset>0x[a-f0-9]+)",
flags=re.I,
)
markerExactRegex = re.compile(
r"\s*// (?P<type>[A-Z]+): (?P<module>[A-Z0-9]+) (?P<offset>0x[a-f0-9]+)$"
)
class DecompMarker:
def __init__(self, marker_type: str, module: str, offset: int) -> None:
try:
self._type = MarkerType[marker_type.upper()]
except KeyError:
self._type = MarkerType.UNKNOWN
# Convert to upper here. A lot of other analysis depends on this name
# being consistent and predictable. If the name is _not_ capitalized
# we will emit a syntax error.
self._module: str = module.upper()
self._offset: int = offset
@property
def type(self) -> MarkerType:
return self._type
@property
def module(self) -> str:
return self._module
@property
def offset(self) -> int:
return self._offset
def is_regular_function(self) -> bool:
"""Regular function, meaning: not an explicit byname lookup. FUNCTION
markers can be _implicit_ byname.
FUNCTION and STUB markers are (currently) the only heterogenous marker types that
can be lumped together, although the reasons for doing so are a little vague."""
return self._type in (MarkerType.FUNCTION, MarkerType.STUB)
def is_explicit_byname(self) -> bool:
return self._type in (
MarkerType.SYNTHETIC,
MarkerType.TEMPLATE,
MarkerType.LIBRARY,
)
def is_variable(self) -> bool:
return self._type == MarkerType.GLOBAL
def is_synthetic(self) -> bool:
return self._type == MarkerType.SYNTHETIC
def is_template(self) -> bool:
return self._type == MarkerType.TEMPLATE
def is_vtable(self) -> bool:
return self._type == MarkerType.VTABLE
def is_library(self) -> bool:
return self._type == MarkerType.LIBRARY
def is_string(self) -> bool:
return self._type == MarkerType.STRING
def allowed_in_func(self) -> bool:
return self._type in (MarkerType.GLOBAL, MarkerType.STRING)
def match_marker(line: str) -> Optional[DecompMarker]:
match = markerRegex.match(line)
if match is None:
return None
return DecompMarker(
marker_type=match.group("type"),
module=match.group("module"),
offset=int(match.group("offset"), 16),
)
def is_marker_exact(line: str) -> bool:
return markerExactRegex.match(line) is not None

View File

@@ -1,35 +1,57 @@
from typing import Optional
from dataclasses import dataclass
from .marker import MarkerType
@dataclass
class ParserNode:
class ParserSymbol:
"""Exported decomp marker with all information (except the code filename) required to
cross-reference with cvdump data."""
type: MarkerType
line_number: int
@dataclass
class ParserSymbol(ParserNode):
module: str
offset: int
name: str
# The parser doesn't (currently) know about the code filename, but if you
# wanted to set it here after the fact, here's the spot.
filename: Optional[str] = None
def should_skip(self) -> bool:
"""The default is to compare any symbols we have"""
return False
def is_nameref(self) -> bool:
"""All symbols default to name lookup"""
return True
@dataclass
class ParserFunction(ParserSymbol):
name: str
# We are able to detect the closing line of a function with some reliability.
# This isn't used for anything right now, but perhaps later it will be.
end_line: Optional[int] = None
# All marker types are referenced by name except FUNCTION/STUB. These can also be
# referenced by name, but only if this flag is true.
lookup_by_name: bool = False
is_stub: bool = False
is_synthetic: bool = False
is_template: bool = False
end_line: int = -1
def should_skip(self) -> bool:
return self.type == MarkerType.STUB
def is_nameref(self) -> bool:
return (
self.type in (MarkerType.SYNTHETIC, MarkerType.TEMPLATE, MarkerType.LIBRARY)
or self.lookup_by_name
)
@dataclass
class ParserVariable(ParserSymbol):
name: str
size: int = -1
is_static: bool = False
@dataclass
class ParserVtable(ParserSymbol):
class_name: str
num_entries: int = -1
pass

View File

@@ -3,15 +3,19 @@
from typing import List, Iterable, Iterator
from enum import Enum
from .util import (
DecompMarker,
is_blank_or_comment,
match_marker,
is_marker_exact,
get_class_name,
get_variable_name,
get_synthetic_name,
remove_trailing_comment,
)
from .marker import (
DecompMarker,
match_marker,
is_marker_exact,
)
from .node import (
ParserSymbol,
ParserFunction,
ParserVariable,
ParserVtable,
@@ -28,44 +32,23 @@ class ReaderState(Enum):
IN_GLOBAL = 5
IN_FUNC_GLOBAL = 6
IN_VTABLE = 7
IN_SYNTHETIC = 8
IN_LIBRARY = 9
DONE = 100
def marker_is_stub(marker: DecompMarker) -> bool:
return marker.type.upper() == "STUB"
def marker_is_variable(marker: DecompMarker) -> bool:
return marker.type.upper() == "GLOBAL"
def marker_is_synthetic(marker: DecompMarker) -> bool:
return marker.type.upper() in ("SYNTHETIC", "TEMPLATE")
def marker_is_template(marker: DecompMarker) -> bool:
return marker.type.upper() == "TEMPLATE"
def marker_is_function(marker: DecompMarker) -> bool:
return marker.type.upper() in ("FUNCTION", "STUB")
def marker_is_vtable(marker: DecompMarker) -> bool:
return marker.type.upper() == "VTABLE"
class MarkerDict:
def __init__(self) -> None:
self.markers: dict = {}
def insert(self, marker: DecompMarker) -> bool:
"""Return True if this insert would overwrite"""
module = marker.module.upper()
module = marker.module
if module in self.markers:
return True
self.markers[module] = (marker.type, marker.offset)
# TODO: type converted back to string version here instead of using enum
self.markers[module] = (marker.type.name, marker.offset)
return False
def iter(self) -> Iterator[DecompMarker]:
@@ -82,9 +65,7 @@ class DecompParser:
# but not right now
def __init__(self) -> None:
# The lists to be populated as we parse
self.functions: List[ParserFunction] = []
self.vtables: List[ParserVtable] = []
self.variables: List[ParserVariable] = []
self._symbols: List[ParserSymbol] = []
self.alerts: List[ParserAlert] = []
self.line_number: int = 0
@@ -113,9 +94,7 @@ class DecompParser:
self.function_sig: str = ""
def reset(self):
self.functions = []
self.vtables = []
self.variables = []
self._symbols = []
self.alerts = []
self.line_number = 0
@@ -131,6 +110,18 @@ class DecompParser:
self.function_start = 0
self.function_sig = ""
@property
def functions(self) -> List[ParserSymbol]:
return [s for s in self._symbols if isinstance(s, ParserFunction)]
@property
def vtables(self) -> List[ParserSymbol]:
return [s for s in self._symbols if isinstance(s, ParserVtable)]
@property
def variables(self) -> List[ParserSymbol]:
return [s for s in self._symbols if isinstance(s, ParserVariable)]
def _recover(self):
"""We hit a syntax error and need to reset temp structures"""
self.state = ReaderState.SEARCH
@@ -159,10 +150,17 @@ class DecompParser:
self._syntax_warning(ParserError.DUPLICATE_MODULE)
self.state = ReaderState.WANT_SIG
def _synthetic_marker(self, marker: DecompMarker):
def _nameref_marker(self, marker: DecompMarker):
"""Functions explicitly referenced by name are set here"""
if self.fun_markers.insert(marker):
self._syntax_warning(ParserError.DUPLICATE_MODULE)
self.state = ReaderState.IN_TEMPLATE
if marker.is_template():
self.state = ReaderState.IN_TEMPLATE
elif marker.is_synthetic():
self.state = ReaderState.IN_SYNTHETIC
else:
self.state = ReaderState.IN_LIBRARY
def _function_done(self, lookup_by_name: bool = False, unexpected: bool = False):
end_line = self.line_number
@@ -173,16 +171,14 @@ class DecompParser:
end_line -= 1
for marker in self.fun_markers.iter():
self.functions.append(
self._symbols.append(
ParserFunction(
type=marker.type,
line_number=self.function_start,
module=marker.module,
offset=marker.offset,
lookup_by_name=lookup_by_name,
is_stub=marker_is_stub(marker),
is_synthetic=marker_is_synthetic(marker),
is_template=marker_is_template(marker),
name=self.function_sig,
lookup_by_name=lookup_by_name,
end_line=end_line,
)
)
@@ -202,12 +198,13 @@ class DecompParser:
class_name = self.last_line.strip()
for marker in self.tbl_markers.iter():
self.vtables.append(
self._symbols.append(
ParserVtable(
type=marker.type,
line_number=self.line_number,
module=marker.module,
offset=marker.offset,
class_name=class_name,
name=class_name,
)
)
@@ -223,14 +220,19 @@ class DecompParser:
else:
self.state = ReaderState.IN_GLOBAL
def _variable_done(self):
def _variable_done(self, name: str):
if not name.startswith("g_"):
self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX)
for marker in self.var_markers.iter():
self.variables.append(
self._symbols.append(
ParserVariable(
type=marker.type,
line_number=self.line_number,
module=marker.module,
offset=marker.offset,
name=self.last_line.strip(),
name=name,
is_static=self.state == ReaderState.IN_FUNC_GLOBAL,
)
)
@@ -246,12 +248,23 @@ class DecompParser:
self._syntax_error(ParserError.UNEXPECTED_MARKER)
return
# If we are inside a function, the only markers we accept are:
# GLOBAL, indicating a static variable
# STRING, indicating a literal string.
# Otherwise we assume that the parser missed the end of the function
# and we have moved on to something else.
# This is unlikely to occur with well-formed code, but
# we can recover easily by just ending the function here.
if self.state == ReaderState.IN_FUNC and not marker.allowed_in_func():
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
self._function_done(unexpected=True)
# TODO: How uncertain are we of detecting the end of a function
# in a clang-formatted file? For now we assume we have missed the
# end if we detect a non-GLOBAL marker while state is IN_FUNC.
# Maybe these cases should be syntax errors instead
if marker_is_function(marker):
if marker.is_regular_function():
if self.state in (
ReaderState.SEARCH,
ReaderState.WANT_SIG,
@@ -259,29 +272,41 @@ class DecompParser:
# We will allow multiple offsets if we have just begun
# the code block, but not after we hit the curly brace.
self._function_marker(marker)
elif self.state == ReaderState.IN_FUNC:
# We hit another offset unexpectedly.
# We can recover easily by just ending the function here.
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
self._function_done(unexpected=True)
# Start the next function right after so we can
# read the next line.
self._function_marker(marker)
else:
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
elif marker_is_synthetic(marker):
elif marker.is_template():
if self.state in (ReaderState.SEARCH, ReaderState.IN_TEMPLATE):
self._synthetic_marker(marker)
elif self.state == ReaderState.IN_FUNC:
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
self._function_done(lookup_by_name=True, unexpected=True)
self._synthetic_marker(marker)
self._nameref_marker(marker)
else:
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
elif marker_is_variable(marker):
elif marker.is_synthetic():
if self.state in (ReaderState.SEARCH, ReaderState.IN_SYNTHETIC):
self._nameref_marker(marker)
else:
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
elif marker.is_library():
if self.state in (ReaderState.SEARCH, ReaderState.IN_LIBRARY):
self._nameref_marker(marker)
else:
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
elif marker.is_string():
# TODO: We are ignoring string markers for the moment.
# We already have a lot of them in the codebase, though, so we'll
# hang onto them for now in case we can use them later.
# To match up string constants, the strategy will be:
# 1. Use cvdump to find all string constants in the recomp
# 2. In the original binary, look at relocated vaddrs from .rdata
# 3. Try to match up string data from #1 with locations in #2
# Throw the syntax error we would throw if we were parsing these
if self.state not in (ReaderState.SEARCH, ReaderState.IN_FUNC):
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
elif marker.is_variable():
if self.state in (
ReaderState.SEARCH,
ReaderState.IN_GLOBAL,
@@ -292,13 +317,9 @@ class DecompParser:
else:
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
elif marker_is_vtable(marker):
elif marker.is_vtable():
if self.state in (ReaderState.SEARCH, ReaderState.IN_VTABLE):
self._vtable_marker(marker)
elif self.state == ReaderState.IN_FUNC:
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
self._function_done(unexpected=True)
self._vtable_marker(marker)
else:
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
@@ -322,12 +343,16 @@ class DecompParser:
return
line_strip = line.strip()
if self.state == ReaderState.IN_TEMPLATE:
# TEMPLATE functions are a special case. The signature is
# given on the next line (in a // comment)
if self.state in (
ReaderState.IN_SYNTHETIC,
ReaderState.IN_TEMPLATE,
ReaderState.IN_LIBRARY,
):
# Explicit nameref functions provide the function name
# on the next line (in a // comment)
name = get_synthetic_name(line)
if name is None:
self._syntax_error(ParserError.BAD_SYNTHETIC)
self._syntax_error(ParserError.BAD_NAMEREF)
else:
self.function_sig = name
self._function_starts_here()
@@ -384,8 +409,28 @@ class DecompParser:
self._function_done()
elif self.state in (ReaderState.IN_GLOBAL, ReaderState.IN_FUNC_GLOBAL):
if not is_blank_or_comment(line):
self._variable_done()
# TODO: Known problem that an error here will cause us to abandon a
# function we have already parsed if state == IN_FUNC_GLOBAL.
# However, we are not tolerant of _any_ syntax problems in our
# CI actions, so the solution is to just fix the invalid marker.
if is_blank_or_comment(line):
self._syntax_error(ParserError.NO_SUITABLE_NAME)
return
# We don't have a foolproof mechanism to tell what is and is not a variable.
# If the GLOBAL is being declared on a `return` statement, though, this is
# not correct. It is either a string literal (which will be handled differently)
# or it is not the variable declaration, which is incorrect decomp syntax.
if line.strip().startswith("return"):
self._syntax_error(ParserError.GLOBAL_NOT_VARIABLE)
return
name = get_variable_name(line)
if name is None:
self._syntax_error(ParserError.NO_SUITABLE_NAME)
return
self._variable_done(name)
elif self.state == ReaderState.IN_VTABLE:
vtable_class = get_class_name(line)

View File

@@ -1,17 +1,6 @@
# C++ Parser utility functions and data structures
from __future__ import annotations # python <3.10 compatibility
import re
from collections import namedtuple
DecompMarker = namedtuple("DecompMarker", ["type", "module", "offset"])
markerRegex = re.compile(
r"\s*//\s*(\w+):\s*(\w+)\s+(0x[a-f0-9]+)",
flags=re.I,
)
markerExactRegex = re.compile(r"\s*// ([A-Z]+): ([A-Z0-9]+) (0x[a-f0-9]+)$")
from typing import Optional
# The goal here is to just read whatever is on the next line, so some
# flexibility in the formatting seems OK
@@ -23,7 +12,7 @@ templateCommentRegex = re.compile(r"\s*//\s+(.*)")
trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")
def get_synthetic_name(line: str) -> str | None:
def get_synthetic_name(line: str) -> Optional[str]:
"""Synthetic names appear on a single line comment on the line after the marker.
If that's not what we have, return None"""
template_match = templateCommentRegex.match(line)
@@ -51,20 +40,6 @@ def is_blank_or_comment(line: str) -> bool:
)
def match_marker(line: str) -> DecompMarker | None:
match = markerRegex.match(line)
if match is None:
return None
return DecompMarker(
type=match.group(1), module=match.group(2), offset=int(match.group(3), 16)
)
def is_marker_exact(line: str) -> bool:
return markerExactRegex.match(line) is not None
template_class_decl_regex = re.compile(
r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)<([\w]+)\s*(\*+)?\s*>"
)
@@ -73,7 +48,7 @@ template_class_decl_regex = re.compile(
class_decl_regex = re.compile(r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)")
def get_class_name(line: str) -> str | None:
def get_class_name(line: str) -> Optional[str]:
"""For VTABLE markers, extract the class name from the code line or comment
where it appears."""
@@ -93,3 +68,21 @@ def get_class_name(line: str) -> str | None:
return match.group(1)
return None
global_regex = re.compile(r"(?P<name>g_\w+)")
less_strict_global_regex = re.compile(r"(?P<name>\w+)(?:\)\(|\[.*|\s*=.*|;)")
def get_variable_name(line: str) -> Optional[str]:
"""Grab the name of the variable annotated with the GLOBAL marker.
Correct syntax would have the variable start with the prefix "g_"
but we will try to match regardless."""
if (match := global_regex.search(line)) is not None:
return match.group("name")
if (match := less_strict_global_regex.search(line)) is not None:
return match.group("name")
return None

View File

@@ -115,7 +115,7 @@ def test_different_markers_same_module(parser):
# Use first marker declaration, don't replace
assert len(parser.functions) == 1
assert parser.functions[0].is_stub is False
assert parser.functions[0].should_skip() is False
# Should alert to this
assert len(parser.alerts) == 1
@@ -193,7 +193,7 @@ def test_multiple_vtables(parser):
)
assert len(parser.alerts) == 0
assert len(parser.vtables) == 2
assert parser.vtables[0].class_name == "MxString"
assert parser.vtables[0].name == "MxString"
def test_multiple_vtables_same_module(parser):
@@ -247,7 +247,7 @@ def test_synthetic_no_comment(parser):
)
assert len(parser.functions) == 0
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.BAD_SYNTHETIC
assert parser.alerts[0].code == ParserError.BAD_NAMEREF
assert parser.state == ReaderState.SEARCH
@@ -375,3 +375,70 @@ def test_unexpected_eof(parser):
assert len(parser.functions) == 1
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.UNEXPECTED_END_OF_FILE
def test_global_variable_prefix(parser):
"""Global and static variables should have the g_ prefix."""
parser.read_lines(
[
"// GLOBAL: TEST 0x1234",
'const char* g_msg = "hello";',
]
)
assert len(parser.variables) == 1
assert len(parser.alerts) == 0
parser.read_lines(
[
"// GLOBAL: TEXT 0x5555",
"int test = 5;",
]
)
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.GLOBAL_MISSING_PREFIX
# In spite of that, we should still grab the variable name.
assert parser.variables[1].name == "test"
def test_global_nomatch(parser):
"""We do our best to grab the variable name, even without the g_ prefix
but this (by design) will not match everything."""
parser.read_lines(
[
"// GLOBAL: TEST 0x1234",
"FunctionCall();",
]
)
assert len(parser.variables) == 0
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.NO_SUITABLE_NAME
def test_static_variable(parser):
"""We can detect whether a variable is a static function variable
based on the parser's state when we detect it.
Checking for the word `static` alone is not a good test.
Static class variables are filed as S_GDATA32, same as regular globals.
Only function statics are filed as S_LDATA32."""
parser.read_lines(
[
"// GLOBAL: TEST 0x1234",
"int g_test = 1234;",
]
)
assert len(parser.variables) == 1
assert parser.variables[0].is_static is False
parser.read_lines(
[
"// FUNCTION: TEST 0x5555",
"void test_function() {",
"// GLOBAL: TEST 0x8888",
"int g_internal = 0;",
"}",
]
)
assert len(parser.variables) == 2
assert parser.variables[1].is_static is True

View File

@@ -11,9 +11,11 @@ state_change_marker_cases = [
(_rs.SEARCH, "FUNCTION", _rs.WANT_SIG, None),
(_rs.SEARCH, "GLOBAL", _rs.IN_GLOBAL, None),
(_rs.SEARCH, "STUB", _rs.WANT_SIG, None),
(_rs.SEARCH, "SYNTHETIC", _rs.IN_TEMPLATE, None),
(_rs.SEARCH, "SYNTHETIC", _rs.IN_SYNTHETIC, None),
(_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None),
(_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None),
(_rs.SEARCH, "LIBRARY", _rs.IN_LIBRARY, None),
(_rs.SEARCH, "STRING", _rs.SEARCH, None),
(_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None),
(_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
@@ -21,20 +23,26 @@ state_change_marker_cases = [
(_rs.WANT_SIG, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_SIG, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_SIG, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_SIG, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_SIG, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC, "FUNCTION", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
(_rs.IN_FUNC, "STUB", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "SYNTHETIC", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "SYNTHETIC", _rs.IN_SYNTHETIC, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "LIBRARY", _rs.IN_LIBRARY, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "STRING", _rs.IN_FUNC, None),
(_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "SYNTHETIC", _rs.IN_TEMPLATE, None),
(_rs.IN_TEMPLATE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "TEMPLATE", _rs.IN_TEMPLATE, None),
(_rs.IN_TEMPLATE, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_CURLY, "FUNCTION", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "GLOBAL", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
@@ -42,6 +50,8 @@ state_change_marker_cases = [
(_rs.WANT_CURLY, "SYNTHETIC", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "TEMPLATE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "VTABLE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "LIBRARY", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "STRING", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.IN_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "GLOBAL", _rs.IN_GLOBAL, None),
@@ -49,6 +59,8 @@ state_change_marker_cases = [
(_rs.IN_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
@@ -56,6 +68,8 @@ state_change_marker_cases = [
(_rs.IN_FUNC_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
@@ -63,6 +77,26 @@ state_change_marker_cases = [
(_rs.IN_VTABLE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "VTABLE", _rs.IN_VTABLE, None),
(_rs.IN_VTABLE, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_SYNTHETIC, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_SYNTHETIC, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_SYNTHETIC, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_SYNTHETIC, "SYNTHETIC", _rs.IN_SYNTHETIC, None),
(_rs.IN_SYNTHETIC, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_SYNTHETIC, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_SYNTHETIC, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_SYNTHETIC, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_LIBRARY, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_LIBRARY, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_LIBRARY, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_LIBRARY, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_LIBRARY, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_LIBRARY, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_LIBRARY, "LIBRARY", _rs.IN_LIBRARY, None),
(_rs.IN_LIBRARY, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
]
# fmt: on
@@ -105,47 +139,3 @@ def test_state_search_line(line: str):
p.read_line(line)
assert p.state == _rs.SEARCH
assert len(p.alerts) == 0
global_lines = [
("// A comment", _rs.IN_GLOBAL),
("", _rs.IN_GLOBAL),
("\t", _rs.IN_GLOBAL),
(" ", _rs.IN_GLOBAL),
# TODO: no check for "likely" variable declaration so these all count
("void function()", _rs.SEARCH),
("int x = 123;", _rs.SEARCH),
("just some text", _rs.SEARCH),
]
@pytest.mark.parametrize("line, new_state", global_lines)
def test_state_global_line(line: str, new_state: _rs):
p = DecompParser()
p.read_line("// GLOBAL: TEST 0x1234")
assert p.state == _rs.IN_GLOBAL
p.read_line(line)
assert p.state == new_state
# mostly same as above
in_func_global_lines = [
("// A comment", _rs.IN_FUNC_GLOBAL),
("", _rs.IN_FUNC_GLOBAL),
("\t", _rs.IN_FUNC_GLOBAL),
(" ", _rs.IN_FUNC_GLOBAL),
# TODO: no check for "likely" variable declaration so these all count
("void function()", _rs.IN_FUNC),
("int x = 123;", _rs.IN_FUNC),
("just some text", _rs.IN_FUNC),
]
@pytest.mark.parametrize("line, new_state", in_func_global_lines)
def test_state_in_func_global_line(line: str, new_state: _rs):
p = DecompParser()
p.state = _rs.IN_FUNC
p.read_line("// GLOBAL: TEST 0x1234")
assert p.state == _rs.IN_FUNC_GLOBAL
p.read_line(line)
assert p.state == new_state

View File

@@ -1,11 +1,15 @@
import pytest
from isledecomp.parser.parser import MarkerDict
from isledecomp.parser.util import (
from isledecomp.parser.marker import (
DecompMarker,
is_blank_or_comment,
MarkerType,
match_marker,
is_marker_exact,
)
from isledecomp.parser.util import (
is_blank_or_comment,
get_class_name,
get_variable_name,
)
@@ -96,7 +100,7 @@ def test_marker_dict_type_replace():
d.insert(DecompMarker("STUB", "TEST", 0x1234))
markers = list(d.iter())
assert len(markers) == 1
assert markers[0].type == "FUNCTION"
assert markers[0].type == MarkerType.FUNCTION
class_name_match_cases = [
@@ -131,3 +135,26 @@ class_name_no_match_cases = [
@pytest.mark.parametrize("line", class_name_no_match_cases)
def test_get_class_name_none(line: str):
assert get_class_name(line) is None
variable_name_cases = [
# with prefix for easy access
("char* g_test;", "g_test"),
("g_test;", "g_test"),
("void (*g_test)(int);", "g_test"),
("char g_test[50];", "g_test"),
("char g_test[50] = {1234,", "g_test"),
("int g_test = 500;", "g_test"),
# no prefix
("char* hello;", "hello"),
("hello;", "hello"),
("void (*hello)(int);", "hello"),
("char hello[50];", "hello"),
("char hello[50] = {1234,", "hello"),
("int hello = 500;", "hello"),
]
@pytest.mark.parametrize("line,name", variable_name_cases)
def test_get_variable_name(line: str, name: str):
assert get_variable_name(line) == name

View File

@@ -316,7 +316,7 @@ def main():
parser.read_lines(srcfile)
for fun in parser.functions:
if fun.is_stub:
if fun.should_skip():
continue
if fun.module != basename:
@@ -330,7 +330,7 @@ def main():
else:
continue
if fun.lookup_by_name:
if fun.is_nameref():
recinfo = syminfo.get_recompiled_address_from_name(fun.name)
if not recinfo:
continue