mirror of
https://github.com/isledecomp/isle.git
synced 2025-10-26 18:04:06 +00:00
Improve handling of variables for decomp parser (#376)
* Parser refactor: - Handling LIBRARY and STRING markers - Extracting global variable name for future comparison - Marking function static variables - More fluent error messages * String constants annotated with STRING * fix variable name * Should compare LIBRARY markers
This commit is contained in:
@@ -39,6 +39,14 @@ class ParserError(Enum):
|
||||
# WARN: We found a marker to be referenced by name outside of a header file.
|
||||
BYNAME_FUNCTION_IN_CPP = 109
|
||||
|
||||
# WARN: A GLOBAL marker appeared over a variable without the g_ prefix
|
||||
GLOBAL_MISSING_PREFIX = 110
|
||||
|
||||
# WARN: GLOBAL marker points at something other than variable declaration.
|
||||
# We can't match global variables based on position, but the goal here is
|
||||
# to ignore things like string literal that are not variables.
|
||||
GLOBAL_NOT_VARIABLE = 111
|
||||
|
||||
# This code or higher is an error, not a warning
|
||||
DECOMP_ERROR_START = 200
|
||||
|
||||
@@ -50,13 +58,18 @@ class ParserError(Enum):
|
||||
# For example, a GLOBAL cannot follow FUNCTION/STUB
|
||||
INCOMPATIBLE_MARKER = 201
|
||||
|
||||
# ERROR: The line following a synthetic marker was not a comment
|
||||
BAD_SYNTHETIC = 202
|
||||
# ERROR: The line following an explicit by-name marker was not a comment
|
||||
# We assume a syntax error here rather than try to use the next line
|
||||
BAD_NAMEREF = 202
|
||||
|
||||
# ERROR: This function offset comes before the previous offset from the same module
|
||||
# This hopefully gives some hint about which functions need to be rearranged.
|
||||
FUNCTION_OUT_OF_ORDER = 203
|
||||
|
||||
# ERROR: The line following an explicit by-name marker that does _not_ expect
|
||||
# a comment -- i.e. VTABLE or GLOBAL -- could not extract the name
|
||||
NO_SUITABLE_NAME = 204
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParserAlert:
|
||||
|
||||
103
tools/isledecomp/isledecomp/parser/marker.py
Normal file
103
tools/isledecomp/isledecomp/parser/marker.py
Normal file
@@ -0,0 +1,103 @@
|
||||
import re
|
||||
from typing import Optional
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class MarkerType(Enum):
    """Kinds of decomp annotation marker understood by the parser.

    Each member name is the keyword as it is spelled (in upper case) inside a
    marker comment, which lets a keyword be resolved with ``MarkerType[name]``.
    """

    # Placeholder for a keyword that is not one of the names listed below.
    UNKNOWN = -100

    # Function-like markers.
    FUNCTION = 1
    STUB = 2
    SYNTHETIC = 3
    TEMPLATE = 4

    # Data-like markers.
    GLOBAL = 5
    VTABLE = 6
    STRING = 7

    LIBRARY = 8
|
||||
|
||||
|
||||
# Lenient matcher: tolerates flexible spacing and any letter case so that a
# marker that is merely badly formatted is still recognised. Captures the
# named groups "type", "module" and "offset" (a 0x-prefixed hex literal).
markerRegex = re.compile(
    r"\s*//\s*(?P<type>\w+):\s*(?P<module>\w+)\s+(?P<offset>0x[a-f0-9]+)",
    re.IGNORECASE,
)

# Strict matcher: single spaces between the parts, upper-case type and module,
# lower-case hex digits, and nothing after the offset on the line.
markerExactRegex = re.compile(
    r"\s*// (?P<type>[A-Z]+): (?P<module>[A-Z0-9]+) (?P<offset>0x[a-f0-9]+)$"
)
|
||||
|
||||
|
||||
class DecompMarker:
    """One parsed decomp annotation: its marker type, module name and offset."""

    def __init__(self, marker_type: str, module: str, offset: int) -> None:
        """Store the marker fields.

        marker_type: keyword text from the comment; matched against the
            MarkerType member names without regard to case.
        module: module name from the comment; stored upper-cased.
        offset: numeric offset, stored as given.
        """
        # A keyword that is not a MarkerType member name becomes UNKNOWN
        # rather than raising.
        self._type = MarkerType.__members__.get(
            marker_type.upper(), MarkerType.UNKNOWN
        )

        # Normalise the module name to upper case: much of the later analysis
        # relies on it being consistent and predictable. A name that was not
        # capitalised in the source is reported as a syntax error.
        self._module: str = module.upper()
        self._offset: int = offset

    @property
    def type(self) -> MarkerType:
        return self._type

    @property
    def module(self) -> str:
        return self._module

    @property
    def offset(self) -> int:
        return self._offset

    def is_regular_function(self) -> bool:
        """True for FUNCTION and STUB markers.

        "Regular" means the marker is not an explicit by-name lookup; a
        FUNCTION marker may still be an _implicit_ by-name lookup. These two
        are (currently) the only different marker types that get grouped
        together, although the reasons for doing so are a little vague.
        """
        kind = self._type
        return kind is MarkerType.FUNCTION or kind is MarkerType.STUB

    def is_explicit_byname(self) -> bool:
        """True for SYNTHETIC, TEMPLATE and LIBRARY markers."""
        kind = self._type
        return (
            kind is MarkerType.SYNTHETIC
            or kind is MarkerType.TEMPLATE
            or kind is MarkerType.LIBRARY
        )

    def is_variable(self) -> bool:
        """True for GLOBAL markers."""
        return self._type is MarkerType.GLOBAL

    def is_synthetic(self) -> bool:
        """True for SYNTHETIC markers."""
        return self._type is MarkerType.SYNTHETIC

    def is_template(self) -> bool:
        """True for TEMPLATE markers."""
        return self._type is MarkerType.TEMPLATE

    def is_vtable(self) -> bool:
        """True for VTABLE markers."""
        return self._type is MarkerType.VTABLE

    def is_library(self) -> bool:
        """True for LIBRARY markers."""
        return self._type is MarkerType.LIBRARY

    def is_string(self) -> bool:
        """True for STRING markers."""
        return self._type is MarkerType.STRING

    def allowed_in_func(self) -> bool:
        """True for GLOBAL and STRING markers."""
        kind = self._type
        return kind is MarkerType.GLOBAL or kind is MarkerType.STRING
|
||||
|
||||
|
||||
def match_marker(line: str) -> Optional[DecompMarker]:
    """Parse a decomp marker from the start of ``line``.

    Uses the lenient marker pattern. Returns a DecompMarker built from the
    captured type, module and offset (the offset text is read as hexadecimal),
    or None when the line does not begin with a marker comment.
    """
    found = markerRegex.match(line)
    if found is None:
        return None

    fields = found.groupdict()
    return DecompMarker(
        marker_type=fields["type"],
        module=fields["module"],
        offset=int(fields["offset"], 16),
    )
|
||||
|
||||
|
||||
def is_marker_exact(line: str) -> bool:
    """Report whether ``line`` matches the strict marker pattern."""
    # A match object is always truthy, so bool() is equivalent to
    # comparing the result against None.
    return bool(markerExactRegex.match(line))
|
||||
@@ -1,35 +1,57 @@
|
||||
from typing import Optional
|
||||
from dataclasses import dataclass
|
||||
from .marker import MarkerType
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParserNode:
|
||||
class ParserSymbol:
|
||||
"""Exported decomp marker with all information (except the code filename) required to
|
||||
cross-reference with cvdump data."""
|
||||
|
||||
type: MarkerType
|
||||
line_number: int
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParserSymbol(ParserNode):
|
||||
module: str
|
||||
offset: int
|
||||
name: str
|
||||
|
||||
# The parser doesn't (currently) know about the code filename, but if you
|
||||
# wanted to set it here after the fact, here's the spot.
|
||||
filename: Optional[str] = None
|
||||
|
||||
def should_skip(self) -> bool:
|
||||
"""The default is to compare any symbols we have"""
|
||||
return False
|
||||
|
||||
def is_nameref(self) -> bool:
|
||||
"""All symbols default to name lookup"""
|
||||
return True
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParserFunction(ParserSymbol):
|
||||
name: str
|
||||
# We are able to detect the closing line of a function with some reliability.
|
||||
# This isn't used for anything right now, but perhaps later it will be.
|
||||
end_line: Optional[int] = None
|
||||
|
||||
# All marker types are referenced by name except FUNCTION/STUB. These can also be
|
||||
# referenced by name, but only if this flag is true.
|
||||
lookup_by_name: bool = False
|
||||
is_stub: bool = False
|
||||
is_synthetic: bool = False
|
||||
is_template: bool = False
|
||||
end_line: int = -1
|
||||
|
||||
def should_skip(self) -> bool:
|
||||
return self.type == MarkerType.STUB
|
||||
|
||||
def is_nameref(self) -> bool:
|
||||
return (
|
||||
self.type in (MarkerType.SYNTHETIC, MarkerType.TEMPLATE, MarkerType.LIBRARY)
|
||||
or self.lookup_by_name
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParserVariable(ParserSymbol):
|
||||
name: str
|
||||
size: int = -1
|
||||
is_static: bool = False
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParserVtable(ParserSymbol):
|
||||
class_name: str
|
||||
num_entries: int = -1
|
||||
pass
|
||||
|
||||
@@ -3,15 +3,19 @@
|
||||
from typing import List, Iterable, Iterator
|
||||
from enum import Enum
|
||||
from .util import (
|
||||
DecompMarker,
|
||||
is_blank_or_comment,
|
||||
match_marker,
|
||||
is_marker_exact,
|
||||
get_class_name,
|
||||
get_variable_name,
|
||||
get_synthetic_name,
|
||||
remove_trailing_comment,
|
||||
)
|
||||
from .marker import (
|
||||
DecompMarker,
|
||||
match_marker,
|
||||
is_marker_exact,
|
||||
)
|
||||
from .node import (
|
||||
ParserSymbol,
|
||||
ParserFunction,
|
||||
ParserVariable,
|
||||
ParserVtable,
|
||||
@@ -28,44 +32,23 @@ class ReaderState(Enum):
|
||||
IN_GLOBAL = 5
|
||||
IN_FUNC_GLOBAL = 6
|
||||
IN_VTABLE = 7
|
||||
IN_SYNTHETIC = 8
|
||||
IN_LIBRARY = 9
|
||||
DONE = 100
|
||||
|
||||
|
||||
def marker_is_stub(marker: DecompMarker) -> bool:
|
||||
return marker.type.upper() == "STUB"
|
||||
|
||||
|
||||
def marker_is_variable(marker: DecompMarker) -> bool:
|
||||
return marker.type.upper() == "GLOBAL"
|
||||
|
||||
|
||||
def marker_is_synthetic(marker: DecompMarker) -> bool:
|
||||
return marker.type.upper() in ("SYNTHETIC", "TEMPLATE")
|
||||
|
||||
|
||||
def marker_is_template(marker: DecompMarker) -> bool:
|
||||
return marker.type.upper() == "TEMPLATE"
|
||||
|
||||
|
||||
def marker_is_function(marker: DecompMarker) -> bool:
|
||||
return marker.type.upper() in ("FUNCTION", "STUB")
|
||||
|
||||
|
||||
def marker_is_vtable(marker: DecompMarker) -> bool:
|
||||
return marker.type.upper() == "VTABLE"
|
||||
|
||||
|
||||
class MarkerDict:
|
||||
def __init__(self) -> None:
|
||||
self.markers: dict = {}
|
||||
|
||||
def insert(self, marker: DecompMarker) -> bool:
|
||||
"""Return True if this insert would overwrite"""
|
||||
module = marker.module.upper()
|
||||
module = marker.module
|
||||
if module in self.markers:
|
||||
return True
|
||||
|
||||
self.markers[module] = (marker.type, marker.offset)
|
||||
# TODO: type converted back to string version here instead of using enum
|
||||
self.markers[module] = (marker.type.name, marker.offset)
|
||||
return False
|
||||
|
||||
def iter(self) -> Iterator[DecompMarker]:
|
||||
@@ -82,9 +65,7 @@ class DecompParser:
|
||||
# but not right now
|
||||
def __init__(self) -> None:
|
||||
# The lists to be populated as we parse
|
||||
self.functions: List[ParserFunction] = []
|
||||
self.vtables: List[ParserVtable] = []
|
||||
self.variables: List[ParserVariable] = []
|
||||
self._symbols: List[ParserSymbol] = []
|
||||
self.alerts: List[ParserAlert] = []
|
||||
|
||||
self.line_number: int = 0
|
||||
@@ -113,9 +94,7 @@ class DecompParser:
|
||||
self.function_sig: str = ""
|
||||
|
||||
def reset(self):
|
||||
self.functions = []
|
||||
self.vtables = []
|
||||
self.variables = []
|
||||
self._symbols = []
|
||||
self.alerts = []
|
||||
|
||||
self.line_number = 0
|
||||
@@ -131,6 +110,18 @@ class DecompParser:
|
||||
self.function_start = 0
|
||||
self.function_sig = ""
|
||||
|
||||
@property
|
||||
def functions(self) -> List[ParserSymbol]:
|
||||
return [s for s in self._symbols if isinstance(s, ParserFunction)]
|
||||
|
||||
@property
|
||||
def vtables(self) -> List[ParserSymbol]:
|
||||
return [s for s in self._symbols if isinstance(s, ParserVtable)]
|
||||
|
||||
@property
|
||||
def variables(self) -> List[ParserSymbol]:
|
||||
return [s for s in self._symbols if isinstance(s, ParserVariable)]
|
||||
|
||||
def _recover(self):
|
||||
"""We hit a syntax error and need to reset temp structures"""
|
||||
self.state = ReaderState.SEARCH
|
||||
@@ -159,10 +150,17 @@ class DecompParser:
|
||||
self._syntax_warning(ParserError.DUPLICATE_MODULE)
|
||||
self.state = ReaderState.WANT_SIG
|
||||
|
||||
def _synthetic_marker(self, marker: DecompMarker):
|
||||
def _nameref_marker(self, marker: DecompMarker):
|
||||
"""Functions explicitly referenced by name are set here"""
|
||||
if self.fun_markers.insert(marker):
|
||||
self._syntax_warning(ParserError.DUPLICATE_MODULE)
|
||||
self.state = ReaderState.IN_TEMPLATE
|
||||
|
||||
if marker.is_template():
|
||||
self.state = ReaderState.IN_TEMPLATE
|
||||
elif marker.is_synthetic():
|
||||
self.state = ReaderState.IN_SYNTHETIC
|
||||
else:
|
||||
self.state = ReaderState.IN_LIBRARY
|
||||
|
||||
def _function_done(self, lookup_by_name: bool = False, unexpected: bool = False):
|
||||
end_line = self.line_number
|
||||
@@ -173,16 +171,14 @@ class DecompParser:
|
||||
end_line -= 1
|
||||
|
||||
for marker in self.fun_markers.iter():
|
||||
self.functions.append(
|
||||
self._symbols.append(
|
||||
ParserFunction(
|
||||
type=marker.type,
|
||||
line_number=self.function_start,
|
||||
module=marker.module,
|
||||
offset=marker.offset,
|
||||
lookup_by_name=lookup_by_name,
|
||||
is_stub=marker_is_stub(marker),
|
||||
is_synthetic=marker_is_synthetic(marker),
|
||||
is_template=marker_is_template(marker),
|
||||
name=self.function_sig,
|
||||
lookup_by_name=lookup_by_name,
|
||||
end_line=end_line,
|
||||
)
|
||||
)
|
||||
@@ -202,12 +198,13 @@ class DecompParser:
|
||||
class_name = self.last_line.strip()
|
||||
|
||||
for marker in self.tbl_markers.iter():
|
||||
self.vtables.append(
|
||||
self._symbols.append(
|
||||
ParserVtable(
|
||||
type=marker.type,
|
||||
line_number=self.line_number,
|
||||
module=marker.module,
|
||||
offset=marker.offset,
|
||||
class_name=class_name,
|
||||
name=class_name,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -223,14 +220,19 @@ class DecompParser:
|
||||
else:
|
||||
self.state = ReaderState.IN_GLOBAL
|
||||
|
||||
def _variable_done(self):
|
||||
def _variable_done(self, name: str):
|
||||
if not name.startswith("g_"):
|
||||
self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX)
|
||||
|
||||
for marker in self.var_markers.iter():
|
||||
self.variables.append(
|
||||
self._symbols.append(
|
||||
ParserVariable(
|
||||
type=marker.type,
|
||||
line_number=self.line_number,
|
||||
module=marker.module,
|
||||
offset=marker.offset,
|
||||
name=self.last_line.strip(),
|
||||
name=name,
|
||||
is_static=self.state == ReaderState.IN_FUNC_GLOBAL,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -246,12 +248,23 @@ class DecompParser:
|
||||
self._syntax_error(ParserError.UNEXPECTED_MARKER)
|
||||
return
|
||||
|
||||
# If we are inside a function, the only markers we accept are:
|
||||
# GLOBAL, indicating a static variable
|
||||
# STRING, indicating a literal string.
|
||||
# Otherwise we assume that the parser missed the end of the function
|
||||
# and we have moved on to something else.
|
||||
# This is unlikely to occur with well-formed code, but
|
||||
# we can recover easily by just ending the function here.
|
||||
if self.state == ReaderState.IN_FUNC and not marker.allowed_in_func():
|
||||
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
|
||||
self._function_done(unexpected=True)
|
||||
|
||||
# TODO: How uncertain are we of detecting the end of a function
|
||||
# in a clang-formatted file? For now we assume we have missed the
|
||||
# end if we detect a non-GLOBAL marker while state is IN_FUNC.
|
||||
# Maybe these cases should be syntax errors instead
|
||||
|
||||
if marker_is_function(marker):
|
||||
if marker.is_regular_function():
|
||||
if self.state in (
|
||||
ReaderState.SEARCH,
|
||||
ReaderState.WANT_SIG,
|
||||
@@ -259,29 +272,41 @@ class DecompParser:
|
||||
# We will allow multiple offsets if we have just begun
|
||||
# the code block, but not after we hit the curly brace.
|
||||
self._function_marker(marker)
|
||||
elif self.state == ReaderState.IN_FUNC:
|
||||
# We hit another offset unexpectedly.
|
||||
# We can recover easily by just ending the function here.
|
||||
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
|
||||
self._function_done(unexpected=True)
|
||||
|
||||
# Start the next function right after so we can
|
||||
# read the next line.
|
||||
self._function_marker(marker)
|
||||
else:
|
||||
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||
|
||||
elif marker_is_synthetic(marker):
|
||||
elif marker.is_template():
|
||||
if self.state in (ReaderState.SEARCH, ReaderState.IN_TEMPLATE):
|
||||
self._synthetic_marker(marker)
|
||||
elif self.state == ReaderState.IN_FUNC:
|
||||
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
|
||||
self._function_done(lookup_by_name=True, unexpected=True)
|
||||
self._synthetic_marker(marker)
|
||||
self._nameref_marker(marker)
|
||||
else:
|
||||
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||
|
||||
elif marker_is_variable(marker):
|
||||
elif marker.is_synthetic():
|
||||
if self.state in (ReaderState.SEARCH, ReaderState.IN_SYNTHETIC):
|
||||
self._nameref_marker(marker)
|
||||
else:
|
||||
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||
|
||||
elif marker.is_library():
|
||||
if self.state in (ReaderState.SEARCH, ReaderState.IN_LIBRARY):
|
||||
self._nameref_marker(marker)
|
||||
else:
|
||||
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||
|
||||
elif marker.is_string():
|
||||
# TODO: We are ignoring string markers for the moment.
|
||||
# We already have a lot of them in the codebase, though, so we'll
|
||||
# hang onto them for now in case we can use them later.
|
||||
# To match up string constants, the strategy will be:
|
||||
# 1. Use cvdump to find all string constants in the recomp
|
||||
# 2. In the original binary, look at relocated vaddrs from .rdata
|
||||
# 3. Try to match up string data from #1 with locations in #2
|
||||
|
||||
# Throw the syntax error we would throw if we were parsing these
|
||||
if self.state not in (ReaderState.SEARCH, ReaderState.IN_FUNC):
|
||||
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||
|
||||
elif marker.is_variable():
|
||||
if self.state in (
|
||||
ReaderState.SEARCH,
|
||||
ReaderState.IN_GLOBAL,
|
||||
@@ -292,13 +317,9 @@ class DecompParser:
|
||||
else:
|
||||
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||
|
||||
elif marker_is_vtable(marker):
|
||||
elif marker.is_vtable():
|
||||
if self.state in (ReaderState.SEARCH, ReaderState.IN_VTABLE):
|
||||
self._vtable_marker(marker)
|
||||
elif self.state == ReaderState.IN_FUNC:
|
||||
self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
|
||||
self._function_done(unexpected=True)
|
||||
self._vtable_marker(marker)
|
||||
else:
|
||||
self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
|
||||
|
||||
@@ -322,12 +343,16 @@ class DecompParser:
|
||||
return
|
||||
|
||||
line_strip = line.strip()
|
||||
if self.state == ReaderState.IN_TEMPLATE:
|
||||
# TEMPLATE functions are a special case. The signature is
|
||||
# given on the next line (in a // comment)
|
||||
if self.state in (
|
||||
ReaderState.IN_SYNTHETIC,
|
||||
ReaderState.IN_TEMPLATE,
|
||||
ReaderState.IN_LIBRARY,
|
||||
):
|
||||
# Explicit nameref functions provide the function name
|
||||
# on the next line (in a // comment)
|
||||
name = get_synthetic_name(line)
|
||||
if name is None:
|
||||
self._syntax_error(ParserError.BAD_SYNTHETIC)
|
||||
self._syntax_error(ParserError.BAD_NAMEREF)
|
||||
else:
|
||||
self.function_sig = name
|
||||
self._function_starts_here()
|
||||
@@ -384,8 +409,28 @@ class DecompParser:
|
||||
self._function_done()
|
||||
|
||||
elif self.state in (ReaderState.IN_GLOBAL, ReaderState.IN_FUNC_GLOBAL):
|
||||
if not is_blank_or_comment(line):
|
||||
self._variable_done()
|
||||
# TODO: Known problem that an error here will cause us to abandon a
|
||||
# function we have already parsed if state == IN_FUNC_GLOBAL.
|
||||
# However, we are not tolerant of _any_ syntax problems in our
|
||||
# CI actions, so the solution is to just fix the invalid marker.
|
||||
if is_blank_or_comment(line):
|
||||
self._syntax_error(ParserError.NO_SUITABLE_NAME)
|
||||
return
|
||||
|
||||
# We don't have a foolproof mechanism to tell what is and is not a variable.
|
||||
# If the GLOBAL is being declared on a `return` statement, though, this is
|
||||
# not correct. It is either a string literal (which will be handled differently)
|
||||
# or it is not the variable declaration, which is incorrect decomp syntax.
|
||||
if line.strip().startswith("return"):
|
||||
self._syntax_error(ParserError.GLOBAL_NOT_VARIABLE)
|
||||
return
|
||||
|
||||
name = get_variable_name(line)
|
||||
if name is None:
|
||||
self._syntax_error(ParserError.NO_SUITABLE_NAME)
|
||||
return
|
||||
|
||||
self._variable_done(name)
|
||||
|
||||
elif self.state == ReaderState.IN_VTABLE:
|
||||
vtable_class = get_class_name(line)
|
||||
|
||||
@@ -1,17 +1,6 @@
|
||||
# C++ Parser utility functions and data structures
|
||||
from __future__ import annotations # python <3.10 compatibility
|
||||
import re
|
||||
from collections import namedtuple
|
||||
|
||||
DecompMarker = namedtuple("DecompMarker", ["type", "module", "offset"])
|
||||
|
||||
|
||||
markerRegex = re.compile(
|
||||
r"\s*//\s*(\w+):\s*(\w+)\s+(0x[a-f0-9]+)",
|
||||
flags=re.I,
|
||||
)
|
||||
|
||||
markerExactRegex = re.compile(r"\s*// ([A-Z]+): ([A-Z0-9]+) (0x[a-f0-9]+)$")
|
||||
from typing import Optional
|
||||
|
||||
# The goal here is to just read whatever is on the next line, so some
|
||||
# flexibility in the formatting seems OK
|
||||
@@ -23,7 +12,7 @@ templateCommentRegex = re.compile(r"\s*//\s+(.*)")
|
||||
trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")
|
||||
|
||||
|
||||
def get_synthetic_name(line: str) -> str | None:
|
||||
def get_synthetic_name(line: str) -> Optional[str]:
|
||||
"""Synthetic names appear on a single line comment on the line after the marker.
|
||||
If that's not what we have, return None"""
|
||||
template_match = templateCommentRegex.match(line)
|
||||
@@ -51,20 +40,6 @@ def is_blank_or_comment(line: str) -> bool:
|
||||
)
|
||||
|
||||
|
||||
def match_marker(line: str) -> DecompMarker | None:
|
||||
match = markerRegex.match(line)
|
||||
if match is None:
|
||||
return None
|
||||
|
||||
return DecompMarker(
|
||||
type=match.group(1), module=match.group(2), offset=int(match.group(3), 16)
|
||||
)
|
||||
|
||||
|
||||
def is_marker_exact(line: str) -> bool:
|
||||
return markerExactRegex.match(line) is not None
|
||||
|
||||
|
||||
template_class_decl_regex = re.compile(
|
||||
r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)<([\w]+)\s*(\*+)?\s*>"
|
||||
)
|
||||
@@ -73,7 +48,7 @@ template_class_decl_regex = re.compile(
|
||||
class_decl_regex = re.compile(r"\s*(?:\/\/)?\s*(?:class|struct) (\w+)")
|
||||
|
||||
|
||||
def get_class_name(line: str) -> str | None:
|
||||
def get_class_name(line: str) -> Optional[str]:
|
||||
"""For VTABLE markers, extract the class name from the code line or comment
|
||||
where it appears."""
|
||||
|
||||
@@ -93,3 +68,21 @@ def get_class_name(line: str) -> str | None:
|
||||
return match.group(1)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
# Preferred form: an identifier that carries the "g_" prefix. The leading \b
# stops the pattern from matching in the middle of a longer identifier; without
# it "msg_count" would be reported as "g_count".
global_regex = re.compile(r"\b(?P<name>g_\w+)")

# Fallback for names without the prefix: an identifier immediately followed by
# ")(", by "[", by an assignment, or by the terminating semicolon.
less_strict_global_regex = re.compile(r"(?P<name>\w+)(?:\)\(|\[.*|\s*=.*|;)")


def get_variable_name(line: str) -> Optional[str]:
    """Grab the name of the variable annotated with the GLOBAL marker.

    Correct syntax would have the variable start with the prefix "g_", so that
    form is tried first. If no such identifier is present we still try to pull
    a name out of the declaration using the less strict pattern.

    Returns the extracted name, or None if neither pattern finds one.
    """
    for pattern in (global_regex, less_strict_global_regex):
        match = pattern.search(line)
        if match is not None:
            return match.group("name")

    return None
|
||||
|
||||
Reference in New Issue
Block a user