mirror of
https://github.com/isledecomp/isle.git
synced 2025-10-26 09:54:18 +00:00
parser: Identify namespaces (#499)
This commit is contained in:
@@ -68,4 +68,9 @@ def demangle_vtable(symbol: str) -> str:
|
||||
|
||||
return f"{class_name}<{generic}>"
|
||||
|
||||
# If we have two classes listed, it is a namespace hierarchy.
|
||||
# @@6B@ is a common generic suffix for these vtable symbols.
|
||||
if t[1] != "" and t[1] != "6B":
|
||||
return t[1] + "::" + t[0]
|
||||
|
||||
return t[0]
|
||||
|
||||
@@ -8,6 +8,8 @@ from .util import (
|
||||
get_synthetic_name,
|
||||
remove_trailing_comment,
|
||||
get_string_contents,
|
||||
sanitize_code_line,
|
||||
scopeDetectRegex,
|
||||
)
|
||||
from .marker import (
|
||||
DecompMarker,
|
||||
@@ -59,6 +61,57 @@ class MarkerDict:
|
||||
self.markers = {}
|
||||
|
||||
|
||||
class CurlyManager:
    """Overly simplified scope manager.

    Keeps a stack describing the scopes opened so far. Each entry is either
    the name captured from a class/struct/namespace line or the literal
    string "{" standing for one unmatched opening brace.
    """

    def __init__(self):
        self._stack = []

    def reset(self):
        """Discard every tracked scope and brace."""
        self._stack = []

    def _pop(self):
        """Pop stack safely: an empty stack is left untouched."""
        if self._stack:
            self._stack.pop()

    def _close_brace(self):
        """Handle one closing brace.

        Removes the top entry and, if a scope name is then exposed on top of
        the stack, removes that name as well, because the brace that just
        closed was the one that opened that scope.
        """
        self._pop()
        if self._stack and self._stack[-1] != "{":
            self._pop()

    def get_prefix(self, name: Optional[str] = None) -> str:
        """Return the prefix for where we are.

        The scope names currently on the stack are joined with "::". If
        `name` is given and is not already one of those scopes, it is
        appended as the final component. With no scopes on the stack the
        result is `name` itself, or the empty string when `name` is None.
        """
        scopes = [entry for entry in self._stack if entry != "{"]
        if not scopes:
            return name if name is not None else ""

        if name is not None and name not in scopes:
            scopes.append(name)

        return "::".join(scopes)

    def read_line(self, raw_line: str):
        """Read a line of code and update the stack.

        The line is first passed through sanitize_code_line. A line that
        matches scopeDetectRegex and does not end with ";" pushes the
        captured scope name. The net brace count of the line then pushes
        one "{" per extra opening brace, or closes one scope level per
        extra closing brace.
        """
        line = sanitize_code_line(raw_line)
        match = scopeDetectRegex.match(line)
        # A trailing semicolon means a forward declaration (or similar),
        # which does not open a scope.
        if match is not None and not line.endswith(";"):
            self._stack.append(match.group("name"))

        change = line.count("{") - line.count("}")
        if change > 0:
            self._stack.extend(["{"] * change)
        else:
            # Close the braces one at a time so that every scope name is
            # removed together with its own brace, even when several
            # scopes end on the same line.
            for _ in range(-change):
                self._close_brace()
|
||||
|
||||
|
||||
class DecompParser:
|
||||
# pylint: disable=too-many-instance-attributes
|
||||
# Could combine output lists into a single list to get under the limit,
|
||||
@@ -73,6 +126,8 @@ class DecompParser:
|
||||
|
||||
self.last_line: str = ""
|
||||
|
||||
self.curly = CurlyManager()
|
||||
|
||||
# To allow for multiple markers where code is shared across different
|
||||
# modules, save lists of compatible markers that appear in sequence
|
||||
self.fun_markers = MarkerDict()
|
||||
@@ -110,6 +165,8 @@ class DecompParser:
|
||||
self.function_start = 0
|
||||
self.function_sig = ""
|
||||
|
||||
self.curly.reset()
|
||||
|
||||
@property
|
||||
def functions(self) -> List[ParserFunction]:
|
||||
return [s for s in self._symbols if isinstance(s, ParserFunction)]
|
||||
@@ -213,7 +270,7 @@ class DecompParser:
|
||||
line_number=self.line_number,
|
||||
module=marker.module,
|
||||
offset=marker.offset,
|
||||
name=class_name,
|
||||
name=self.curly.get_prefix(class_name),
|
||||
)
|
||||
)
|
||||
|
||||
@@ -254,7 +311,7 @@ class DecompParser:
|
||||
line_number=self.line_number,
|
||||
module=marker.module,
|
||||
offset=marker.offset,
|
||||
name=variable_name,
|
||||
name=self.curly.get_prefix(variable_name),
|
||||
is_static=self.state == ReaderState.IN_FUNC_GLOBAL,
|
||||
)
|
||||
)
|
||||
@@ -353,6 +410,8 @@ class DecompParser:
|
||||
self._handle_marker(marker)
|
||||
return
|
||||
|
||||
self.curly.read_line(line)
|
||||
|
||||
line_strip = line.strip()
|
||||
if self.state in (
|
||||
ReaderState.IN_SYNTHETIC,
|
||||
@@ -451,8 +510,11 @@ class DecompParser:
|
||||
variable_name = get_variable_name(line)
|
||||
# This is out of our control for library variables, but all of our
|
||||
# variables should start with "g_".
|
||||
if variable_name is not None and not variable_name.startswith("g_"):
|
||||
self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX)
|
||||
if variable_name is not None:
|
||||
# Before checking for the prefix, remove the
|
||||
# namespace chain if there is one.
|
||||
if not variable_name.split("::")[-1].startswith("g_"):
|
||||
self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX)
|
||||
|
||||
string_name = get_string_contents(line)
|
||||
|
||||
|
||||
@@ -7,15 +7,25 @@ from ast import literal_eval
|
||||
# flexibility in the formatting seems OK
|
||||
templateCommentRegex = re.compile(r"\s*//\s+(.*)")
|
||||
|
||||
|
||||
# To remove any comment (//) or block comment (/*) and its leading spaces
# from the end of a code line
trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")

# Get char contents, ignore escape characters
singleQuoteRegex = re.compile(r"('(?:[^\'\\]|\\.)')")

# Match contents of block comment on one line
blockCommentRegex = re.compile(r"(/\*.*?\*/)")

# Match contents of single comment on one line
regularCommentRegex = re.compile(r"(//.*)")

# Get string contents, ignore escape characters that might interfere
doubleQuoteRegex = re.compile(r"(\"(?:[^\"\\]|\\.)*\")")

# Detect a line that would cause us to enter a new scope.
# The keyword may be separated from the name by any run of whitespace, and
# the rest of the line (base classes, opening brace, ...) is consumed so the
# match still spans the whole line. The scope name is in the "name" group.
scopeDetectRegex = re.compile(r"(?:class|struct|namespace)\s+(?P<name>\w+).*")
|
||||
|
||||
|
||||
def get_synthetic_name(line: str) -> Optional[str]:
|
||||
"""Synthetic names appear on a single line comment on the line after the marker.
|
||||
@@ -28,6 +38,20 @@ def get_synthetic_name(line: str) -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
def sanitize_code_line(line: str) -> str:
    """Helper for scope manager. Strips the parts of a code line that could
    make us miscount curly brackets.

    Character literals and string literals are collapsed to empty quotes,
    then single-line block comments and // comments are deleted, and the
    result is returned with surrounding whitespace removed.

    This is a very naive implementation and fails entirely on multi-line
    strings or comments."""

    # Applied strictly in this order: literals are emptied before any
    # comment pattern is applied.
    substitutions = (
        (singleQuoteRegex, "''"),
        (doubleQuoteRegex, '""'),
        (blockCommentRegex, ""),
        (regularCommentRegex, ""),
    )

    cleaned = line
    for pattern, replacement in substitutions:
        cleaned = pattern.sub(replacement, cleaned)

    return cleaned.strip()
|
||||
|
||||
|
||||
def remove_trailing_comment(line: str) -> str:
    """Return `line` with whatever trailingCommentRegex matches at its end
    (a // or /* comment plus the whitespace before it) removed."""
    without_comment = trailingCommentRegex.sub("", line)
    return without_comment
|
||||
|
||||
@@ -75,8 +99,8 @@ def get_class_name(line: str) -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
# Variable name that starts with "g_", optionally preceded by a chain of
# "Scope::" qualifiers. The whole qualified name is in the "name" group.
global_regex = re.compile(r"(?P<name>(?:\w+::)*g_\w+)")

# Fallback for names without the "g_" prefix: an optionally qualified
# identifier that is immediately followed by ")(", "[", an "=" assignment,
# or ";".
less_strict_global_regex = re.compile(r"(?P<name>(?:\w+::)*\w+)(?:\)\(|\[.*|\s*=.*|;)")
|
||||
|
||||
|
||||
def get_variable_name(line: str) -> Optional[str]:
|
||||
|
||||
Reference in New Issue
Block a user