parser: Identify namespaces (#499)

This commit is contained in:
MS
2024-01-28 09:25:45 -05:00
committed by GitHub
parent d9c4151bb8
commit 4137cd75e6
6 changed files with 280 additions and 7 deletions

View File

@@ -68,4 +68,9 @@ def demangle_vtable(symbol: str) -> str:
return f"{class_name}<{generic}>"
# If we have two classes listed, it is a namespace hierarchy.
# @@6B@ is a common generic suffix for these vtable symbols.
if t[1] != "" and t[1] != "6B":
return t[1] + "::" + t[0]
return t[0]

View File

@@ -8,6 +8,8 @@ from .util import (
get_synthetic_name,
remove_trailing_comment,
get_string_contents,
sanitize_code_line,
scopeDetectRegex,
)
from .marker import (
DecompMarker,
@@ -59,6 +61,57 @@ class MarkerDict:
self.markers = {}
class CurlyManager:
    """Overly simplified scope manager.

    Tracks nesting while source code is read one line at a time. The stack
    holds two kinds of entries: the literal string "{" for every open curly
    bracket, and the captured name of each class/struct/namespace whose
    declaration line was seen. Joining the names with "::" gives the prefix
    for the current position.
    """

    def __init__(self) -> None:
        self._stack: List[str] = []

    def reset(self) -> None:
        """Forget every open bracket and scope name."""
        self._stack = []

    def _pop(self) -> None:
        """Pop stack safely; an empty stack is left untouched."""
        if self._stack:
            self._stack.pop()

    def _close_brace(self) -> None:
        """Leave one curly-bracket level.

        Removes the top entry for the bracket itself. If that exposes a
        scope name, the name is removed as well, because the scope it
        introduced has just ended. Doing this once per closing bracket keeps
        the stack correct when a single line closes several scopes.
        """
        self._pop()
        if self._stack and self._stack[-1] != "{":
            self._pop()

    def get_prefix(self, name: Optional[str] = None) -> str:
        """Return the prefix for where we are.

        Args:
            name: Optional identifier to qualify with the current scopes.

        Returns:
            The open scope names joined by "::", with `name` appended unless
            it already appears among them. When no scope is open, `name`
            itself, or "" if `name` is None.
        """
        scopes = [entry for entry in self._stack if entry != "{"]
        if not scopes:
            return name if name is not None else ""

        # A name that is already one of the open scopes is not repeated.
        if name is not None and name not in scopes:
            scopes.append(name)

        return "::".join(scopes)

    def read_line(self, raw_line: str) -> None:
        """Read a line of code and update the stack.

        Args:
            raw_line: One unprocessed line of source code.
        """
        line = sanitize_code_line(raw_line)

        # A matching line that ends in ";" does not open a scope, so its
        # name is not recorded.
        match = scopeDetectRegex.match(line)
        if match is not None and not line.endswith(";"):
            self._stack.append(match.group("name"))

        change = line.count("{") - line.count("}")
        if change > 0:
            self._stack.extend(["{"] * change)
        else:
            for _ in range(-change):
                self._close_brace()
class DecompParser:
# pylint: disable=too-many-instance-attributes
# Could combine output lists into a single list to get under the limit,
@@ -73,6 +126,8 @@ class DecompParser:
self.last_line: str = ""
self.curly = CurlyManager()
# To allow for multiple markers where code is shared across different
# modules, save lists of compatible markers that appear in sequence
self.fun_markers = MarkerDict()
@@ -110,6 +165,8 @@ class DecompParser:
self.function_start = 0
self.function_sig = ""
self.curly.reset()
@property
def functions(self) -> List[ParserFunction]:
    """Collected symbols that are ParserFunction instances, in stored order."""
    return list(filter(lambda sym: isinstance(sym, ParserFunction), self._symbols))
@@ -213,7 +270,7 @@ class DecompParser:
line_number=self.line_number,
module=marker.module,
offset=marker.offset,
name=class_name,
name=self.curly.get_prefix(class_name),
)
)
@@ -254,7 +311,7 @@ class DecompParser:
line_number=self.line_number,
module=marker.module,
offset=marker.offset,
name=variable_name,
name=self.curly.get_prefix(variable_name),
is_static=self.state == ReaderState.IN_FUNC_GLOBAL,
)
)
@@ -353,6 +410,8 @@ class DecompParser:
self._handle_marker(marker)
return
self.curly.read_line(line)
line_strip = line.strip()
if self.state in (
ReaderState.IN_SYNTHETIC,
@@ -451,8 +510,11 @@ class DecompParser:
variable_name = get_variable_name(line)
# This is out of our control for library variables, but all of our
# variables should start with "g_".
if variable_name is not None and not variable_name.startswith("g_"):
self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX)
if variable_name is not None:
# Before checking for the prefix, remove the
# namespace chain if there is one.
if not variable_name.split("::")[-1].startswith("g_"):
self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX)
string_name = get_string_contents(line)

View File

@@ -7,15 +7,25 @@ from ast import literal_eval
# flexibility in the formatting seems OK
templateCommentRegex = re.compile(r"\s*//\s+(.*)")
# To remove any comment (//) or block comment (/*) and its leading spaces
# from the end of a code line. Everything from the first comment opener to
# the end of the line is matched.
trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")
# Match a single-quoted char literal, quotes included: exactly one plain
# character or one backslash escape between the quotes.
singleQuoteRegex = re.compile(r"('(?:[^\'\\]|\\.)')")
# Match a block comment that opens and closes on the same line, delimiters
# included. Non-greedy, so several comments on a line match separately.
blockCommentRegex = re.compile(r"(/\*.*?\*/)")
# Match a single-line comment: from // to the end of the line.
regularCommentRegex = re.compile(r"(//.*)")
# Match a double-quoted string on one line, quotes included. Backslash
# escapes (such as \") are consumed so they do not end the match early.
doubleQuoteRegex = re.compile(r"(\"(?:[^\"\\]|\\.)*\")")
# Detect a line that would cause us to enter a new scope: the keyword
# class, struct or namespace, one space, then the identifier captured as
# "name". The opening bracket is optional in the pattern, so it does not
# have to be on the same line.
scopeDetectRegex = re.compile(r"(?:class|struct|namespace) (?P<name>\w+).*(?:{)?")
def get_synthetic_name(line: str) -> Optional[str]:
"""Synthetic names appear on a single line comment on the line after the marker.
@@ -28,6 +38,20 @@ def get_synthetic_name(line: str) -> Optional[str]:
return None
def sanitize_code_line(line: str) -> str:
    """Helper for the scope manager: blank out text that could make us
    miscount curly brackets.

    Char literals collapse to '' and string literals to "", then one-line
    block comments and // comments are deleted and the result is stripped
    of surrounding whitespace. This is intentionally naive and does not
    work at all for strings or comments that span multiple lines."""
    # Applied strictly in this order: literals are collapsed before any
    # comment is removed.
    substitutions = (
        (singleQuoteRegex, "''"),
        (doubleQuoteRegex, '""'),
        (blockCommentRegex, ""),
        (regularCommentRegex, ""),
    )
    cleaned = line
    for pattern, replacement in substitutions:
        cleaned = pattern.sub(replacement, cleaned)
    return cleaned.strip()
def remove_trailing_comment(line: str) -> str:
    """Return `line` with any trailing // or /* comment, and the whitespace
    just before it, cut off."""
    without_comment, _replacements = trailingCommentRegex.subn("", line)
    return without_comment
@@ -75,8 +99,8 @@ def get_class_name(line: str) -> Optional[str]:
return None
# NOTE(review): global_regex and less_strict_global_regex are each assigned
# twice in a row here. The second pair replaces the first, so only the
# patterns that accept a leading "Name::" chain take effect. Presumably the
# first pair is the pre-change version shown by the diff -- confirm before
# removing it.
global_regex = re.compile(r"(?P<name>g_\w+)")
less_strict_global_regex = re.compile(r"(?P<name>\w+)(?:\)\(|\[.*|\s*=.*|;)")
# A variable name starting with "g_", optionally preceded by a namespace
# chain such as "Outer::Inner::". The chain is part of the captured name.
global_regex = re.compile(r"(?P<name>(?:\w+::)*g_\w+)")
# Any identifier (with optional namespace chain) that is directly followed
# by ")(", "[", an "=" assignment, or ";".
less_strict_global_regex = re.compile(r"(?P<name>(?:\w+::)*\w+)(?:\)\(|\[.*|\s*=.*|;)")
def get_variable_name(line: str) -> Optional[str]: