parser: Identify namespaces (#499)

2025-12-11 08:33:13 +00:00 · 2024-01-28 09:25:45 -05:00
parent d9c4151bb8
commit 4137cd75e6
6 changed files with 280 additions and 7 deletions
--- a/tools/isledecomp/isledecomp/cvdump/demangler.py
+++ b/tools/isledecomp/isledecomp/cvdump/demangler.py
@@ -68,4 +68,9 @@ def demangle_vtable(symbol: str) -> str:

        return f"{class_name}<{generic}>"

+    # If we have two classes listed, it is a namespace hierarchy.
+    # @@6B@ is a common generic suffix for these vtable symbols.
+    if t[1] != "" and t[1] != "6B":
+        return t[1] + "::" + t[0]
+
    return t[0]
--- a/tools/isledecomp/isledecomp/parser/parser.py
+++ b/tools/isledecomp/isledecomp/parser/parser.py
@@ -8,6 +8,8 @@ from .util import (
    get_synthetic_name,
    remove_trailing_comment,
    get_string_contents,
+    sanitize_code_line,
+    scopeDetectRegex,
 )
 from .marker import (
    DecompMarker,
@@ -59,6 +61,57 @@ class MarkerDict:
        self.markers = {}


+class CurlyManager:
+    """Overly simplified scope manager"""
+
+    def __init__(self):
+        self._stack = []
+
+    def reset(self):
+        self._stack = []
+
+    def _pop(self):
+        """Pop stack safely"""
+        try:
+            self._stack.pop()
+        except IndexError:
+            pass
+
+    def get_prefix(self, name: Optional[str] = None) -> str:
+        """Return the prefix for where we are."""
+
+        scopes = [t for t in self._stack if t != "{"]
+        if len(scopes) == 0:
+            return name if name is not None else ""
+
+        if name is not None and name not in scopes:
+            scopes.append(name)
+
+        return "::".join(scopes)
+
+    def read_line(self, raw_line: str):
+        """Read a line of code and update the stack."""
+        line = sanitize_code_line(raw_line)
+        if (match := scopeDetectRegex.match(line)) is not None:
+            if not line.endswith(";"):
+                self._stack.append(match.group("name"))
+
+        change = line.count("{") - line.count("}")
+        if change > 0:
+            for _ in range(change):
+                self._stack.append("{")
+        elif change < 0:
+            for _ in range(-change):
+                self._pop()
+
+            if len(self._stack) == 0:
+                return
+
+            last = self._stack[-1]
+            if last != "{":
+                self._pop()
+
+
 class DecompParser:
    # pylint: disable=too-many-instance-attributes
    # Could combine output lists into a single list to get under the limit,
@@ -73,6 +126,8 @@ class DecompParser:

        self.last_line: str = ""

+        self.curly = CurlyManager()
+
        # To allow for multiple markers where code is shared across different
        # modules, save lists of compatible markers that appear in sequence
        self.fun_markers = MarkerDict()
@@ -110,6 +165,8 @@ class DecompParser:
        self.function_start = 0
        self.function_sig = ""

+        self.curly.reset()
+
    @property
    def functions(self) -> List[ParserFunction]:
        return [s for s in self._symbols if isinstance(s, ParserFunction)]
@@ -213,7 +270,7 @@ class DecompParser:
                    line_number=self.line_number,
                    module=marker.module,
                    offset=marker.offset,
-                    name=class_name,
+                    name=self.curly.get_prefix(class_name),
                )
            )

@@ -254,7 +311,7 @@ class DecompParser:
                        line_number=self.line_number,
                        module=marker.module,
                        offset=marker.offset,
-                        name=variable_name,
+                        name=self.curly.get_prefix(variable_name),
                        is_static=self.state == ReaderState.IN_FUNC_GLOBAL,
                    )
                )
@@ -353,6 +410,8 @@ class DecompParser:
            self._handle_marker(marker)
            return

+        self.curly.read_line(line)
+
        line_strip = line.strip()
        if self.state in (
            ReaderState.IN_SYNTHETIC,
@@ -451,8 +510,11 @@ class DecompParser:
                    variable_name = get_variable_name(line)
                    # This is out of our control for library variables, but all of our
                    # variables should start with "g_".
-                    if variable_name is not None and not variable_name.startswith("g_"):
-                        self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX)
+                    if variable_name is not None:
+                        # Before checking for the prefix, remove the
+                        # namespace chain if there is one.
+                        if not variable_name.split("::")[-1].startswith("g_"):
+                            self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX)

            string_name = get_string_contents(line)

--- a/tools/isledecomp/isledecomp/parser/util.py
+++ b/tools/isledecomp/isledecomp/parser/util.py
@@ -7,15 +7,25 @@ from ast import literal_eval
 # flexibility in the formatting seems OK
 templateCommentRegex = re.compile(r"\s*//\s+(.*)")

-
 # To remove any comment (//) or block comment (/*) and its leading spaces
 # from the end of a code line
 trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")

+# Get char contents, ignore escape characters
+singleQuoteRegex = re.compile(r"('(?:[^\'\\]|\\.)')")
+
+# Match contents of block comment on one line
+blockCommentRegex = re.compile(r"(/\*.*?\*/)")
+
+# Match contents of single comment on one line
+regularCommentRegex = re.compile(r"(//.*)")

 # Get string contents, ignore escape characters that might interfere
 doubleQuoteRegex = re.compile(r"(\"(?:[^\"\\]|\\.)*\")")

+# Detect a line that would cause us to enter a new scope (class, struct or namespace); the identifier after the keyword is captured as "name"
+scopeDetectRegex = re.compile(r"(?:class|struct|namespace) (?P<name>\w+).*(?:{)?")
+

 def get_synthetic_name(line: str) -> Optional[str]:
    """Synthetic names appear on a single line comment on the line after the marker.
@@ -28,6 +38,20 @@ def get_synthetic_name(line: str) -> Optional[str]:
    return None


+def sanitize_code_line(line: str) -> str:
+    """Helper for scope manager. Removes sections from a code line
+    that would cause us to incorrectly detect curly brackets.
+    This is a very naive implementation and fails entirely on multi-line
+    strings or comments."""
+
+    line = singleQuoteRegex.sub("''", line)
+    line = doubleQuoteRegex.sub('""', line)
+    line = blockCommentRegex.sub("", line)
+    line = regularCommentRegex.sub("", line)
+
+    return line.strip()
+
+
 def remove_trailing_comment(line: str) -> str:
    return trailingCommentRegex.sub("", line)

@@ -75,8 +99,8 @@ def get_class_name(line: str) -> Optional[str]:
    return None


-global_regex = re.compile(r"(?P<name>g_\w+)")
-less_strict_global_regex = re.compile(r"(?P<name>\w+)(?:\)\(|\[.*|\s*=.*|;)")
+global_regex = re.compile(r"(?P<name>(?:\w+::)*g_\w+)")
+less_strict_global_regex = re.compile(r"(?P<name>(?:\w+::)*\w+)(?:\)\(|\[.*|\s*=.*|;)")


 def get_variable_name(line: str) -> Optional[str]: