Mirror of https://github.com/isledecomp/isle.git (synced 2025-10-23 00:14:22 +00:00)
Decomp parser: indirect globals and string markers (#446)
* Enable string annotations and indirect globals
* Adding some STRING annotations
* Library functions
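For context, these are the kinds of annotations this change teaches the parser to handle, shown as a minimal Python sketch in the style of the new tests at the bottom of this diff. The import path, the zero-argument constructor, and the second marker pair (offset 0x2000, g_library_variable) are assumptions made for illustration only.

    from isledecomp.parser import DecompParser  # import path assumed

    parser = DecompParser()
    parser.read_lines(
        [
            # A GLOBAL and a STRING marker may now annotate the same declaration.
            "// GLOBAL: TEST 0x1234",
            "// STRING: TEST 0x5555",
            'char* g_test = "hello";',
            # An "indirect" global: the variable is named only in a comment
            # because its declaration (e.g. a library variable) is not in this file.
            "// GLOBAL: TEST 0x2000",
            "// g_library_variable",
        ]
    )
    print([v.name for v in parser.variables])  # expected: ['g_test', 'g_library_variable']
    print([s.name for s in parser.strings])    # expected: ['hello']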
@@ -85,13 +85,19 @@ class Compare:
             if sym.node_type == SymbolType.STRING:
                 string_info = demangle_string_const(sym.decorated_name)
+                if string_info is None:
+                    logger.debug(
+                        "Could not demangle string symbol: %s", sym.decorated_name
+                    )
+                    continue
+
                 # TODO: skip unicode for now. will need to handle these differently.
                 if string_info.is_utf16:
                     continue

                 raw = self.recomp_bin.read(addr, sym.size())
                 try:
-                    sym.friendly_name = raw.decode("latin1")
+                    sym.friendly_name = raw.decode("latin1").rstrip("\x00")
                 except UnicodeDecodeError:
                     pass
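The new .rstrip("\x00") is presumably needed because the symbol's reported size covers the NUL terminator, so decoding the raw bytes leaves trailing NULs on the friendly name. A standalone illustration:

    raw = b"hello world\x00"  # bytes read for a string symbol, terminator included
    print(repr(raw.decode("latin1")))                 # 'hello world\x00'
    print(repr(raw.decode("latin1").rstrip("\x00")))  # 'hello world'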
@@ -134,6 +140,26 @@ class Compare:
         for tbl in codebase.iter_vtables():
             self._db.match_vtable(tbl.offset, tbl.name)

+        for string in codebase.iter_strings():
+            # Not that we don't trust you, but we're checking the string
+            # annotation to make sure it is accurate.
+            try:
+                # TODO: would presumably fail for wchar_t strings
+                orig = self.orig_bin.read_string(string.offset).decode("latin1")
+                string_correct = string.name == orig
+            except UnicodeDecodeError:
+                string_correct = False
+
+            if not string_correct:
+                logger.error(
+                    "Data at 0x%x does not match string %s",
+                    string.offset,
+                    repr(string.name),
+                )
+                continue
+
+            self._db.match_string(string.offset, string.name)
+
     def _find_original_strings(self):
         """Go to the original binary and look for the specified string constants
         to find a match. This is a (relatively) expensive operation so we only
@@ -43,7 +43,8 @@ class MatchInfo:
             return None

         ctype = self.compare_type.name if self.compare_type is not None else "UNK"
-        return f"{self.name} ({ctype})"
+        name = repr(self.name) if ctype == "STRING" else self.name
+        return f"{name} ({ctype})"


 def matchinfo_factory(_, row):
@@ -197,3 +198,5 @@ class CompareDb:
         if not did_match:
+            escaped = repr(value)
+            logger.error("Failed to find string: %s", escaped)

         return did_match
@@ -94,7 +94,11 @@ class CvdumpNode:
     def name(self) -> Optional[str]:
         """Prefer "friendly" name if we have it.
         This is what we have been using to match functions."""
-        return self.friendly_name or self.decorated_name
+        return (
+            self.friendly_name
+            if self.friendly_name is not None
+            else self.decorated_name
+        )

     def size(self) -> Optional[int]:
         if self.confirmed_size is not None:
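The switch from the short-circuit "or" to an explicit None check is presumably motivated by string constants now setting friendly_name: an empty string is falsy, so "or" would silently fall back to the decorated symbol. A quick illustration, using the empty-string symbol from the demangler tests later in this diff:

    friendly_name = ""                  # e.g. a zero-length string constant
    decorated_name = "??_C@_00A@?$AA@"

    print(repr(friendly_name or decorated_name))
    # '??_C@_00A@?$AA@' -- the old expression discards the empty friendly name

    print(repr(friendly_name if friendly_name is not None else decorated_name))
    # '' -- the new expression keeps it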
@@ -4,6 +4,7 @@ https://en.wikiversity.org/wiki/Visual_C%2B%2B_name_mangling
 """
 import re
 from collections import namedtuple
+from typing import Optional


 class InvalidEncodedNumberError(Exception):
@@ -30,13 +31,12 @@ string_const_regex = re.compile(
 StringConstInfo = namedtuple("StringConstInfo", "len is_utf16")


-def demangle_string_const(symbol: str) -> StringConstInfo:
+def demangle_string_const(symbol: str) -> Optional[StringConstInfo]:
     """Don't bother to decode the string text from the symbol.
     We can just read it from the binary once we have the length."""
     match = string_const_regex.match(symbol)
     if match is None:
-        # See below
-        return StringConstInfo(0, False)
+        return None

     try:
         strlen = (
@@ -45,10 +45,7 @@ def demangle_string_const(symbol: str) -> StringConstInfo:
             else int(match.group("len"))
         )
     except (ValueError, InvalidEncodedNumberError):
-        # This would be an annoying error to fail on if we get a bad symbol.
-        # For now, just assume a zero length string because this will probably
-        # raise some eyebrows during the comparison.
-        strlen = 0
+        return None

     is_utf16 = match.group("is_utf16") == "1"
     return StringConstInfo(len=strlen, is_utf16=is_utf16)
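Callers now have to handle None (as the Compare hunk above does) instead of a zero-length placeholder. A small usage sketch; the import path is an assumption about the package layout, and the sample symbol is the empty-string constant from the test case added later in this diff:

    from isledecomp.cvdump.demangler import demangle_string_const  # path assumed

    info = demangle_string_const("??_C@_00A@?$AA@")  # empty-string constant
    print(info.len, info.is_utf16)                   # 0 False

    # Symbols the regex rejects now yield None rather than StringConstInfo(0, False).
    print(demangle_string_const("not_a_string_symbol"))  # None (hypothetical input)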
@@ -6,6 +6,7 @@ from .node import (
     ParserFunction,
     ParserVtable,
     ParserVariable,
+    ParserString,
 )

@@ -42,3 +43,6 @@ class DecompCodebase:
     def iter_variables(self) -> Iterator[ParserVariable]:
         return filter(lambda s: isinstance(s, ParserVariable), self._symbols)
+
+    def iter_strings(self) -> Iterator[ParserString]:
+        return filter(lambda s: isinstance(s, ParserString), self._symbols)
@@ -70,6 +70,10 @@ class ParserError(Enum):
     # a comment -- i.e. VTABLE or GLOBAL -- could not extract the name
     NO_SUITABLE_NAME = 204

+    # ERROR: Two STRING markers have the same module and offset, but the strings
+    # they annotate are different.
+    WRONG_STRING = 205
+

 @dataclass
 class ParserAlert:
@@ -1,7 +1,7 @@
 from typing import List, Optional
 from .parser import DecompParser
 from .error import ParserAlert, ParserError
-from .node import ParserSymbol
+from .node import ParserSymbol, ParserString


 def get_checkorder_filter(module):
@@ -19,6 +19,9 @@ class DecompLinter:
         # This is _not_ reset between files and is intended to report offset reuse
         # when scanning the entire directory.
         self._offsets_used = set()
+        # Keep track of strings we have seen. Persists across files.
+        # Module/offset can be repeated for string markers but the strings must match.
+        self._strings = {}

     def reset(self, full_reset: bool = False):
         self.alerts = []
@@ -28,6 +31,7 @@ class DecompLinter:
         if full_reset:
             self._offsets_used.clear()
+            self._strings = {}

     def file_is_header(self):
         return self._filename.lower().endswith(".h")
@@ -36,17 +40,31 @@ class DecompLinter:
         """Helper for loading (module, offset) tuples while the DecompParser
         has them broken up into three different lists."""
         for marker in marker_list:
+            is_string = isinstance(marker, ParserString)
+
             value = (marker.module, marker.offset)
             if value in self._offsets_used:
-                self.alerts.append(
-                    ParserAlert(
-                        code=ParserError.DUPLICATE_OFFSET,
-                        line_number=marker.line_number,
-                        line=f"0x{marker.offset:08x}",
-                    )
-                )
+                if is_string:
+                    if self._strings[value] != marker.name:
+                        self.alerts.append(
+                            ParserAlert(
+                                code=ParserError.WRONG_STRING,
+                                line_number=marker.line_number,
+                                line=f"0x{marker.offset:08x}, {repr(self._strings[value])} vs. {repr(marker.name)}",
+                            )
+                        )
+                else:
+                    self.alerts.append(
+                        ParserAlert(
+                            code=ParserError.DUPLICATE_OFFSET,
+                            line_number=marker.line_number,
+                            line=f"0x{marker.offset:08x}",
+                        )
+                    )
             else:
                 self._offsets_used.add(value)
+                if is_string:
+                    self._strings[value] = marker.name

     def _check_function_order(self):
         """Rules:
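In short: reusing a (module, offset) pair is still an error for ordinary markers, but for STRING markers it is an error only when the annotated text differs. A condensed sketch of the expected outcomes, mirroring test_duplicate_strings below; the import path and the bare DecompLinter() constructor are assumptions (the tests use a pytest fixture):

    from isledecomp.parser.linter import DecompLinter  # path assumed

    linter = DecompLinter()
    same_string = ["// STRING: TEST 0x1000", 'return "hello world";']
    assert linter.check_lines(same_string, "test.h", "TEST") is True
    assert linter.check_lines(same_string, "test.h", "TEST") is True   # same text: allowed

    conflict = ["// STRING: TEST 0x1000", 'return "hi there";']
    assert linter.check_lines(conflict, "greeting.h", "TEST") is False  # WRONG_STRING

    reuse = ["// GLOBAL: TEST 0x1000", "int g_test = 123;"]
    assert linter.check_lines(reuse, "other.h", "TEST") is False        # DUPLICATE_OFFSET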
@@ -82,6 +100,7 @@ class DecompLinter:
         self._load_offsets_from_list(self._parser.functions)
         self._load_offsets_from_list(self._parser.vtables)
         self._load_offsets_from_list(self._parser.variables)
+        self._load_offsets_from_list(self._parser.strings)

     def _check_byname_allowed(self):
         if self.file_is_header():
@@ -3,6 +3,19 @@ from typing import Optional
 from enum import Enum


+class MarkerCategory(Enum):
+    """For the purposes of grouping multiple different DecompMarkers together,
+    assign a rough "category" for the MarkerType values below.
+    It's really only the function types that have to get folded down, but
+    we'll do that in a structured way to permit future expansion."""
+
+    FUNCTION = 1
+    VARIABLE = 2
+    STRING = 3
+    VTABLE = 4
+    ADDRESS = 100  # i.e. no comparison required or possible
+
+
 class MarkerType(Enum):
     UNKNOWN = -100
     FUNCTION = 1
@@ -51,6 +64,23 @@ class DecompMarker:
     def offset(self) -> int:
         return self._offset

+    @property
+    def category(self) -> MarkerCategory:
+        if self.is_vtable():
+            return MarkerCategory.VTABLE
+
+        if self.is_variable():
+            return MarkerCategory.VARIABLE
+
+        if self.is_string():
+            return MarkerCategory.STRING
+
+        # TODO: worth another look if we add more types, but this covers it
+        if self.is_regular_function() or self.is_explicit_byname():
+            return MarkerCategory.FUNCTION
+
+        return MarkerCategory.ADDRESS
+
     def is_regular_function(self) -> bool:
         """Regular function, meaning: not an explicit byname lookup. FUNCTION
         markers can be _implicit_ byname.
@@ -55,3 +55,8 @@ class ParserVariable(ParserSymbol):
 @dataclass
 class ParserVtable(ParserSymbol):
     pass
+
+
+@dataclass
+class ParserString(ParserSymbol):
+    pass
@@ -3,11 +3,11 @@
 from typing import List, Iterable, Iterator, Optional
 from enum import Enum
 from .util import (
     is_blank_or_comment,
     get_class_name,
     get_variable_name,
     get_synthetic_name,
     remove_trailing_comment,
+    get_string_contents,
 )
 from .marker import (
     DecompMarker,
@@ -19,6 +19,7 @@ from .node import (
     ParserFunction,
     ParserVariable,
     ParserVtable,
+    ParserString,
 )
 from .error import ParserAlert, ParserError

@@ -43,17 +44,16 @@ class MarkerDict:
     def insert(self, marker: DecompMarker) -> bool:
         """Return True if this insert would overwrite"""
-        module = marker.module
-        if module in self.markers:
+        key = (marker.category, marker.module)
+        if key in self.markers:
             return True

-        # TODO: type converted back to string version here instead of using enum
-        self.markers[module] = (marker.type.name, marker.offset)
+        self.markers[key] = marker
         return False

     def iter(self) -> Iterator[DecompMarker]:
-        for module, (marker_type, offset) in self.markers.items():
-            yield DecompMarker(marker_type, module, offset)
+        for _, marker in self.markers.items():
+            yield marker

     def empty(self):
         self.markers = {}
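Keying the dict by (category, module) rather than module alone is what lets a GLOBAL and a STRING marker stack on the same declaration without one clobbering the other (see test_global_string below). A rough sketch; the positional DecompMarker constructor is assumed from the removed iter() code above:

    markers = MarkerDict()
    var_marker = DecompMarker("GLOBAL", "TEST", 0x1234)  # category VARIABLE (assumed ctor)
    str_marker = DecompMarker("STRING", "TEST", 0x5555)  # category STRING

    assert markers.insert(var_marker) is False  # stored
    assert markers.insert(str_marker) is False  # different category: also stored
    assert markers.insert(var_marker) is True   # same (category, module): would overwrite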
@@ -111,17 +111,21 @@ class DecompParser:
         self.function_sig = ""

     @property
-    def functions(self) -> List[ParserSymbol]:
+    def functions(self) -> List[ParserFunction]:
         return [s for s in self._symbols if isinstance(s, ParserFunction)]

     @property
-    def vtables(self) -> List[ParserSymbol]:
+    def vtables(self) -> List[ParserVtable]:
         return [s for s in self._symbols if isinstance(s, ParserVtable)]

     @property
-    def variables(self) -> List[ParserSymbol]:
+    def variables(self) -> List[ParserVariable]:
         return [s for s in self._symbols if isinstance(s, ParserVariable)]

+    @property
+    def strings(self) -> List[ParserString]:
+        return [s for s in self._symbols if isinstance(s, ParserString)]
+
     def iter_symbols(self, module: Optional[str] = None) -> Iterator[ParserSymbol]:
         for s in self._symbols:
             if module is None or s.module == module:
@@ -225,21 +229,35 @@ class DecompParser:
             else:
                 self.state = ReaderState.IN_GLOBAL

-    def _variable_done(self, name: str):
-        if not name.startswith("g_"):
-            self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX)
+    def _variable_done(
+        self, variable_name: Optional[str] = None, string_value: Optional[str] = None
+    ):
+        if variable_name is None and string_value is None:
+            self._syntax_error(ParserError.NO_SUITABLE_NAME)
+            return

         for marker in self.var_markers.iter():
-            self._symbols.append(
-                ParserVariable(
-                    type=marker.type,
-                    line_number=self.line_number,
-                    module=marker.module,
-                    offset=marker.offset,
-                    name=name,
-                    is_static=self.state == ReaderState.IN_FUNC_GLOBAL,
-                )
-            )
+            if marker.is_string():
+                self._symbols.append(
+                    ParserString(
+                        type=marker.type,
+                        line_number=self.line_number,
+                        module=marker.module,
+                        offset=marker.offset,
+                        name=string_value,
+                    )
+                )
+            else:
+                self._symbols.append(
+                    ParserVariable(
+                        type=marker.type,
+                        line_number=self.line_number,
+                        module=marker.module,
+                        offset=marker.offset,
+                        name=variable_name,
+                        is_static=self.state == ReaderState.IN_FUNC_GLOBAL,
+                    )
+                )

         self.var_markers.empty()
         if self.state == ReaderState.IN_FUNC_GLOBAL:
@@ -298,20 +316,8 @@ class DecompParser:
             else:
                 self._syntax_error(ParserError.INCOMPATIBLE_MARKER)

-        elif marker.is_string():
-            # TODO: We are ignoring string markers for the moment.
-            # We already have a lot of them in the codebase, though, so we'll
-            # hang onto them for now in case we can use them later.
-            # To match up string constants, the strategy will be:
-            # 1. Use cvdump to find all string constants in the recomp
-            # 2. In the original binary, look at relocated vaddrs from .rdata
-            # 3. Try to match up string data from #1 with locations in #2
-
-            # Throw the syntax error we would throw if we were parsing these
-            if self.state not in (ReaderState.SEARCH, ReaderState.IN_FUNC):
-                self._syntax_error(ParserError.INCOMPATIBLE_MARKER)
-
-        elif marker.is_variable():
+        # Strings and variables are almost the same thing
+        elif marker.is_string() or marker.is_variable():
             if self.state in (
                 ReaderState.SEARCH,
                 ReaderState.IN_GLOBAL,
@@ -418,24 +424,39 @@ class DecompParser:
             # function we have already parsed if state == IN_FUNC_GLOBAL.
             # However, we are not tolerant of _any_ syntax problems in our
             # CI actions, so the solution is to just fix the invalid marker.
-            if is_blank_or_comment(line):
-                self._syntax_error(ParserError.NO_SUITABLE_NAME)
+            variable_name = None
+
+            global_markers_queued = any(
+                m.is_variable() for m in self.var_markers.iter()
+            )
+
+            if len(line_strip) == 0:
+                self._syntax_warning(ParserError.UNEXPECTED_BLANK_LINE)
+                return

-            # We don't have a foolproof mechanism to tell what is and is not a variable.
-            # If the GLOBAL is being declared on a `return` statement, though, this is
-            # not correct. It is either a string literal (which will be handled differently)
-            # or it is not the variable declaration, which is incorrect decomp syntax.
-            if line.strip().startswith("return"):
-                self._syntax_error(ParserError.GLOBAL_NOT_VARIABLE)
-                return
+            if global_markers_queued:
+                # Not the greatest solution, but a consequence of combining GLOBAL and
+                # STRING markers together. If the marker precedes a return statement, it is
+                # valid for a STRING marker to be here, but not a GLOBAL. We need to look
+                # ahead and tell whether this *would* fail.
+                if line_strip.startswith("return"):
+                    self._syntax_error(ParserError.GLOBAL_NOT_VARIABLE)
+                    return
+            if line_strip.startswith("//"):
+                # If we found a comment, assume implicit lookup-by-name
+                # function and end here. We know this is not a decomp marker
+                # because it would have been handled already.
+                variable_name = get_synthetic_name(line)
+            else:
+                variable_name = get_variable_name(line)
+                # This is out of our control for library variables, but all of our
+                # variables should start with "g_".
+                if variable_name is not None and not variable_name.startswith("g_"):
+                    self._syntax_warning(ParserError.GLOBAL_MISSING_PREFIX)

-            name = get_variable_name(line)
-            if name is None:
-                self._syntax_error(ParserError.NO_SUITABLE_NAME)
-                return
+            string_name = get_string_contents(line)

-            self._variable_done(name)
+            self._variable_done(variable_name, string_name)

         elif self.state == ReaderState.IN_VTABLE:
             vtable_class = get_class_name(line)
@@ -1,6 +1,7 @@
 # C++ Parser utility functions and data structures
 import re
 from typing import Optional
+from ast import literal_eval

 # The goal here is to just read whatever is on the next line, so some
 # flexibility in the formatting seems OK
@@ -12,6 +13,10 @@ templateCommentRegex = re.compile(r"\s*//\s+(.*)")
 trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")


+# Get string contents, ignore escape characters that might interfere
+doubleQuoteRegex = re.compile(r"(\"(?:[^\"\\]|\\.)*\")")
+
+
 def get_synthetic_name(line: str) -> Optional[str]:
     """Synthetic names appear on a single line comment on the line after the marker.
     If that's not what we have, return None"""
@@ -86,3 +91,20 @@ def get_variable_name(line: str) -> Optional[str]:
         return match.group("name")

     return None
+
+
+def get_string_contents(line: str) -> Optional[str]:
+    """Return the first C string seen on this line.
+    We have to unescape the string, and a simple way to do that is to use
+    python's ast.literal_eval. I'm sure there are many pitfalls to doing
+    it this way, but hopefully the regex will ensure reasonably sane input."""
+
+    try:
+        if (match := doubleQuoteRegex.search(line)) is not None:
+            return literal_eval(match.group(1))
+    # pylint: disable=broad-exception-caught
+    # No way to predict what kind of exception could occur.
+    except Exception:
+        pass
+
+    return None
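The approach: the regex captures the first double-quoted literal on the line, honoring backslash escapes, and ast.literal_eval unescapes it. A self-contained sketch of the same technique using the pattern introduced above:

    import re
    from ast import literal_eval

    double_quote = re.compile(r"(\"(?:[^\"\\]|\\.)*\")")

    line = 'Method("hello \\"world\\"", "ignored");'
    match = double_quote.search(line)
    print(literal_eval(match.group(1)))  # hello "world"  (only the first string is taken)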
@@ -14,6 +14,7 @@ string_demangle_cases = [
         14,
         True,
     ),
+    ("??_C@_00A@?$AA@", 0, False),
 ]

@@ -112,3 +112,33 @@ def test_duplicate_offsets(linter):
     # Full reset will forget seen offsets.
     linter.reset(True)
     assert linter.check_lines(lines, "test.h", "TEST") is True
+
+
+def test_duplicate_strings(linter):
+    """Duplicate string markers are okay if the string value is the same."""
+    string_lines = [
+        "// STRING: TEST 0x1000",
+        'return "hello world";',
+    ]
+
+    # No problem to use this marker twice.
+    assert linter.check_lines(string_lines, "test.h", "TEST") is True
+    assert linter.check_lines(string_lines, "test.h", "TEST") is True
+
+    different_string = [
+        "// STRING: TEST 0x1000",
+        'return "hi there";',
+    ]
+
+    # Same address but the string is different
+    assert linter.check_lines(different_string, "greeting.h", "TEST") is False
+    assert len(linter.alerts) == 1
+    assert linter.alerts[0].code == ParserError.WRONG_STRING
+
+    same_addr_reused = [
+        "// GLOBAL: TEST 0x1000",
+        "int g_test = 123;",
+    ]
+
+    # This will fail like any other offset reuse.
+    assert linter.check_lines(same_addr_reused, "other.h", "TEST") is False
@@ -442,3 +442,82 @@ def test_static_variable(parser):
     )
     assert len(parser.variables) == 2
     assert parser.variables[1].is_static is True
+
+
+def test_reject_global_return(parser):
+    """Previously we had annotated strings with the GLOBAL marker.
+    For example: if a function returned a string. We now want these to be
+    annotated with the STRING marker."""
+
+    parser.read_lines(
+        [
+            "// FUNCTION: TEST 0x5555",
+            "void test_function() {",
+            "    // GLOBAL: TEST 0x8888",
+            '    return "test";',
+            "}",
+        ]
+    )
+    assert len(parser.variables) == 0
+    assert len(parser.alerts) == 1
+    assert parser.alerts[0].code == ParserError.GLOBAL_NOT_VARIABLE
+
+
+def test_global_string(parser):
+    """We now allow GLOBAL and STRING markers for the same item."""
+
+    parser.read_lines(
+        [
+            "// GLOBAL: TEST 0x1234",
+            "// STRING: TEXT 0x5555",
+            'char* g_test = "hello";',
+        ]
+    )
+    assert len(parser.variables) == 1
+    assert len(parser.strings) == 1
+    assert len(parser.alerts) == 0
+
+    assert parser.variables[0].name == "g_test"
+    assert parser.strings[0].name == "hello"
+
+
+def test_comment_variables(parser):
+    """Match on hidden variables from libraries."""
+
+    parser.read_lines(
+        [
+            "// GLOBAL: TEST 0x1234",
+            "// g_test",
+        ]
+    )
+    assert len(parser.variables) == 1
+    assert parser.variables[0].name == "g_test"
+
+
+def test_flexible_variable_prefix(parser):
+    """Don't alert to library variables that lack the g_ prefix.
+    This is out of our control."""
+
+    parser.read_lines(
+        [
+            "// GLOBAL: TEST 0x1234",
+            "// some_other_variable",
+        ]
+    )
+    assert len(parser.variables) == 1
+    assert len(parser.alerts) == 0
+    assert parser.variables[0].name == "some_other_variable"
+
+
+def test_string_ignore_g_prefix(parser):
+    """String annotations above a regular variable should not alert to
+    the missing g_ prefix. This is only required for GLOBAL markers."""
+
+    parser.read_lines(
+        [
+            "// STRING: TEST 0x1234",
+            'const char* value = "";',
+        ]
+    )
+    assert len(parser.strings) == 1
+    assert len(parser.alerts) == 0
@@ -15,7 +15,7 @@ state_change_marker_cases = [
     (_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None),
     (_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None),
     (_rs.SEARCH, "LIBRARY", _rs.IN_LIBRARY, None),
-    (_rs.SEARCH, "STRING", _rs.SEARCH, None),
+    (_rs.SEARCH, "STRING", _rs.IN_GLOBAL, None),

     (_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None),
     (_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
@@ -33,7 +33,7 @@ state_change_marker_cases = [
     (_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
     (_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION),
     (_rs.IN_FUNC, "LIBRARY", _rs.IN_LIBRARY, _pe.MISSED_END_OF_FUNCTION),
-    (_rs.IN_FUNC, "STRING", _rs.IN_FUNC, None),
+    (_rs.IN_FUNC, "STRING", _rs.IN_FUNC_GLOBAL, None),

     (_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
     (_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
@@ -60,7 +60,7 @@ state_change_marker_cases = [
     (_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
     (_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
     (_rs.IN_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
-    (_rs.IN_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
+    (_rs.IN_GLOBAL, "STRING", _rs.IN_GLOBAL, None),

     (_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
     (_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
@@ -69,7 +69,7 @@ state_change_marker_cases = [
     (_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
     (_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
     (_rs.IN_FUNC_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
-    (_rs.IN_FUNC_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
+    (_rs.IN_FUNC_GLOBAL, "STRING", _rs.IN_FUNC_GLOBAL, None),

     (_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
     (_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
@@ -10,6 +10,7 @@ from isledecomp.parser.util import (
     is_blank_or_comment,
     get_class_name,
     get_variable_name,
+    get_string_contents,
 )

@@ -158,3 +159,18 @@ variable_name_cases = [
 @pytest.mark.parametrize("line,name", variable_name_cases)
 def test_get_variable_name(line: str, name: str):
     assert get_variable_name(line) == name
+
+
+string_match_cases = [
+    ('return "hello world";', "hello world"),
+    ('"hello\\\\"', "hello\\"),
+    ('"hello \\"world\\""', 'hello "world"'),
+    ('"hello\\nworld"', "hello\nworld"),
+    # Only match first string if there are multiple options
+    ('Method("hello", "world");', "hello"),
+]
+
+
+@pytest.mark.parametrize("line, string", string_match_cases)
+def test_get_string_contents(line: str, string: str):
+    assert get_string_contents(line) == string