Improve handling of variables for decomp parser (#376)

* Parser refactor:
- Handling LIBRARY and STRING markers
- Extracting global variable name for future comparison
- Marking function static variables
- More fluent error messages

* String constants annotated with STRING

* fix variable name

* Should compare LIBRARY markers
This commit is contained in:
MS
2023-12-27 15:59:42 -05:00
committed by GitHub
parent dfad323880
commit bcb7bec68b
160 changed files with 586 additions and 327 deletions

View File

@@ -115,7 +115,7 @@ def test_different_markers_same_module(parser):
# Use first marker declaration, don't replace
assert len(parser.functions) == 1
assert parser.functions[0].is_stub is False
assert parser.functions[0].should_skip() is False
# Should alert to this
assert len(parser.alerts) == 1
@@ -193,7 +193,7 @@ def test_multiple_vtables(parser):
)
assert len(parser.alerts) == 0
assert len(parser.vtables) == 2
assert parser.vtables[0].class_name == "MxString"
assert parser.vtables[0].name == "MxString"
def test_multiple_vtables_same_module(parser):
@@ -247,7 +247,7 @@ def test_synthetic_no_comment(parser):
)
assert len(parser.functions) == 0
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.BAD_SYNTHETIC
assert parser.alerts[0].code == ParserError.BAD_NAMEREF
assert parser.state == ReaderState.SEARCH
@@ -375,3 +375,70 @@ def test_unexpected_eof(parser):
assert len(parser.functions) == 1
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.UNEXPECTED_END_OF_FILE
def test_global_variable_prefix(parser):
    """Global and static variables are expected to carry the g_ prefix.

    A prefixed name is accepted without any alert. An unprefixed name
    produces a GLOBAL_MISSING_PREFIX alert, yet the variable is still
    recorded under the name that was found.
    """
    prefixed_source = [
        "// GLOBAL: TEST 0x1234",
        'const char* g_msg = "hello";',
    ]
    parser.read_lines(prefixed_source)
    assert len(parser.variables) == 1
    assert len(parser.alerts) == 0

    # NOTE(review): the module is spelled "TEXT" here but "TEST" in the other
    # markers -- possibly a typo. None of the assertions below inspect the
    # module name, so it is left as-is.
    unprefixed_source = [
        "// GLOBAL: TEXT 0x5555",
        "int test = 5;",
    ]
    parser.read_lines(unprefixed_source)
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.GLOBAL_MISSING_PREFIX
    # The alert does not prevent the variable name from being captured.
    assert parser.variables[1].name == "test"
def test_global_nomatch(parser):
    """Name extraction is best-effort and, by design, does not match everything.

    When the line after a GLOBAL marker gives no usable variable name, no
    variable is recorded and a single NO_SUITABLE_NAME alert is raised.
    """
    source = [
        "// GLOBAL: TEST 0x1234",
        "FunctionCall();",
    ]
    parser.read_lines(source)

    assert len(parser.variables) == 0
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.NO_SUITABLE_NAME
def test_static_variable(parser):
    """A GLOBAL marker found inside a function marks a function-static variable.

    The decision is based on the parser's state when the variable is
    detected; checking for the word `static` alone is not a good test.
    Static class variables are filed as S_GDATA32, same as regular globals.
    Only function statics are filed as S_LDATA32.
    """
    # Marker at file scope: an ordinary global.
    file_scope_source = [
        "// GLOBAL: TEST 0x1234",
        "int g_test = 1234;",
    ]
    parser.read_lines(file_scope_source)
    assert len(parser.variables) == 1
    assert parser.variables[0].is_static is False

    # Marker between a function's braces: a function static.
    function_scope_source = [
        "// FUNCTION: TEST 0x5555",
        "void test_function() {",
        "// GLOBAL: TEST 0x8888",
        "int g_internal = 0;",
        "}",
    ]
    parser.read_lines(function_scope_source)
    assert len(parser.variables) == 2
    assert parser.variables[1].is_static is True

View File

@@ -11,9 +11,11 @@ state_change_marker_cases = [
(_rs.SEARCH, "FUNCTION", _rs.WANT_SIG, None),
(_rs.SEARCH, "GLOBAL", _rs.IN_GLOBAL, None),
(_rs.SEARCH, "STUB", _rs.WANT_SIG, None),
(_rs.SEARCH, "SYNTHETIC", _rs.IN_TEMPLATE, None),
(_rs.SEARCH, "SYNTHETIC", _rs.IN_SYNTHETIC, None),
(_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None),
(_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None),
(_rs.SEARCH, "LIBRARY", _rs.IN_LIBRARY, None),
(_rs.SEARCH, "STRING", _rs.SEARCH, None),
(_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None),
(_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
@@ -21,20 +23,26 @@ state_change_marker_cases = [
(_rs.WANT_SIG, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_SIG, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_SIG, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_SIG, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_SIG, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC, "FUNCTION", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
(_rs.IN_FUNC, "STUB", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "SYNTHETIC", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "SYNTHETIC", _rs.IN_SYNTHETIC, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "LIBRARY", _rs.IN_LIBRARY, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "STRING", _rs.IN_FUNC, None),
(_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "SYNTHETIC", _rs.IN_TEMPLATE, None),
(_rs.IN_TEMPLATE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "TEMPLATE", _rs.IN_TEMPLATE, None),
(_rs.IN_TEMPLATE, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_CURLY, "FUNCTION", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "GLOBAL", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
@@ -42,6 +50,8 @@ state_change_marker_cases = [
(_rs.WANT_CURLY, "SYNTHETIC", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "TEMPLATE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "VTABLE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "LIBRARY", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "STRING", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.IN_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "GLOBAL", _rs.IN_GLOBAL, None),
@@ -49,6 +59,8 @@ state_change_marker_cases = [
(_rs.IN_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
@@ -56,6 +68,8 @@ state_change_marker_cases = [
(_rs.IN_FUNC_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
@@ -63,6 +77,26 @@ state_change_marker_cases = [
(_rs.IN_VTABLE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "VTABLE", _rs.IN_VTABLE, None),
(_rs.IN_VTABLE, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_SYNTHETIC, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_SYNTHETIC, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_SYNTHETIC, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_SYNTHETIC, "SYNTHETIC", _rs.IN_SYNTHETIC, None),
(_rs.IN_SYNTHETIC, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_SYNTHETIC, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_SYNTHETIC, "LIBRARY", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_SYNTHETIC, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_LIBRARY, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_LIBRARY, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_LIBRARY, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_LIBRARY, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_LIBRARY, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_LIBRARY, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_LIBRARY, "LIBRARY", _rs.IN_LIBRARY, None),
(_rs.IN_LIBRARY, "STRING", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
]
# fmt: on
@@ -105,47 +139,3 @@ def test_state_search_line(line: str):
p.read_line(line)
assert p.state == _rs.SEARCH
assert len(p.alerts) == 0
# Lines fed to the parser right after a GLOBAL marker, each paired with the
# state expected afterwards. Comments and blank lines are expected to leave
# the parser in IN_GLOBAL.
global_lines = [
    (text, _rs.IN_GLOBAL) for text in ("// A comment", "", "\t", " ")
] + [
    # TODO: no check for "likely" variable declaration so these all count
    (text, _rs.SEARCH)
    for text in ("void function()", "int x = 123;", "just some text")
]
@pytest.mark.parametrize("line, new_state", global_lines)
def test_state_global_line(line: str, new_state: _rs):
    """Check the state reached by the line that follows a GLOBAL marker."""
    reader = DecompParser()

    # The marker alone must put a fresh parser into IN_GLOBAL.
    reader.read_line("// GLOBAL: TEST 0x1234")
    assert reader.state == _rs.IN_GLOBAL

    reader.read_line(line)
    assert reader.state == new_state
# Lines fed to the parser right after a GLOBAL marker that was read while in
# the IN_FUNC state, each paired with the state expected afterwards. Comments
# and blank lines are expected to leave the parser in IN_FUNC_GLOBAL.
in_func_global_lines = [
    (text, _rs.IN_FUNC_GLOBAL) for text in ("// A comment", "", "\t", " ")
] + [
    # TODO: no check for "likely" variable declaration so these all count
    (text, _rs.IN_FUNC)
    for text in ("void function()", "int x = 123;", "just some text")
]
@pytest.mark.parametrize("line, new_state", in_func_global_lines)
def test_state_in_func_global_line(line: str, new_state: _rs):
    """Check the state reached by the line that follows a GLOBAL marker read in IN_FUNC."""
    reader = DecompParser()
    # Force the starting state directly instead of feeding a whole function header.
    reader.state = _rs.IN_FUNC

    reader.read_line("// GLOBAL: TEST 0x1234")
    assert reader.state == _rs.IN_FUNC_GLOBAL

    reader.read_line(line)
    assert reader.state == new_state

View File

@@ -1,11 +1,15 @@
import pytest
from isledecomp.parser.parser import MarkerDict
from isledecomp.parser.util import (
from isledecomp.parser.marker import (
DecompMarker,
is_blank_or_comment,
MarkerType,
match_marker,
is_marker_exact,
)
from isledecomp.parser.util import (
is_blank_or_comment,
get_class_name,
get_variable_name,
)
@@ -96,7 +100,7 @@ def test_marker_dict_type_replace():
d.insert(DecompMarker("STUB", "TEST", 0x1234))
markers = list(d.iter())
assert len(markers) == 1
assert markers[0].type == "FUNCTION"
assert markers[0].type == MarkerType.FUNCTION
class_name_match_cases = [
@@ -131,3 +135,26 @@ class_name_no_match_cases = [
@pytest.mark.parametrize("line", class_name_no_match_cases)
def test_get_class_name_none(line: str):
    """Every line in the no-match list must yield no class name."""
    extracted = get_class_name(line)
    assert extracted is None
# (declaration line, expected variable name) pairs. Each declaration shape is
# listed once with "%s" standing in for the identifier, then expanded first
# with the g_ prefix and then without it.
variable_name_cases = [
    (template % identifier, identifier)
    for identifier in ("g_test", "hello")
    for template in (
        "char* %s;",
        "%s;",
        "void (*%s)(int);",
        "char %s[50];",
        "char %s[50] = {1234,",
        "int %s = 500;",
    )
]
@pytest.mark.parametrize("line,name", variable_name_cases)
def test_get_variable_name(line: str, name: str):
    """The name extracted from each declaration line must equal the expected one."""
    found = get_variable_name(line)
    assert found == name