(Proposal) Adjustments to "decomp" language (#308)

* Adjustments to "decomp" language

* Fix a comment

* Fix accidental clang-formatting

* Fix order

* Fix order

* Remove junk

* Fix OFFSET

* Adjustments based on new suggestions

* Annotate globals

* Globals in ISLE

* More globals

* Merge from parser2 branch

* Allow prepending space for exact marker match

* To eliminate noise, require the 0x prefix on offset for marker match

* fix test from previous

* Count tab stops for indented functions to reduce MISSED_END_OF_FUNCTION noise

* FUNCTION to SYNTHETIC where needed

* Missed marker conversion on SetAtomId

* pylint cleanup, remove unused code

* Fix unexpected function end, add more unit tests

* Be more strict about synthetic name syntax

* Revert "Missed marker conversion on SetAtomId"

This reverts commit d87d665127.

* Revert "FUNCTION to SYNTHETIC where needed"

This reverts commit 8c815418d2.

* Implicit lookup by name for functions

* Fix VTABLE SYNTHETIC and other decomp markers

* Get vtable class name

* Vtable marker should identify struct

* No colon for SIZE comment

* Update README.md

* Update README.md

* Update CONTRIBUTING.md

* Update README.md

* Update README.md

* Update CONTRIBUTING.md

* Update README.md

* Update CONTRIBUTING.md

* Fix destructor/annotation

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

---------

Co-authored-by: disinvite <disinvite@users.noreply.github.com>
This commit is contained in:
Christian Semmler
2023-12-06 07:10:45 -05:00
committed by GitHub
parent 4f5b70013f
commit 494a556f8e
407 changed files with 3505 additions and 2493 deletions

View File

@@ -3,6 +3,7 @@
// A very simple class
// VTABLE: TEST 0x1001002
class TestClass {
public:
TestClass();
@@ -10,14 +11,14 @@ public:
virtual MxResult Tickle() override; // vtable+08
// OFFSET: TEST 0x12345678
// FUNCTION: TEST 0x12345678
inline const char* ClassName() const // vtable+0c
{
// 0xabcd1234
return "TestClass";
}
// OFFSET: TEST 0xdeadbeef
// FUNCTION: TEST 0xdeadbeef
inline MxBool IsA(const char* name) const override // vtable+10
{
return !strcmp(name, TestClass::ClassName());

View File

@@ -3,19 +3,19 @@
// A very simple well-formed code file
// OFFSET: TEST 0x1234
// FUNCTION: TEST 0x1234
void function01()
{
// TODO
}
// OFFSET: TEST 0x2345
// FUNCTION: TEST 0x2345
void function02()
{
// TODO
}
// OFFSET: TEST 0x3456
// FUNCTION: TEST 0x3456
void function03()
{
// TODO

View File

@@ -0,0 +1,14 @@
// Sample for python unit tests
// Not part of the decomp
// Global variables inside and outside of functions
// GLOBAL: TEST 0x1000
const char *g_message = "test";
// FUNCTION: TEST 0x1234
void function01()
{
// GLOBAL: TEST 0x5555
static int g_hello = 123;
}

View File

@@ -1,8 +1,8 @@
// Sample for python unit tests
// Not part of the decomp
// OFFSET: TEST 0x10000001
// FUNCTION: TEST 0x10000001
inline const char* OneLineWithComment() const { return "MxDSObject"; }; // hi there
// OFFSET: TEST 0x10000002
// FUNCTION: TEST 0x10000002
inline const char* OneLine() const { return "MxDSObject"; };

View File

@@ -9,7 +9,7 @@ int no_offset_comment()
return -1;
}
// OFFSET: TEST 0xdeadbeef
// FUNCTION: TEST 0xdeadbeef
void regular_ole_function()
{
printf("hi there");

View File

@@ -3,22 +3,22 @@
// Handling multiple offset markers
// OFFSET: TEST 0x1234
// OFFSET: HELLO 0x5555
// FUNCTION: TEST 0x1234
// FUNCTION: HELLO 0x5555
void different_modules()
{
// TODO
}
// OFFSET: TEST 0x2345
// OFFSET: TEST 0x1234
// FUNCTION: TEST 0x2345
// FUNCTION: TEST 0x1234
void same_module()
{
// TODO
}
// OFFSET: TEST 0x2002
// OFFSET: test 0x1001
// FUNCTION: TEST 0x2002
// FUNCTION: test 0x1001
void same_case_insensitive()
{
// TODO

View File

@@ -1,10 +1,10 @@
// Sample for python unit tests
// Not part of the decomp
// OFFSET: TEST 0x1234
// FUNCTION: TEST 0x1234
void short_function() { static char* msg = "oneliner"; }
// OFFSET: TEST 0x5555
// FUNCTION: TEST 0x5555
void function_after_one_liner()
{
// This function comes after the previous that is on a single line.

View File

@@ -1,19 +1,19 @@
// Sample for python unit tests
// Not part of the decomp
// OFFSET: TEST 0x1001
// FUNCTION: TEST 0x1001
void function_order01()
{
// TODO
}
// OFFSET: TEST 0x1003
// FUNCTION: TEST 0x1003
void function_order03()
{
// TODO
}
// OFFSET: TEST 0x1002
// FUNCTION: TEST 0x1002
void function_order02()
{
// TODO

View File

@@ -4,18 +4,18 @@
// While it's reasonable to expect a well-formed file (and clang-format
// will make sure we get one), this will put the parser through its paces.
// OFFSET: TEST 0x1234
// FUNCTION: TEST 0x1234
void curly_with_spaces()
{
static char* msg = "hello";
}
// OFFSET: TEST 0x5555
// FUNCTION: TEST 0x5555
void weird_closing_curly()
{
int x = 123; }
// OFFSET: HELLO 0x5656
// FUNCTION: HELLO 0x5656
void bad_indenting() {
if (0)
{

View File

@@ -1,127 +1,360 @@
import os
from typing import List, TextIO
from isledecomp.parser import find_code_blocks
from isledecomp.parser.util import CodeBlock
SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples")
import pytest
from isledecomp.parser.parser import (
ReaderState,
DecompParser,
)
from isledecomp.parser.error import ParserError
def sample_file(filename: str) -> TextIO:
"""Wrapper for opening the samples from the directory that does not
depend on the cwd where we run the test"""
full_path = os.path.join(SAMPLE_DIR, filename)
return open(full_path, "r", encoding="utf-8")
@pytest.fixture(name="parser")
def fixture_parser():
return DecompParser()
def code_blocks_are_sorted(blocks: List[CodeBlock]) -> bool:
"""Helper to make this more idiomatic"""
just_offsets = [block.offset for block in blocks]
return just_offsets == sorted(just_offsets)
def test_missing_sig(parser):
"""In the hopefully rare scenario that the function signature and marker
are swapped, we still have enough to match witch reccmp"""
parser.read_lines(
[
"void my_function()",
"// FUNCTION: TEST 0x1234",
"{",
"}",
]
)
assert parser.state == ReaderState.SEARCH
assert len(parser.functions) == 1
assert parser.functions[0].line_number == 3
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.MISSED_START_OF_FUNCTION
# Tests are below #
def test_not_exact_syntax(parser):
"""Alert to inexact syntax right here in the parser instead of kicking it downstream.
Doing this means we don't have to save the actual text."""
parser.read_line("// function: test 0x1234")
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.BAD_DECOMP_MARKER
def test_sanity():
"""Read a very basic file"""
with sample_file("basic_file.cpp") as f:
blocks = find_code_blocks(f)
def test_invalid_marker(parser):
"""We matched a decomp marker, but it's not one we care about"""
parser.read_line("// BANANA: TEST 0x1234")
assert parser.state == ReaderState.SEARCH
assert len(blocks) == 3
assert code_blocks_are_sorted(blocks) is True
# n.b. The parser returns line numbers as 1-based
# Function starts when we see the opening curly brace
assert blocks[0].start_line == 8
assert blocks[0].end_line == 10
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.BOGUS_MARKER
def test_oneline():
"""(Assuming clang-format permits this) This sample has a function
on a single line. This will test the end-of-function detection"""
with sample_file("oneline_function.cpp") as f:
blocks = find_code_blocks(f)
assert len(blocks) == 2
assert blocks[0].start_line == 5
assert blocks[0].end_line == 5
def test_incompatible_marker(parser):
"""The marker we just read cannot be handled in the current parser state"""
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
"// GLOBAL: TEST 0x5000",
]
)
assert parser.state == ReaderState.SEARCH
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER
def test_missing_offset():
"""What if the function doesn't have an offset comment?"""
with sample_file("missing_offset.cpp") as f:
blocks = find_code_blocks(f)
# TODO: For now, the function without the offset will just be ignored.
# Would be the same outcome if the comment was present but mangled and
# we failed to match it. We should detect these cases in the future.
assert len(blocks) == 1
def test_variable(parser):
"""Should identify a global variable"""
parser.read_lines(
[
"// GLOBAL: HELLO 0x1234",
"int g_value = 5;",
]
)
assert len(parser.variables) == 1
def test_jumbled_case():
"""The parser just reports what it sees. It is the responsibility of
the downstream tools to do something about a jumbled file.
Just verify that we are reading it correctly."""
with sample_file("out_of_order.cpp") as f:
blocks = find_code_blocks(f)
assert len(blocks) == 3
assert code_blocks_are_sorted(blocks) is False
def test_synthetic_plus_marker(parser):
"""Marker tracking preempts synthetic name detection.
Should fail with error and not log the synthetic"""
parser.read_lines(
[
"// SYNTHETIC: HEY 0x555",
"// FUNCTION: HOWDY 0x1234",
]
)
assert len(parser.functions) == 0
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER
def test_bad_file():
with sample_file("poorly_formatted.cpp") as f:
blocks = find_code_blocks(f)
def test_different_markers_different_module(parser):
"""Does it make any sense for a function to be a stub in one module,
but not in another? I don't know. But it's no problem for us."""
parser.read_lines(
[
"// FUNCTION: HOWDY 0x1234",
"// STUB: SUP 0x5555",
"void interesting_function() {",
"}",
]
)
assert len(blocks) == 3
assert len(parser.alerts) == 0
assert len(parser.functions) == 2
def test_indented():
"""Offsets for functions inside of a class will probably be indented."""
with sample_file("basic_class.cpp") as f:
blocks = find_code_blocks(f)
def test_different_markers_same_module(parser):
"""Now, if something is a regular function but then a stub,
what do we say about that?"""
parser.read_lines(
[
"// FUNCTION: HOWDY 0x1234",
"// STUB: HOWDY 0x5555",
"void interesting_function() {",
"}",
]
)
# TODO: We don't properly detect the end of these functions
# because the closing brace is indented. However... knowing where each
# function ends is less important (for now) than capturing
# all the functions that are there.
# Use first marker declaration, don't replace
assert len(parser.functions) == 1
assert parser.functions[0].is_stub is False
assert len(blocks) == 2
assert blocks[0].offset == int("0x12345678", 16)
assert blocks[0].start_line == 15
# assert blocks[0].end_line == 18
assert blocks[1].offset == int("0xdeadbeef", 16)
assert blocks[1].start_line == 22
# assert blocks[1].end_line == 24
# Should alert to this
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE
def test_inline():
with sample_file("inline.cpp") as f:
blocks = find_code_blocks(f)
def test_unexpected_synthetic(parser):
"""FUNCTION then SYNTHETIC should fail to report either one"""
parser.read_lines(
[
"// FUNCTION: HOWDY 0x1234",
"// SYNTHETIC: HOWDY 0x5555",
"void interesting_function() {",
"}",
]
)
assert len(blocks) == 2
for block in blocks:
assert block.start_line is not None
assert block.start_line == block.end_line
assert parser.state == ReaderState.SEARCH
assert len(parser.functions) == 0
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER
def test_multiple_offsets():
"""If multiple offset marks appear before for a code block, take them
all but ensure module name (case-insensitive) is distinct.
Use first module occurrence in case of duplicates."""
with sample_file("multiple_offsets.cpp") as f:
blocks = find_code_blocks(f)
@pytest.mark.skip(reason="not implemented yet")
def test_duplicate_offset(parser):
"""Repeating the same module/offset in the same file is probably a typo"""
parser.read_lines(
[
"// GLOBAL: HELLO 0x1234",
"int x = 1;",
"// GLOBAL: HELLO 0x1234",
"int y = 2;",
]
)
assert len(blocks) == 4
assert blocks[0].module == "TEST"
assert blocks[0].start_line == 9
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.DUPLICATE_OFFSET
assert blocks[1].module == "HELLO"
assert blocks[1].start_line == 9
# Duplicate modules are ignored
assert blocks[2].start_line == 16
assert blocks[2].offset == 0x2345
def test_multiple_variables(parser):
"""Theoretically the same global variable can appear in multiple modules"""
parser.read_lines(
[
"// GLOBAL: HELLO 0x1234",
"// GLOBAL: WUZZUP 0x555",
"const char *g_greeting;",
]
)
assert len(parser.alerts) == 0
assert len(parser.variables) == 2
assert blocks[3].module == "TEST"
assert blocks[3].offset == 0x2002
def test_multiple_variables_same_module(parser):
"""Should not overwrite offset"""
parser.read_lines(
[
"// GLOBAL: HELLO 0x1234",
"// GLOBAL: HELLO 0x555",
"const char *g_greeting;",
]
)
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE
assert len(parser.variables) == 1
assert parser.variables[0].offset == 0x1234
def test_multiple_vtables(parser):
parser.read_lines(
[
"// VTABLE: HELLO 0x1234",
"// VTABLE: TEST 0x5432",
"class MxString : public MxCore {",
]
)
assert len(parser.alerts) == 0
assert len(parser.vtables) == 2
assert parser.vtables[0].class_name == "MxString"
def test_multiple_vtables_same_module(parser):
"""Should not overwrite offset"""
parser.read_lines(
[
"// VTABLE: HELLO 0x1234",
"// VTABLE: HELLO 0x5432",
"class MxString : public MxCore {",
]
)
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE
assert len(parser.vtables) == 1
assert parser.vtables[0].offset == 0x1234
def test_synthetic(parser):
parser.read_lines(
[
"// SYNTHETIC: TEST 0x1234",
"// TestClass::TestMethod",
]
)
assert len(parser.functions) == 1
assert parser.functions[0].lookup_by_name is True
assert parser.functions[0].name == "TestClass::TestMethod"
def test_synthetic_same_module(parser):
parser.read_lines(
[
"// SYNTHETIC: TEST 0x1234",
"// SYNTHETIC: TEST 0x555",
"// TestClass::TestMethod",
]
)
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE
assert len(parser.functions) == 1
assert parser.functions[0].offset == 0x1234
def test_synthetic_no_comment(parser):
"""Synthetic marker followed by a code line (i.e. non-comment)"""
parser.read_lines(
[
"// SYNTHETIC: TEST 0x1234",
"int x = 123;",
]
)
assert len(parser.functions) == 0
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.BAD_SYNTHETIC
assert parser.state == ReaderState.SEARCH
def test_single_line_function(parser):
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
"int hello() { return 1234; }",
]
)
assert len(parser.functions) == 1
assert parser.functions[0].line_number == 2
assert parser.functions[0].end_line == 2
def test_indented_function(parser):
"""Track the number of whitespace characters when we begin the function
and check that against each closing curly brace we read.
Should not report a syntax warning if the function is indented"""
parser.read_lines(
[
" // FUNCTION: TEST 0x1234",
" void indented()",
" {",
" // TODO",
" }",
" // FUNCTION: NEXT 0x555",
]
)
assert len(parser.alerts) == 0
@pytest.mark.xfail(reason="todo")
def test_indented_no_curly_hint(parser):
"""Same as above, but opening curly brace is on the same line.
Without the hint of how many whitespace characters to check, can we
still identify the end of the function?"""
parser.read_lines(
[
" // FUNCTION: TEST 0x1234",
" void indented() {",
" }",
" // FUNCTION: NEXT 0x555",
]
)
assert len(parser.alerts) == 0
def test_implicit_lookup_by_name(parser):
"""FUNCTION (or STUB) offsets must directly precede the function signature.
If we detect a comment instead, we assume that this is a lookup-by-name
function and end here."""
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
"// TestClass::TestMethod()",
]
)
assert parser.state == ReaderState.SEARCH
assert len(parser.functions) == 1
assert parser.functions[0].lookup_by_name is True
assert parser.functions[0].name == "TestClass::TestMethod()"
def test_function_with_spaces(parser):
"""There should not be any spaces between the end of FUNCTION markers
and the start or name of the function. If it's a blank line, we can safely
ignore but should alert to this."""
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
" ",
"inline void test_function() { };",
]
)
assert len(parser.functions) == 1
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.UNEXPECTED_BLANK_LINE
def test_function_with_spaces_implicit(parser):
"""Same as above, but for implicit lookup-by-name"""
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
" ",
"// Implicit::Method",
]
)
assert len(parser.functions) == 1
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.UNEXPECTED_BLANK_LINE
@pytest.mark.xfail(reason="will assume implicit lookup-by-name function")
def test_function_is_commented(parser):
"""In an ideal world, we would recognize that there is no code here.
Some editors (or users) might comment the function on each line like this
but hopefully it is rare."""
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
"// int my_function()",
"// {",
"// return 5;",
"// }",
]
)
assert len(parser.functions) == 0

View File

@@ -0,0 +1,141 @@
import os
from typing import List, TextIO
import pytest
from isledecomp.parser import DecompParser
from isledecomp.parser.node import ParserSymbol
SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples")
def sample_file(filename: str) -> TextIO:
"""Wrapper for opening the samples from the directory that does not
depend on the cwd where we run the test"""
full_path = os.path.join(SAMPLE_DIR, filename)
return open(full_path, "r", encoding="utf-8")
def code_blocks_are_sorted(blocks: List[ParserSymbol]) -> bool:
"""Helper to make this more idiomatic"""
just_offsets = [block.offset for block in blocks]
return just_offsets == sorted(just_offsets)
@pytest.fixture(name="parser")
def fixture_parser():
return DecompParser()
# Tests are below #
def test_sanity(parser):
"""Read a very basic file"""
with sample_file("basic_file.cpp") as f:
parser.read_lines(f)
assert len(parser.functions) == 3
assert code_blocks_are_sorted(parser.functions) is True
# n.b. The parser returns line numbers as 1-based
# Function starts when we see the opening curly brace
assert parser.functions[0].line_number == 8
assert parser.functions[0].end_line == 10
def test_oneline(parser):
"""(Assuming clang-format permits this) This sample has a function
on a single line. This will test the end-of-function detection"""
with sample_file("oneline_function.cpp") as f:
parser.read_lines(f)
assert len(parser.functions) == 2
assert parser.functions[0].line_number == 5
assert parser.functions[0].end_line == 5
def test_missing_offset(parser):
"""What if the function doesn't have an offset comment?"""
with sample_file("missing_offset.cpp") as f:
parser.read_lines(f)
# TODO: For now, the function without the offset will just be ignored.
# Would be the same outcome if the comment was present but mangled and
# we failed to match it. We should detect these cases in the future.
assert len(parser.functions) == 1
def test_jumbled_case(parser):
"""The parser just reports what it sees. It is the responsibility of
the downstream tools to do something about a jumbled file.
Just verify that we are reading it correctly."""
with sample_file("out_of_order.cpp") as f:
parser.read_lines(f)
assert len(parser.functions) == 3
assert code_blocks_are_sorted(parser.functions) is False
def test_bad_file(parser):
with sample_file("poorly_formatted.cpp") as f:
parser.read_lines(f)
assert len(parser.functions) == 3
def test_indented(parser):
"""Offsets for functions inside of a class will probably be indented."""
with sample_file("basic_class.cpp") as f:
parser.read_lines(f)
# TODO: We don't properly detect the end of these functions
# because the closing brace is indented. However... knowing where each
# function ends is less important (for now) than capturing
# all the functions that are there.
assert len(parser.functions) == 2
assert parser.functions[0].offset == int("0x12345678", 16)
assert parser.functions[0].line_number == 16
# assert parser.functions[0].end_line == 19
assert parser.functions[1].offset == int("0xdeadbeef", 16)
assert parser.functions[1].line_number == 23
# assert parser.functions[1].end_line == 25
def test_inline(parser):
with sample_file("inline.cpp") as f:
parser.read_lines(f)
assert len(parser.functions) == 2
for fun in parser.functions:
assert fun.line_number is not None
assert fun.line_number == fun.end_line
def test_multiple_offsets(parser):
"""If multiple offset marks appear before for a code block, take them
all but ensure module name (case-insensitive) is distinct.
Use first module occurrence in case of duplicates."""
with sample_file("multiple_offsets.cpp") as f:
parser.read_lines(f)
assert len(parser.functions) == 4
assert parser.functions[0].module == "TEST"
assert parser.functions[0].line_number == 9
assert parser.functions[1].module == "HELLO"
assert parser.functions[1].line_number == 9
# Duplicate modules are ignored
assert parser.functions[2].line_number == 16
assert parser.functions[2].offset == 0x2345
assert parser.functions[3].module == "TEST"
assert parser.functions[3].offset == 0x2002
def test_variables(parser):
with sample_file("global_variables.cpp") as f:
parser.read_lines(f)
assert len(parser.functions) == 1
assert len(parser.variables) == 2

View File

@@ -0,0 +1,150 @@
import pytest
from isledecomp.parser.parser import (
ReaderState as _rs,
DecompParser,
)
from isledecomp.parser.error import ParserError as _pe
# fmt: off
state_change_marker_cases = [
(_rs.SEARCH, "FUNCTION", _rs.WANT_SIG, None),
(_rs.SEARCH, "GLOBAL", _rs.IN_GLOBAL, None),
(_rs.SEARCH, "STUB", _rs.WANT_SIG, None),
(_rs.SEARCH, "SYNTHETIC", _rs.IN_TEMPLATE, None),
(_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None),
(_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None),
(_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None),
(_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_SIG, "STUB", _rs.WANT_SIG, None),
(_rs.WANT_SIG, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_SIG, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_SIG, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC, "FUNCTION", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
(_rs.IN_FUNC, "STUB", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "SYNTHETIC", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "SYNTHETIC", _rs.IN_TEMPLATE, None),
(_rs.IN_TEMPLATE, "TEMPLATE", _rs.IN_TEMPLATE, None),
(_rs.IN_TEMPLATE, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_CURLY, "FUNCTION", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "GLOBAL", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "STUB", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "SYNTHETIC", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "TEMPLATE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "VTABLE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.IN_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "GLOBAL", _rs.IN_GLOBAL, None),
(_rs.IN_GLOBAL, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
(_rs.IN_FUNC_GLOBAL, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "VTABLE", _rs.IN_VTABLE, None),
]
# fmt: on
@pytest.mark.parametrize(
"state, marker_type, new_state, expected_error", state_change_marker_cases
)
def test_state_change_by_marker(
state: _rs, marker_type: str, new_state: _rs, expected_error: None | _pe
):
p = DecompParser()
p.state = state
mock_line = f"// {marker_type}: TEST 0x1234"
p.read_line(mock_line)
assert p.state == new_state
if expected_error is not None:
assert len(p.alerts) > 0
assert p.alerts[0].code == expected_error
# Reading any of these lines should have no effect in ReaderState.SEARCH
search_lines_no_effect = [
"",
"\t",
" ",
"int x = 0;",
"// Comment",
"/*",
"*/",
"/* Block comment */",
"{",
"}",
]
@pytest.mark.parametrize("line", search_lines_no_effect)
def test_state_search_line(line: str):
p = DecompParser()
p.read_line(line)
assert p.state == _rs.SEARCH
assert len(p.alerts) == 0
global_lines = [
("// A comment", _rs.IN_GLOBAL),
("", _rs.IN_GLOBAL),
("\t", _rs.IN_GLOBAL),
(" ", _rs.IN_GLOBAL),
# TODO: no check for "likely" variable declaration so these all count
("void function()", _rs.SEARCH),
("int x = 123;", _rs.SEARCH),
("just some text", _rs.SEARCH),
]
@pytest.mark.parametrize("line, new_state", global_lines)
def test_state_global_line(line: str, new_state: _rs):
p = DecompParser()
p.read_line("// GLOBAL: TEST 0x1234")
assert p.state == _rs.IN_GLOBAL
p.read_line(line)
assert p.state == new_state
# mostly same as above
in_func_global_lines = [
("// A comment", _rs.IN_FUNC_GLOBAL),
("", _rs.IN_FUNC_GLOBAL),
("\t", _rs.IN_FUNC_GLOBAL),
(" ", _rs.IN_FUNC_GLOBAL),
# TODO: no check for "likely" variable declaration so these all count
("void function()", _rs.IN_FUNC),
("int x = 123;", _rs.IN_FUNC),
("just some text", _rs.IN_FUNC),
]
@pytest.mark.parametrize("line, new_state", in_func_global_lines)
def test_state_in_func_global_line(line: str, new_state: _rs):
p = DecompParser()
p.state = _rs.IN_FUNC
p.read_line("// GLOBAL: TEST 0x1234")
assert p.state == _rs.IN_FUNC_GLOBAL
p.read_line(line)
assert p.state == new_state

View File

@@ -1,11 +1,11 @@
from collections import namedtuple
from typing import List
import pytest
from isledecomp.parser.parser import MarkerDict
from isledecomp.parser.util import (
DecompMarker,
is_blank_or_comment,
match_offset_comment,
is_exact_offset_comment,
distinct_by_module,
match_marker,
is_marker_exact,
get_class_name,
)
@@ -28,76 +28,106 @@ def test_is_blank_or_comment(line: str, expected: bool):
assert is_blank_or_comment(line) is expected
offset_comment_samples = [
marker_samples = [
# (can_parse: bool, exact_match: bool, line: str)
# Should match both expected modules with optional STUB marker
(True, True, "// OFFSET: LEGO1 0xdeadbeef"),
(True, True, "// OFFSET: LEGO1 0xdeadbeef STUB"),
(True, True, "// OFFSET: ISLE 0x12345678"),
(True, True, "// OFFSET: ISLE 0x12345678 STUB"),
(True, True, "// FUNCTION: LEGO1 0xdeadbeef"),
(True, True, "// FUNCTION: ISLE 0x12345678"),
# No trailing spaces allowed
(True, False, "// OFFSET: LEGO1 0xdeadbeef "),
(True, False, "// OFFSET: LEGO1 0xdeadbeef STUB "),
(True, False, "// FUNCTION: LEGO1 0xdeadbeef "),
# Must have exactly one space between elements
(True, False, "//OFFSET: ISLE 0xdeadbeef"),
(True, False, "// OFFSET:ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef STUB"),
# Must have 0x prefix for hex number
(True, False, "// OFFSET: ISLE deadbeef"),
(True, False, "//FUNCTION: ISLE 0xdeadbeef"),
(True, False, "// FUNCTION:ISLE 0xdeadbeef"),
(True, False, "// FUNCTION: ISLE 0xdeadbeef"),
(True, False, "// FUNCTION: ISLE 0xdeadbeef"),
(True, False, "// FUNCTION: ISLE 0xdeadbeef"),
# Must have 0x prefix for hex number to match at all
(False, False, "// FUNCTION: ISLE deadbeef"),
# Offset, module name, and STUB must be uppercase
(True, False, "// offset: ISLE 0xdeadbeef"),
(True, False, "// offset: isle 0xdeadbeef"),
(True, False, "// OFFSET: LEGO1 0xdeadbeef stub"),
(True, False, "// function: ISLE 0xdeadbeef"),
(True, False, "// function: isle 0xdeadbeef"),
# Hex string must be lowercase
(True, False, "// OFFSET: ISLE 0xDEADBEEF"),
(True, False, "// FUNCTION: ISLE 0xDEADBEEF"),
# TODO: How flexible should we be with matching the module name?
(True, True, "// OFFSET: OMNI 0x12345678"),
(True, True, "// OFFSET: LEG01 0x12345678"),
(True, False, "// OFFSET: hello 0x12345678"),
(True, True, "// FUNCTION: OMNI 0x12345678"),
(True, True, "// FUNCTION: LEG01 0x12345678"),
(True, False, "// FUNCTION: hello 0x12345678"),
# Not close enough to match
(False, False, "// OFFSET: ISLE0x12345678"),
(False, False, "// OFFSET: 0x12345678"),
(False, False, "// FUNCTION: ISLE0x12345678"),
(False, False, "// FUNCTION: 0x12345678"),
(False, False, "// LEGO1: 0x12345678"),
# Hex string shorter than 8 characters
(True, True, "// OFFSET: LEGO1 0x1234"),
(True, True, "// FUNCTION: LEGO1 0x1234"),
# TODO: These match but shouldn't.
# (False, False, '// OFFSET: LEGO1 0'),
# (False, False, '// OFFSET: LEGO1 0x'),
# (False, False, '// FUNCTION: LEGO1 0'),
# (False, False, '// FUNCTION: LEGO1 0x'),
]
@pytest.mark.parametrize("match, _, line", offset_comment_samples)
def test_offset_match(line: str, match: bool, _):
did_match = match_offset_comment(line) is not None
@pytest.mark.parametrize("match, _, line", marker_samples)
def test_marker_match(line: str, match: bool, _):
did_match = match_marker(line) is not None
assert did_match is match
@pytest.mark.parametrize("_, exact, line", offset_comment_samples)
def test_exact_offset_comment(line: str, exact: bool, _):
assert is_exact_offset_comment(line) is exact
@pytest.mark.parametrize("_, exact, line", marker_samples)
def test_marker_exact(line: str, exact: bool, _):
assert is_marker_exact(line) is exact
# Helper for the next test: cut down version of OffsetMatch
MiniOfs = namedtuple("MiniOfs", ["module", "value"])
def test_marker_dict_simple():
d = MarkerDict()
d.insert(DecompMarker("FUNCTION", "TEST", 0x1234))
markers = list(d.iter())
assert len(markers) == 1
distinct_by_module_samples = [
# empty set
([], []),
# same module name
([MiniOfs("TEST", 123), MiniOfs("TEST", 555)], [MiniOfs("TEST", 123)]),
# same module name, case-insensitive
([MiniOfs("test", 123), MiniOfs("TEST", 555)], [MiniOfs("test", 123)]),
# duplicates, non-consecutive
(
[MiniOfs("test", 123), MiniOfs("abc", 111), MiniOfs("TEST", 555)],
[MiniOfs("test", 123), MiniOfs("abc", 111)],
),
def test_marker_dict_ofs_replace():
d = MarkerDict()
d.insert(DecompMarker("FUNCTION", "TEST", 0x1234))
d.insert(DecompMarker("FUNCTION", "TEST", 0x555))
markers = list(d.iter())
assert len(markers) == 1
assert markers[0].offset == 0x1234
def test_marker_dict_type_replace():
d = MarkerDict()
d.insert(DecompMarker("FUNCTION", "TEST", 0x1234))
d.insert(DecompMarker("STUB", "TEST", 0x1234))
markers = list(d.iter())
assert len(markers) == 1
assert markers[0].type == "FUNCTION"
class_name_match_cases = [
("struct MxString {", "MxString"),
("class MxString {", "MxString"),
("// class MxString", "MxString"),
("class MxString : public MxCore {", "MxString"),
("class MxPtrList<MxPresenter>", "MxPtrList<MxPresenter>"),
# If it is possible to match the symbol MxList<LegoPathController *>::`vftable'
# we should get the correct class name if possible. If the template type is a pointer,
# the asterisk and class name are separated by one space.
("// class MxList<LegoPathController *>", "MxList<LegoPathController *>"),
("// class MxList<LegoPathController*>", "MxList<LegoPathController *>"),
("// class MxList<LegoPathController* >", "MxList<LegoPathController *>"),
# I don't know if this would ever come up, but sure, why not?
("// class MxList<LegoPathController**>", "MxList<LegoPathController **>"),
]
@pytest.mark.parametrize("sample, expected", distinct_by_module_samples)
def test_distinct_by_module(sample: List[MiniOfs], expected: List[MiniOfs]):
assert distinct_by_module(sample) == expected
@pytest.mark.parametrize("line, class_name", class_name_match_cases)
def test_get_class_name(line: str, class_name: str):
assert get_class_name(line) == class_name
class_name_no_match_cases = [
"MxString { ",
"clas MxString",
"// MxPtrList<MxPresenter>::`scalar deleting destructor'",
]
@pytest.mark.parametrize("line", class_name_no_match_cases)
def test_get_class_name_none(line: str):
assert get_class_name(line) is None