Merge from parser2 branch

This commit is contained in:
disinvite
2023-12-01 15:10:32 -05:00
parent 47a6ea2de7
commit 75802101ac
20 changed files with 923 additions and 352 deletions

View File

@@ -3,6 +3,7 @@
// A very simple class
// VTABLE: TEST 0x1001002
class TestClass {
public:
TestClass();
@@ -10,14 +11,14 @@ public:
virtual MxResult Tickle() override; // vtable+08
// OFFSET: TEST 0x12345678
// FUNCTION: TEST 0x12345678
inline const char* ClassName() const // vtable+0c
{
// 0xabcd1234
return "TestClass";
}
// OFFSET: TEST 0xdeadbeef
// FUNCTION: TEST 0xdeadbeef
inline MxBool IsA(const char* name) const override // vtable+10
{
return !strcmp(name, TestClass::ClassName());

View File

@@ -3,19 +3,19 @@
// A very simple well-formed code file
// OFFSET: TEST 0x1234
// FUNCTION: TEST 0x1234
void function01()
{
// TODO
}
// OFFSET: TEST 0x2345
// FUNCTION: TEST 0x2345
void function02()
{
// TODO
}
// OFFSET: TEST 0x3456
// FUNCTION: TEST 0x3456
void function03()
{
// TODO

View File

@@ -0,0 +1,14 @@
// Sample for python unit tests
// Not part of the decomp
// Global variables inside and outside of functions
// GLOBAL: TEST 0x1000
const char *g_message = "test";
// FUNCTION: TEST 0x1234
void function01()
{
// GLOBAL: TEST 0x5555
static int g_hello = 123;
}

View File

@@ -1,8 +1,8 @@
// Sample for python unit tests
// Not part of the decomp
// OFFSET: TEST 0x10000001
// FUNCTION: TEST 0x10000001
inline const char* OneLineWithComment() const { return "MxDSObject"; }; // hi there
// OFFSET: TEST 0x10000002
// FUNCTION: TEST 0x10000002
inline const char* OneLine() const { return "MxDSObject"; };

View File

@@ -9,7 +9,7 @@ int no_offset_comment()
return -1;
}
// OFFSET: TEST 0xdeadbeef
// FUNCTION: TEST 0xdeadbeef
void regular_ole_function()
{
printf("hi there");

View File

@@ -3,22 +3,22 @@
// Handling multiple offset markers
// OFFSET: TEST 0x1234
// OFFSET: HELLO 0x5555
// FUNCTION: TEST 0x1234
// FUNCTION: HELLO 0x5555
void different_modules()
{
// TODO
}
// OFFSET: TEST 0x2345
// OFFSET: TEST 0x1234
// FUNCTION: TEST 0x2345
// FUNCTION: TEST 0x1234
void same_module()
{
// TODO
}
// OFFSET: TEST 0x2002
// OFFSET: test 0x1001
// FUNCTION: TEST 0x2002
// FUNCTION: test 0x1001
void same_case_insensitive()
{
// TODO

View File

@@ -1,10 +1,10 @@
// Sample for python unit tests
// Not part of the decomp
// OFFSET: TEST 0x1234
// FUNCTION: TEST 0x1234
void short_function() { static char* msg = "oneliner"; }
// OFFSET: TEST 0x5555
// FUNCTION: TEST 0x5555
void function_after_one_liner()
{
// This function comes after the previous that is on a single line.

View File

@@ -1,19 +1,19 @@
// Sample for python unit tests
// Not part of the decomp
// OFFSET: TEST 0x1001
// FUNCTION: TEST 0x1001
void function_order01()
{
// TODO
}
// OFFSET: TEST 0x1003
// FUNCTION: TEST 0x1003
void function_order03()
{
// TODO
}
// OFFSET: TEST 0x1002
// FUNCTION: TEST 0x1002
void function_order02()
{
// TODO

View File

@@ -4,18 +4,18 @@
// While it's reasonable to expect a well-formed file (and clang-format
// will make sure we get one), this will put the parser through its paces.
// OFFSET: TEST 0x1234
// FUNCTION: TEST 0x1234
void curly_with_spaces()
{
static char* msg = "hello";
}
// OFFSET: TEST 0x5555
// FUNCTION: TEST 0x5555
void weird_closing_curly()
{
int x = 123; }
// OFFSET: HELLO 0x5656
// FUNCTION: HELLO 0x5656
void bad_indenting() {
if (0)
{

View File

@@ -1,127 +1,170 @@
import os
from typing import List, TextIO
from isledecomp.parser import find_code_blocks
from isledecomp.parser.util import CodeBlock
SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples")
import pytest
from isledecomp.parser.parser import (
ReaderState,
DecompParser,
)
from isledecomp.parser.util import DecompMarker
from isledecomp.parser.error import ParserError
def sample_file(filename: str) -> TextIO:
"""Wrapper for opening the samples from the directory that does not
depend on the cwd where we run the test"""
full_path = os.path.join(SAMPLE_DIR, filename)
return open(full_path, "r", encoding="utf-8")
@pytest.fixture
def parser():
return DecompParser()
def code_blocks_are_sorted(blocks: List[CodeBlock]) -> bool:
"""Helper to make this more idiomatic"""
just_offsets = [block.offset for block in blocks]
return just_offsets == sorted(just_offsets)
@pytest.mark.skip(reason="todo")
def test_missing_sig(parser):
"""Bad syntax: function signature is missing"""
parser.read_lines(["// FUNCTION: TEST 0x1234", "{"])
assert parser.state == ReaderState.IN_FUNC
assert len(parser.alerts) == 1
parser.read_line("}")
assert len(parser.functions) == 1
assert parser.functions[0] != "{"
# Tests are below #
def test_not_exact_syntax(parser):
"""Alert to inexact syntax right here in the parser instead of kicking it downstream.
Doing this means we don't have to save the actual text."""
parser.read_line("// function: test 1234")
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.BAD_DECOMP_MARKER
def test_sanity():
"""Read a very basic file"""
with sample_file("basic_file.cpp") as f:
blocks = find_code_blocks(f)
def test_invalid_marker(parser):
"""We matched a decomp marker, but it's not one we care about"""
parser.read_line("// BANANA: TEST 0x1234")
assert parser.state == ReaderState.SEARCH
assert len(blocks) == 3
assert code_blocks_are_sorted(blocks) is True
# n.b. The parser returns line numbers as 1-based
# Function starts when we see the opening curly brace
assert blocks[0].start_line == 8
assert blocks[0].end_line == 10
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.BOGUS_MARKER
def test_oneline():
"""(Assuming clang-format permits this) This sample has a function
on a single line. This will test the end-of-function detection"""
with sample_file("oneline_function.cpp") as f:
blocks = find_code_blocks(f)
assert len(blocks) == 2
assert blocks[0].start_line == 5
assert blocks[0].end_line == 5
def test_unexpected_marker(parser):
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
"// GLOBAL: TEST 0x5000",
]
)
assert parser.state == ReaderState.SEARCH
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER
def test_missing_offset():
"""What if the function doesn't have an offset comment?"""
with sample_file("missing_offset.cpp") as f:
blocks = find_code_blocks(f)
# TODO: For now, the function without the offset will just be ignored.
# Would be the same outcome if the comment was present but mangled and
# we failed to match it. We should detect these cases in the future.
assert len(blocks) == 1
def test_variable(parser):
parser.read_lines(
[
"// GLOBAL: HELLO 0x1234",
"int g_value = 5;",
]
)
assert len(parser.variables) == 1
def test_jumbled_case():
"""The parser just reports what it sees. It is the responsibility of
the downstream tools to do something about a jumbled file.
Just verify that we are reading it correctly."""
with sample_file("out_of_order.cpp") as f:
blocks = find_code_blocks(f)
assert len(blocks) == 3
assert code_blocks_are_sorted(blocks) is False
def test_synthetic_plus_marker(parser):
"""Should fail with error and not log the synthetic"""
parser.read_lines(
[
"// SYNTHETIC: HEY 0x555",
"// FUNCTION: HOWDY 0x1234",
]
)
assert len(parser.functions) == 0
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER
def test_bad_file():
with sample_file("poorly_formatted.cpp") as f:
blocks = find_code_blocks(f)
def test_different_markers_different_module(parser):
"""Does it make any sense for a function to be a stub in one module,
but not in another? I don't know. But it's no problem for us."""
parser.read_lines(
[
"// FUNCTION: HOWDY 0x1234",
"// STUB: SUP 0x5555",
"void interesting_function() {",
"}",
]
)
assert len(blocks) == 3
assert len(parser.alerts) == 0
assert len(parser.functions) == 2
def test_indented():
"""Offsets for functions inside of a class will probably be indented."""
with sample_file("basic_class.cpp") as f:
blocks = find_code_blocks(f)
def test_different_markers_same_module(parser):
"""Now, if something is a regular function but then a stub,
what do we say about that?"""
parser.read_lines(
[
"// FUNCTION: HOWDY 0x1234",
"// STUB: HOWDY 0x5555",
"void interesting_function() {",
"}",
]
)
# TODO: We don't properly detect the end of these functions
# because the closing brace is indented. However... knowing where each
# function ends is less important (for now) than capturing
# all the functions that are there.
# Use first marker declaration, don't replace
assert len(parser.functions) == 1
assert parser.functions[0].is_stub is False
assert len(blocks) == 2
assert blocks[0].offset == int("0x12345678", 16)
assert blocks[0].start_line == 15
# assert blocks[0].end_line == 18
assert blocks[1].offset == int("0xdeadbeef", 16)
assert blocks[1].start_line == 22
# assert blocks[1].end_line == 24
# Should alert to this
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE
def test_inline():
with sample_file("inline.cpp") as f:
blocks = find_code_blocks(f)
def test_unexpected_synthetic(parser):
"""FUNCTION then SYNTHETIC should fail to report either one"""
parser.read_lines(
[
"// FUNCTION: HOWDY 0x1234",
"// SYNTHETIC: HOWDY 0x5555",
"void interesting_function() {",
"}",
]
)
assert len(blocks) == 2
for block in blocks:
assert block.start_line is not None
assert block.start_line == block.end_line
assert parser.state == ReaderState.SEARCH
assert len(parser.functions) == 0
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER
def test_multiple_offsets():
"""If multiple offset marks appear before for a code block, take them
all but ensure module name (case-insensitive) is distinct.
Use first module occurrence in case of duplicates."""
with sample_file("multiple_offsets.cpp") as f:
blocks = find_code_blocks(f)
@pytest.mark.skip(reason="not implemented yet")
def test_duplicate_offset(parser):
"""Repeating the same module/offset in the same file is probably a typo"""
parser.read_lines(
[
"// GLOBAL: HELLO 0x1234",
"int x = 1;",
"// GLOBAL: HELLO 0x1234",
"int y = 2;",
]
)
assert len(blocks) == 4
assert blocks[0].module == "TEST"
assert blocks[0].start_line == 9
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.DUPLICATE_OFFSET
assert blocks[1].module == "HELLO"
assert blocks[1].start_line == 9
# Duplicate modules are ignored
assert blocks[2].start_line == 16
assert blocks[2].offset == 0x2345
def test_multiple_variables(parser):
"""Theoretically the same global variable can appear in multiple modules"""
parser.read_lines(
[
"// GLOBAL: HELLO 0x1234",
"// GLOBAL: WUZZUP 0x555",
"const char *g_greeting;",
]
)
assert len(parser.alerts) == 0
assert len(parser.variables) == 2
assert blocks[3].module == "TEST"
assert blocks[3].offset == 0x2002
def test_multiple_vtables(parser):
parser.read_lines(
[
"// VTABLE: HELLO 0x1234",
"// VTABLE: TEST 0x5432",
"class MxString : public MxCore {",
]
)
assert len(parser.alerts) == 0
assert len(parser.vtables) == 2

View File

@@ -0,0 +1,141 @@
import os
import pytest
from typing import List, TextIO
from isledecomp.parser import DecompParser
from isledecomp.parser.node import ParserSymbol
SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples")
def sample_file(filename: str) -> TextIO:
"""Wrapper for opening the samples from the directory that does not
depend on the cwd where we run the test"""
full_path = os.path.join(SAMPLE_DIR, filename)
return open(full_path, "r", encoding="utf-8")
def code_blocks_are_sorted(blocks: List[ParserSymbol]) -> bool:
"""Helper to make this more idiomatic"""
just_offsets = [block.offset for block in blocks]
return just_offsets == sorted(just_offsets)
@pytest.fixture
def parser():
return DecompParser()
# Tests are below #
def test_sanity(parser):
"""Read a very basic file"""
with sample_file("basic_file.cpp") as f:
parser.read_lines(f)
assert len(parser.functions) == 3
assert code_blocks_are_sorted(parser.functions) is True
# n.b. The parser returns line numbers as 1-based
# Function starts when we see the opening curly brace
assert parser.functions[0].line_number == 8
assert parser.functions[0].end_line == 10
def test_oneline(parser):
"""(Assuming clang-format permits this) This sample has a function
on a single line. This will test the end-of-function detection"""
with sample_file("oneline_function.cpp") as f:
parser.read_lines(f)
assert len(parser.functions) == 2
assert parser.functions[0].line_number == 5
assert parser.functions[0].end_line == 5
def test_missing_offset(parser):
"""What if the function doesn't have an offset comment?"""
with sample_file("missing_offset.cpp") as f:
parser.read_lines(f)
# TODO: For now, the function without the offset will just be ignored.
# Would be the same outcome if the comment was present but mangled and
# we failed to match it. We should detect these cases in the future.
assert len(parser.functions) == 1
def test_jumbled_case(parser):
"""The parser just reports what it sees. It is the responsibility of
the downstream tools to do something about a jumbled file.
Just verify that we are reading it correctly."""
with sample_file("out_of_order.cpp") as f:
parser.read_lines(f)
assert len(parser.functions) == 3
assert code_blocks_are_sorted(parser.functions) is False
def test_bad_file(parser):
with sample_file("poorly_formatted.cpp") as f:
parser.read_lines(f)
assert len(parser.functions) == 3
def test_indented(parser):
"""Offsets for functions inside of a class will probably be indented."""
with sample_file("basic_class.cpp") as f:
parser.read_lines(f)
# TODO: We don't properly detect the end of these functions
# because the closing brace is indented. However... knowing where each
# function ends is less important (for now) than capturing
# all the functions that are there.
assert len(parser.functions) == 2
assert parser.functions[0].offset == int("0x12345678", 16)
assert parser.functions[0].line_number == 16
# assert parser.functions[0].end_line == 19
assert parser.functions[1].offset == int("0xdeadbeef", 16)
assert parser.functions[1].line_number == 23
# assert parser.functions[1].end_line == 25
def test_inline(parser):
with sample_file("inline.cpp") as f:
parser.read_lines(f)
assert len(parser.functions) == 2
for fun in parser.functions:
assert fun.line_number is not None
assert fun.line_number == fun.end_line
def test_multiple_offsets(parser):
"""If multiple offset marks appear before for a code block, take them
all but ensure module name (case-insensitive) is distinct.
Use first module occurrence in case of duplicates."""
with sample_file("multiple_offsets.cpp") as f:
parser.read_lines(f)
assert len(parser.functions) == 4
assert parser.functions[0].module == "TEST"
assert parser.functions[0].line_number == 9
assert parser.functions[1].module == "HELLO"
assert parser.functions[1].line_number == 9
# Duplicate modules are ignored
assert parser.functions[2].line_number == 16
assert parser.functions[2].offset == 0x2345
assert parser.functions[3].module == "TEST"
assert parser.functions[3].offset == 0x2002
def test_variables(parser):
with sample_file("global_variables.cpp") as f:
parser.read_lines(f)
assert len(parser.functions) == 1
assert len(parser.variables) == 2

View File

@@ -0,0 +1,150 @@
import pytest
from isledecomp.parser.parser import (
ReaderState as _rs,
DecompParser,
)
from isledecomp.parser.util import DecompMarker
from isledecomp.parser.error import ParserError as _pe
# fmt: off
state_change_marker_cases = [
(_rs.SEARCH, "FUNCTION", _rs.WANT_SIG, None),
(_rs.SEARCH, "GLOBAL", _rs.IN_GLOBAL, None),
(_rs.SEARCH, "STUB", _rs.WANT_SIG, None),
(_rs.SEARCH, "SYNTHETIC", _rs.IN_TEMPLATE, None),
(_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None),
(_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None),
(_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None),
(_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_SIG, "STUB", _rs.WANT_SIG, None),
(_rs.WANT_SIG, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_SIG, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_SIG, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC, "FUNCTION", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
(_rs.IN_FUNC, "STUB", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "SYNTHETIC", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION),
(_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_TEMPLATE, "SYNTHETIC", _rs.IN_TEMPLATE, None),
(_rs.IN_TEMPLATE, "TEMPLATE", _rs.IN_TEMPLATE, None),
(_rs.IN_TEMPLATE, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.WANT_CURLY, "FUNCTION", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "GLOBAL", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "STUB", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "SYNTHETIC", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "TEMPLATE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.WANT_CURLY, "VTABLE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
(_rs.IN_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "GLOBAL", _rs.IN_GLOBAL, None),
(_rs.IN_GLOBAL, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
(_rs.IN_FUNC_GLOBAL, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
(_rs.IN_VTABLE, "VTABLE", _rs.IN_VTABLE, None),
]
# fmt: on
@pytest.mark.parametrize(
"state, marker_type, new_state, expected_error", state_change_marker_cases
)
def test_state_change_by_marker(
state: _rs, marker_type: str, new_state: _rs, expected_error: None | _pe
):
p = DecompParser()
p.state = state
p._handle_marker(DecompMarker(marker_type, "TEST", 0x1234))
assert p.state == new_state
if expected_error is not None:
assert len(p.alerts) > 0
assert p.alerts[0].code == expected_error
# Reading any of these lines should have no effect in ReaderState.SEARCH
search_lines_no_effect = [
"",
"\t",
" ",
"int x = 0;",
"// Comment",
"/*",
"*/",
"/* Block comment */",
"{",
"}",
]
@pytest.mark.parametrize("line", search_lines_no_effect)
def test_state_search_line(line: str):
p = DecompParser()
p.read_line(line)
assert p.state == _rs.SEARCH
assert len(p.alerts) == 0
global_lines = [
("// A comment", _rs.IN_GLOBAL),
("", _rs.IN_GLOBAL),
("\t", _rs.IN_GLOBAL),
(" ", _rs.IN_GLOBAL),
# TODO: no check for "likely" variable declaration so these all count
("void function()", _rs.SEARCH),
("int x = 123;", _rs.SEARCH),
("just some text", _rs.SEARCH),
]
@pytest.mark.parametrize("line, new_state", global_lines)
def test_state_global_line(line: str, new_state: _rs):
p = DecompParser()
p.read_line("// GLOBAL: TEST 0x1234")
assert p.state == _rs.IN_GLOBAL
p.read_line(line)
assert p.state == new_state
# mostly same as above
in_func_global_lines = [
("// A comment", _rs.IN_FUNC_GLOBAL),
("", _rs.IN_FUNC_GLOBAL),
("\t", _rs.IN_FUNC_GLOBAL),
(" ", _rs.IN_FUNC_GLOBAL),
# TODO: no check for "likely" variable declaration so these all count
("void function()", _rs.IN_FUNC),
("int x = 123;", _rs.IN_FUNC),
("just some text", _rs.IN_FUNC),
]
@pytest.mark.parametrize("line, new_state", in_func_global_lines)
def test_state_in_func_global_line(line: str, new_state: _rs):
p = DecompParser()
p.state = _rs.IN_FUNC
p.read_line("// GLOBAL: TEST 0x1234")
assert p.state == _rs.IN_FUNC_GLOBAL
p.read_line(line)
assert p.state == new_state

View File

@@ -1,11 +1,12 @@
from collections import namedtuple
from typing import List
import pytest
from isledecomp.parser.parser import MarkerDict
from isledecomp.parser.util import (
DecompMarker,
is_blank_or_comment,
match_offset_comment,
is_exact_offset_comment,
distinct_by_module,
match_marker,
is_marker_exact,
)
@@ -28,76 +29,72 @@ def test_is_blank_or_comment(line: str, expected: bool):
assert is_blank_or_comment(line) is expected
offset_comment_samples = [
marker_samples = [
# (can_parse: bool, exact_match: bool, line: str)
# Should match both expected modules with optional STUB marker
(True, True, "// OFFSET: LEGO1 0xdeadbeef"),
(True, True, "// OFFSET: LEGO1 0xdeadbeef STUB"),
(True, True, "// OFFSET: ISLE 0x12345678"),
(True, True, "// OFFSET: ISLE 0x12345678 STUB"),
(True, True, "// FUNCTION: LEGO1 0xdeadbeef"),
(True, True, "// FUNCTION: ISLE 0x12345678"),
# No trailing spaces allowed
(True, False, "// OFFSET: LEGO1 0xdeadbeef "),
(True, False, "// OFFSET: LEGO1 0xdeadbeef STUB "),
(True, False, "// FUNCTION: LEGO1 0xdeadbeef "),
# Must have exactly one space between elements
(True, False, "//OFFSET: ISLE 0xdeadbeef"),
(True, False, "// OFFSET:ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef STUB"),
(True, False, "//FUNCTION: ISLE 0xdeadbeef"),
(True, False, "// FUNCTION:ISLE 0xdeadbeef"),
(True, False, "// FUNCTION: ISLE 0xdeadbeef"),
(True, False, "// FUNCTION: ISLE 0xdeadbeef"),
(True, False, "// FUNCTION: ISLE 0xdeadbeef"),
# Must have 0x prefix for hex number
(True, False, "// OFFSET: ISLE deadbeef"),
(True, False, "// FUNCTION: ISLE deadbeef"),
# Offset, module name, and STUB must be uppercase
(True, False, "// offset: ISLE 0xdeadbeef"),
(True, False, "// offset: isle 0xdeadbeef"),
(True, False, "// OFFSET: LEGO1 0xdeadbeef stub"),
(True, False, "// function: ISLE 0xdeadbeef"),
(True, False, "// function: isle 0xdeadbeef"),
# Hex string must be lowercase
(True, False, "// OFFSET: ISLE 0xDEADBEEF"),
(True, False, "// FUNCTION: ISLE 0xDEADBEEF"),
# TODO: How flexible should we be with matching the module name?
(True, True, "// OFFSET: OMNI 0x12345678"),
(True, True, "// OFFSET: LEG01 0x12345678"),
(True, False, "// OFFSET: hello 0x12345678"),
(True, True, "// FUNCTION: OMNI 0x12345678"),
(True, True, "// FUNCTION: LEG01 0x12345678"),
(True, False, "// FUNCTION: hello 0x12345678"),
# Not close enough to match
(False, False, "// OFFSET: ISLE0x12345678"),
(False, False, "// OFFSET: 0x12345678"),
(False, False, "// FUNCTION: ISLE0x12345678"),
(False, False, "// FUNCTION: 0x12345678"),
(False, False, "// LEGO1: 0x12345678"),
# Hex string shorter than 8 characters
(True, True, "// OFFSET: LEGO1 0x1234"),
(True, True, "// FUNCTION: LEGO1 0x1234"),
# TODO: These match but shouldn't.
# (False, False, '// OFFSET: LEGO1 0'),
# (False, False, '// OFFSET: LEGO1 0x'),
# (False, False, '// FUNCTION: LEGO1 0'),
# (False, False, '// FUNCTION: LEGO1 0x'),
]
@pytest.mark.parametrize("match, _, line", offset_comment_samples)
def test_offset_match(line: str, match: bool, _):
did_match = match_offset_comment(line) is not None
@pytest.mark.parametrize("match, _, line", marker_samples)
def test_marker_match(line: str, match: bool, _):
did_match = match_marker(line) is not None
assert did_match is match
@pytest.mark.parametrize("_, exact, line", offset_comment_samples)
def test_exact_offset_comment(line: str, exact: bool, _):
assert is_exact_offset_comment(line) is exact
@pytest.mark.parametrize("_, exact, line", marker_samples)
def test_marker_exact(line: str, exact: bool, _):
assert is_marker_exact(line) is exact
# Helper for the next test: cut down version of OffsetMatch
MiniOfs = namedtuple("MiniOfs", ["module", "value"])
distinct_by_module_samples = [
# empty set
([], []),
# same module name
([MiniOfs("TEST", 123), MiniOfs("TEST", 555)], [MiniOfs("TEST", 123)]),
# same module name, case-insensitive
([MiniOfs("test", 123), MiniOfs("TEST", 555)], [MiniOfs("test", 123)]),
# duplicates, non-consecutive
(
[MiniOfs("test", 123), MiniOfs("abc", 111), MiniOfs("TEST", 555)],
[MiniOfs("test", 123), MiniOfs("abc", 111)],
),
]
def test_marker_dict_simple():
d = MarkerDict()
d.insert(DecompMarker("FUNCTION", "TEST", 0x1234))
markers = list(d.iter())
assert len(markers) == 1
@pytest.mark.parametrize("sample, expected", distinct_by_module_samples)
def test_distinct_by_module(sample: List[MiniOfs], expected: List[MiniOfs]):
assert distinct_by_module(sample) == expected
def test_marker_dict_ofs_replace():
d = MarkerDict()
d.insert(DecompMarker("FUNCTION", "TEST", 0x1234))
d.insert(DecompMarker("FUNCTION", "TEST", 0x555))
markers = list(d.iter())
assert len(markers) == 1
assert markers[0].offset == 0x1234
def test_marker_dict_type_replace():
d = MarkerDict()
d.insert(DecompMarker("FUNCTION", "TEST", 0x1234))
d.insert(DecompMarker("STUB", "TEST", 0x1234))
markers = list(d.iter())
assert len(markers) == 1
assert markers[0].type == "FUNCTION"