mirror of
				https://github.com/isledecomp/isle.git
				synced 2025-10-25 17:34:05 +00:00 
			
		
		
		
	Merge from parser2 branch
This commit is contained in:
		| @@ -1,127 +1,170 @@ | ||||
| import os | ||||
| from typing import List, TextIO | ||||
| from isledecomp.parser import find_code_blocks | ||||
| from isledecomp.parser.util import CodeBlock | ||||
| 
 | ||||
| SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples") | ||||
| import pytest | ||||
| from isledecomp.parser.parser import ( | ||||
|     ReaderState, | ||||
|     DecompParser, | ||||
| ) | ||||
| from isledecomp.parser.util import DecompMarker | ||||
| from isledecomp.parser.error import ParserError | ||||
| 
 | ||||
| 
 | ||||
| def sample_file(filename: str) -> TextIO: | ||||
|     """Wrapper for opening the samples from the directory that does not | ||||
|     depend on the cwd where we run the test""" | ||||
|     full_path = os.path.join(SAMPLE_DIR, filename) | ||||
|     return open(full_path, "r", encoding="utf-8") | ||||
| @pytest.fixture | ||||
| def parser(): | ||||
|     return DecompParser() | ||||
| 
 | ||||
| 
 | ||||
| def code_blocks_are_sorted(blocks: List[CodeBlock]) -> bool: | ||||
|     """Helper to make this more idiomatic""" | ||||
|     just_offsets = [block.offset for block in blocks] | ||||
|     return just_offsets == sorted(just_offsets) | ||||
| @pytest.mark.skip(reason="todo") | ||||
| def test_missing_sig(parser): | ||||
|     """Bad syntax: function signature is missing""" | ||||
|     parser.read_lines(["// FUNCTION: TEST 0x1234", "{"]) | ||||
|     assert parser.state == ReaderState.IN_FUNC | ||||
|     assert len(parser.alerts) == 1 | ||||
|     parser.read_line("}") | ||||
|     assert len(parser.functions) == 1 | ||||
|     assert parser.functions[0] != "{" | ||||
| 
 | ||||
| 
 | ||||
| # Tests are below # | ||||
| def test_not_exact_syntax(parser): | ||||
|     """Alert to inexact syntax right here in the parser instead of kicking it downstream. | ||||
|     Doing this means we don't have to save the actual text.""" | ||||
|     parser.read_line("// function: test 1234") | ||||
|     assert len(parser.alerts) == 1 | ||||
|     assert parser.alerts[0].code == ParserError.BAD_DECOMP_MARKER | ||||
| 
 | ||||
| 
 | ||||
| def test_sanity(): | ||||
|     """Read a very basic file""" | ||||
|     with sample_file("basic_file.cpp") as f: | ||||
|         blocks = find_code_blocks(f) | ||||
| def test_invalid_marker(parser): | ||||
|     """We matched a decomp marker, but it's not one we care about""" | ||||
|     parser.read_line("// BANANA: TEST 0x1234") | ||||
|     assert parser.state == ReaderState.SEARCH | ||||
| 
 | ||||
|     assert len(blocks) == 3 | ||||
|     assert code_blocks_are_sorted(blocks) is True | ||||
|     # n.b. The parser returns line numbers as 1-based | ||||
|     # Function starts when we see the opening curly brace | ||||
|     assert blocks[0].start_line == 8 | ||||
|     assert blocks[0].end_line == 10 | ||||
|     assert len(parser.alerts) == 1 | ||||
|     assert parser.alerts[0].code == ParserError.BOGUS_MARKER | ||||
| 
 | ||||
| 
 | ||||
| def test_oneline(): | ||||
|     """(Assuming clang-format permits this) This sample has a function | ||||
|     on a single line. This will test the end-of-function detection""" | ||||
|     with sample_file("oneline_function.cpp") as f: | ||||
|         blocks = find_code_blocks(f) | ||||
| 
 | ||||
|     assert len(blocks) == 2 | ||||
|     assert blocks[0].start_line == 5 | ||||
|     assert blocks[0].end_line == 5 | ||||
| def test_unexpected_marker(parser): | ||||
|     parser.read_lines( | ||||
|         [ | ||||
|             "// FUNCTION: TEST 0x1234", | ||||
|             "// GLOBAL: TEST 0x5000", | ||||
|         ] | ||||
|     ) | ||||
|     assert parser.state == ReaderState.SEARCH | ||||
|     assert len(parser.alerts) == 1 | ||||
|     assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER | ||||
| 
 | ||||
| 
 | ||||
| def test_missing_offset(): | ||||
|     """What if the function doesn't have an offset comment?""" | ||||
|     with sample_file("missing_offset.cpp") as f: | ||||
|         blocks = find_code_blocks(f) | ||||
| 
 | ||||
|     # TODO: For now, the function without the offset will just be ignored. | ||||
|     # Would be the same outcome if the comment was present but mangled and | ||||
|     # we failed to match it. We should detect these cases in the future. | ||||
|     assert len(blocks) == 1 | ||||
| def test_variable(parser): | ||||
|     parser.read_lines( | ||||
|         [ | ||||
|             "// GLOBAL: HELLO 0x1234", | ||||
|             "int g_value = 5;", | ||||
|         ] | ||||
|     ) | ||||
|     assert len(parser.variables) == 1 | ||||
| 
 | ||||
| 
 | ||||
| def test_jumbled_case(): | ||||
|     """The parser just reports what it sees. It is the responsibility of | ||||
|     the downstream tools to do something about a jumbled file. | ||||
|     Just verify that we are reading it correctly.""" | ||||
|     with sample_file("out_of_order.cpp") as f: | ||||
|         blocks = find_code_blocks(f) | ||||
| 
 | ||||
|     assert len(blocks) == 3 | ||||
|     assert code_blocks_are_sorted(blocks) is False | ||||
| def test_synthetic_plus_marker(parser): | ||||
|     """Should fail with error and not log the synthetic""" | ||||
|     parser.read_lines( | ||||
|         [ | ||||
|             "// SYNTHETIC: HEY 0x555", | ||||
|             "// FUNCTION: HOWDY 0x1234", | ||||
|         ] | ||||
|     ) | ||||
|     assert len(parser.functions) == 0 | ||||
|     assert len(parser.alerts) == 1 | ||||
|     assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER | ||||
| 
 | ||||
| 
 | ||||
| def test_bad_file(): | ||||
|     with sample_file("poorly_formatted.cpp") as f: | ||||
|         blocks = find_code_blocks(f) | ||||
| def test_different_markers_different_module(parser): | ||||
|     """Does it make any sense for a function to be a stub in one module, | ||||
|     but not in another? I don't know. But it's no problem for us.""" | ||||
|     parser.read_lines( | ||||
|         [ | ||||
|             "// FUNCTION: HOWDY 0x1234", | ||||
|             "// STUB: SUP 0x5555", | ||||
|             "void interesting_function() {", | ||||
|             "}", | ||||
|         ] | ||||
|     ) | ||||
| 
 | ||||
|     assert len(blocks) == 3 | ||||
|     assert len(parser.alerts) == 0 | ||||
|     assert len(parser.functions) == 2 | ||||
| 
 | ||||
| 
 | ||||
| def test_indented(): | ||||
|     """Offsets for functions inside of a class will probably be indented.""" | ||||
|     with sample_file("basic_class.cpp") as f: | ||||
|         blocks = find_code_blocks(f) | ||||
| def test_different_markers_same_module(parser): | ||||
|     """Now, if something is a regular function but then a stub, | ||||
|     what do we say about that?""" | ||||
|     parser.read_lines( | ||||
|         [ | ||||
|             "// FUNCTION: HOWDY 0x1234", | ||||
|             "// STUB: HOWDY 0x5555", | ||||
|             "void interesting_function() {", | ||||
|             "}", | ||||
|         ] | ||||
|     ) | ||||
| 
 | ||||
|     # TODO: We don't properly detect the end of these functions | ||||
|     # because the closing brace is indented. However... knowing where each | ||||
|     # function ends is less important (for now) than capturing | ||||
|     # all the functions that are there. | ||||
|     # Use first marker declaration, don't replace | ||||
|     assert len(parser.functions) == 1 | ||||
|     assert parser.functions[0].is_stub is False | ||||
| 
 | ||||
|     assert len(blocks) == 2 | ||||
|     assert blocks[0].offset == int("0x12345678", 16) | ||||
|     assert blocks[0].start_line == 15 | ||||
|     # assert blocks[0].end_line == 18 | ||||
| 
 | ||||
|     assert blocks[1].offset == int("0xdeadbeef", 16) | ||||
|     assert blocks[1].start_line == 22 | ||||
|     # assert blocks[1].end_line == 24 | ||||
|     # Should alert to this | ||||
|     assert len(parser.alerts) == 1 | ||||
|     assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE | ||||
| 
 | ||||
| 
 | ||||
| def test_inline(): | ||||
|     with sample_file("inline.cpp") as f: | ||||
|         blocks = find_code_blocks(f) | ||||
| def test_unexpected_synthetic(parser): | ||||
|     """FUNCTION then SYNTHETIC should fail to report either one""" | ||||
|     parser.read_lines( | ||||
|         [ | ||||
|             "// FUNCTION: HOWDY 0x1234", | ||||
|             "// SYNTHETIC: HOWDY 0x5555", | ||||
|             "void interesting_function() {", | ||||
|             "}", | ||||
|         ] | ||||
|     ) | ||||
| 
 | ||||
|     assert len(blocks) == 2 | ||||
|     for block in blocks: | ||||
|         assert block.start_line is not None | ||||
|         assert block.start_line == block.end_line | ||||
|     assert parser.state == ReaderState.SEARCH | ||||
|     assert len(parser.functions) == 0 | ||||
|     assert len(parser.alerts) == 1 | ||||
|     assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER | ||||
| 
 | ||||
| 
 | ||||
| def test_multiple_offsets(): | ||||
|     """If multiple offset marks appear before for a code block, take them | ||||
|     all but ensure module name (case-insensitive) is distinct. | ||||
|     Use first module occurrence in case of duplicates.""" | ||||
|     with sample_file("multiple_offsets.cpp") as f: | ||||
|         blocks = find_code_blocks(f) | ||||
| @pytest.mark.skip(reason="not implemented yet") | ||||
| def test_duplicate_offset(parser): | ||||
|     """Repeating the same module/offset in the same file is probably a typo""" | ||||
|     parser.read_lines( | ||||
|         [ | ||||
|             "// GLOBAL: HELLO 0x1234", | ||||
|             "int x = 1;", | ||||
|             "// GLOBAL: HELLO 0x1234", | ||||
|             "int y = 2;", | ||||
|         ] | ||||
|     ) | ||||
| 
 | ||||
|     assert len(blocks) == 4 | ||||
|     assert blocks[0].module == "TEST" | ||||
|     assert blocks[0].start_line == 9 | ||||
|     assert len(parser.alerts) == 1 | ||||
|     assert parser.alerts[0].code == ParserError.DUPLICATE_OFFSET | ||||
| 
 | ||||
|     assert blocks[1].module == "HELLO" | ||||
|     assert blocks[1].start_line == 9 | ||||
| 
 | ||||
|     # Duplicate modules are ignored | ||||
|     assert blocks[2].start_line == 16 | ||||
|     assert blocks[2].offset == 0x2345 | ||||
| def test_multiple_variables(parser): | ||||
|     """Theoretically the same global variable can appear in multiple modules""" | ||||
|     parser.read_lines( | ||||
|         [ | ||||
|             "// GLOBAL: HELLO 0x1234", | ||||
|             "// GLOBAL: WUZZUP 0x555", | ||||
|             "const char *g_greeting;", | ||||
|         ] | ||||
|     ) | ||||
|     assert len(parser.alerts) == 0 | ||||
|     assert len(parser.variables) == 2 | ||||
| 
 | ||||
|     assert blocks[3].module == "TEST" | ||||
|     assert blocks[3].offset == 0x2002 | ||||
| 
 | ||||
| def test_multiple_vtables(parser): | ||||
|     parser.read_lines( | ||||
|         [ | ||||
|             "// VTABLE: HELLO 0x1234", | ||||
|             "// VTABLE: TEST 0x5432", | ||||
|             "class MxString : public MxCore {", | ||||
|         ] | ||||
|     ) | ||||
|     assert len(parser.alerts) == 0 | ||||
|     assert len(parser.vtables) == 2 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 disinvite
					disinvite