Checkorder tool to keep functions in original binary order (#228)

* First commit of order tool * More flexible match on module name. Bugfix on blank_or_comment * Report inexact offset comments in verbose mode. Bugfix for exact regex * Refactor checkorder into reusable isledecomp module * Find bad comments in one pass, add awareness of TEMPLATE * Refactor of state machine to prepare for reccmp integration * Use isledecomp lib in reccmp * Build isledecomp in GH actions, fix mypy complaint * Ensure unit test cpp files will be ignored by reccmp * Allow multiple offset markers, pep8 cleanup * Remove unused variable * Code style, remove unneeded module and TODO * Final renaming and type hints * Fix checkorder issues, add GH action and enforce (#2) * Fix checkorder issues * Add GH action * Test error case * Works * Fixes --------- Co-authored-by: Christian Semmler <mail@csemmler.com>
2025-10-23 00:14:22 +00:00 · 2023-11-21 03:44:45 -05:00
parent 714d36b57d
commit 1ae3b07dc2
84 changed files with 4021 additions and 3209 deletions
--- a/tools/isledecomp/tests/__init__.py
+++ b/tools/isledecomp/tests/__init__.py
--- a/tools/isledecomp/tests/samples/basic_class.cpp
+++ b/tools/isledecomp/tests/samples/basic_class.cpp
@@ -0,0 +1,29 @@
+// Sample for python unit tests
+// Not part of the decomp
+
+// A very simple class
+
+class TestClass {
+public:
+  TestClass();
+  virtual ~TestClass() override;
+
+  virtual MxResult Tickle() override; // vtable+08
+
+  // OFFSET: TEST 0x12345678
+  inline const char* ClassName() const // vtable+0c
+  {
+    // 0xabcd1234
+    return "TestClass";
+  }
+
+  // OFFSET: TEST 0xdeadbeef
+  inline MxBool IsA(const char* name) const override // vtable+10
+  {
+    return !strcmp(name, TestClass::ClassName());
+  }
+
+private:
+  int m_hello;
+  int m_hiThere;
+};
--- a/tools/isledecomp/tests/samples/basic_file.cpp
+++ b/tools/isledecomp/tests/samples/basic_file.cpp
@@ -0,0 +1,22 @@
+// Sample for python unit tests
+// Not part of the decomp
+
+// A very simple well-formed code file
+
+// OFFSET: TEST 0x1234
+void function01()
+{
+  // TODO
+}
+
+// OFFSET: TEST 0x2345
+void function02()
+{
+  // TODO
+}
+
+// OFFSET: TEST 0x3456
+void function03()
+{
+  // TODO
+}
--- a/tools/isledecomp/tests/samples/inline.cpp
+++ b/tools/isledecomp/tests/samples/inline.cpp
@@ -0,0 +1,8 @@
+// Sample for python unit tests
+// Not part of the decomp
+
+// OFFSET: TEST 0x10000001
+inline const char* OneLineWithComment() const { return "MxDSObject"; }; // hi there
+
+// OFFSET: TEST 0x10000002
+inline const char* OneLine() const { return "MxDSObject"; };
--- a/tools/isledecomp/tests/samples/missing_offset.cpp
+++ b/tools/isledecomp/tests/samples/missing_offset.cpp
@@ -0,0 +1,16 @@
+// Sample for python unit tests
+// Not part of the decomp
+
+#include <stdio.h>
+
+int no_offset_comment()
+{
+  static int dummy = 123;
+  return -1;
+}
+
+// OFFSET: TEST 0xdeadbeef
+void regular_ole_function()
+{
+  printf("hi there");
+}
--- a/tools/isledecomp/tests/samples/multiple_offsets.cpp
+++ b/tools/isledecomp/tests/samples/multiple_offsets.cpp
@@ -0,0 +1,25 @@
+// Sample for python unit tests
+// Not part of the decomp
+
+// Handling multiple offset markers
+
+// OFFSET: TEST 0x1234
+// OFFSET: HELLO 0x5555
+void different_modules()
+{
+  // TODO
+}
+
+// OFFSET: TEST 0x2345
+// OFFSET: TEST 0x1234
+void same_module()
+{
+  // TODO
+}
+
+// OFFSET: TEST 0x2002
+// OFFSET: test 0x1001
+void same_case_insensitive()
+{
+  // TODO
+}
--- a/tools/isledecomp/tests/samples/oneline_function.cpp
+++ b/tools/isledecomp/tests/samples/oneline_function.cpp
@@ -0,0 +1,12 @@
+// Sample for python unit tests
+// Not part of the decomp
+
+// OFFSET: TEST 0x1234
+void short_function() { static char* msg = "oneliner"; }
+
+// OFFSET: TEST 0x5555
+void function_after_one_liner()
+{
+  // This function comes after the previous one, which is on a single line.
+  // Do we report the offset for this one correctly?
+}
--- a/tools/isledecomp/tests/samples/out_of_order.cpp
+++ b/tools/isledecomp/tests/samples/out_of_order.cpp
@@ -0,0 +1,20 @@
+// Sample for python unit tests
+// Not part of the decomp
+
+// OFFSET: TEST 0x1001
+void function_order01()
+{
+    // TODO
+}
+
+// OFFSET: TEST 0x1003
+void function_order03()
+{
+    // TODO
+}
+
+// OFFSET: TEST 0x1002
+void function_order02()
+{
+    // TODO
+}
--- a/tools/isledecomp/tests/samples/poorly_formatted.cpp
+++ b/tools/isledecomp/tests/samples/poorly_formatted.cpp
@@ -0,0 +1,23 @@
+// Sample for python unit tests
+// Not part of the decomp
+
+// While it's reasonable to expect a well-formed file (and clang-format
+// will make sure we get one), this will put the parser through its paces.
+
+// OFFSET: TEST 0x1234
+void curly_with_spaces()
+  {
+  static char* msg = "hello";
+  }
+
+// OFFSET: TEST 0x5555
+void weird_closing_curly()
+{
+  int x = 123; }
+
+// OFFSET: HELLO 0x5656
+void bad_indenting() {
+  if (0)
+{
+  int y = 5;
+}}
--- a/tools/isledecomp/tests/test_parser.py
+++ b/tools/isledecomp/tests/test_parser.py
@@ -0,0 +1,128 @@
+import os
+import pytest
+from typing import List, TextIO
+from isledecomp.parser import find_code_blocks
+from isledecomp.parser.util import CodeBlock
+
+SAMPLE_DIR = os.path.join(os.path.dirname(__file__), 'samples')
+
+
+def sample_file(filename: str) -> TextIO:
+    """Wrapper for opening the samples from the directory that does not
+       depend on the cwd where we run the test"""
+    full_path = os.path.join(SAMPLE_DIR, filename)
+    return open(full_path, 'r')
+
+
+def code_blocks_are_sorted(blocks: List[CodeBlock]) -> bool:
+    """Return True if the blocks' offsets are already in ascending order"""
+    just_offsets = [block.offset for block in blocks]
+    return just_offsets == sorted(just_offsets)
+
+
+# Tests are below #
+
+
+def test_sanity():
+    """Read a very basic file"""
+    with sample_file('basic_file.cpp') as f:
+        blocks = find_code_blocks(f)
+
+    assert len(blocks) == 3
+    assert code_blocks_are_sorted(blocks) is True
+    # N.B. The parser reports line numbers as 1-based.
+    # A function starts on the line where we see its opening curly brace.
+    assert blocks[0].start_line == 8
+    assert blocks[0].end_line == 10
+
+
+def test_oneline():
+    """(Assuming clang-format permits this) This sample has a function
+    on a single line. This will test the end-of-function detection"""
+    with sample_file('oneline_function.cpp') as f:
+        blocks = find_code_blocks(f)
+
+    assert len(blocks) == 2
+    assert blocks[0].start_line == 5
+    assert blocks[0].end_line == 5
+
+
+def test_missing_offset():
+    """What if the function doesn't have an offset comment?"""
+    with sample_file('missing_offset.cpp') as f:
+        blocks = find_code_blocks(f)
+
+    # TODO: For now, the function without the offset will just be ignored.
+    # Would be the same outcome if the comment was present but mangled and
+    # we failed to match it. We should detect these cases in the future.
+    assert len(blocks) == 1
+
+
+def test_jumbled_case():
+    """The parser just reports what it sees. It is the responsibility of
+       the downstream tools to do something about a jumbled file.
+       Just verify that we are reading it correctly."""
+    with sample_file('out_of_order.cpp') as f:
+        blocks = find_code_blocks(f)
+
+    assert len(blocks) == 3
+    assert code_blocks_are_sorted(blocks) is False
+
+
+def test_bad_file():
+    with sample_file('poorly_formatted.cpp') as f:
+        blocks = find_code_blocks(f)
+
+    assert len(blocks) == 3
+
+
+def test_indented():
+    """Offsets for functions inside of a class will probably be indented."""
+    with sample_file('basic_class.cpp') as f:
+        blocks = find_code_blocks(f)
+
+    # TODO: We don't properly detect the end of these functions
+    # because the closing brace is indented. However... knowing where each
+    # function ends is less important (for now) than capturing
+    # all the functions that are there.
+
+    assert len(blocks) == 2
+    assert blocks[0].offset == int('0x12345678', 16)
+    assert blocks[0].start_line == 15
+    # assert blocks[0].end_line == 18
+
+    assert blocks[1].offset == int('0xdeadbeef', 16)
+    assert blocks[1].start_line == 22
+    # assert blocks[1].end_line == 24
+
+
+def test_inline():
+    with sample_file('inline.cpp') as f:
+        blocks = find_code_blocks(f)
+
+    assert len(blocks) == 2
+    for block in blocks:
+        assert block.start_line is not None
+        assert block.start_line == block.end_line
+
+
+def test_multiple_offsets():
+    """If multiple offset markers appear before a code block, take them
+       all but ensure module name (case-insensitive) is distinct.
+       Use first module occurrence in case of duplicates."""
+    with sample_file('multiple_offsets.cpp') as f:
+        blocks = find_code_blocks(f)
+
+    assert len(blocks) == 4
+    assert blocks[0].module == 'TEST'
+    assert blocks[0].start_line == 9
+
+    assert blocks[1].module == 'HELLO'
+    assert blocks[1].start_line == 9
+
+    # Duplicate modules are ignored
+    assert blocks[2].start_line == 16
+    assert blocks[2].offset == 0x2345
+
+    assert blocks[3].module == 'TEST'
+    assert blocks[3].offset == 0x2002
--- a/tools/isledecomp/tests/test_parser_util.py
+++ b/tools/isledecomp/tests/test_parser_util.py
@@ -0,0 +1,113 @@
+import pytest
+from collections import namedtuple
+from typing import List
+from isledecomp.parser.util import (
+    is_blank_or_comment,
+    match_offset_comment,
+    is_exact_offset_comment,
+    distinct_by_module,
+)
+
+
+blank_or_comment_param = [
+    (True,  ''),
+    (True,  '\t'),
+    (True,  '    '),
+    (False, '\tint abc=123;'),
+    (True,  '// OFFSET: LEGO1 0xdeadbeef'),
+    (True,  '   /* Block comment beginning'),
+    (True,  'Block comment ending */   '),
+
+    # TODO: does clang-format have anything to say about these cases?
+    (False, 'x++; // Comment folows'),
+    (False, 'x++; /* Block comment begins'),
+]
+
+
+@pytest.mark.parametrize('expected, line', blank_or_comment_param)
+def test_is_blank_or_comment(line: str, expected: bool):
+    assert is_blank_or_comment(line) is expected
+
+
+offset_comment_samples = [
+    # (can_parse: bool, exact_match: bool, line: str)
+    # Should match both expected modules with optional STUB marker
+    (True,  True,  '// OFFSET: LEGO1 0xdeadbeef'),
+    (True,  True,  '// OFFSET: LEGO1 0xdeadbeef STUB'),
+    (True,  True,  '// OFFSET: ISLE 0x12345678'),
+    (True,  True,  '// OFFSET: ISLE 0x12345678 STUB'),
+
+    # No trailing spaces allowed
+    (True,  False, '// OFFSET: LEGO1 0xdeadbeef  '),
+    (True,  False, '// OFFSET: LEGO1 0xdeadbeef STUB '),
+
+    # Must have exactly one space between elements
+    (True,  False, '//OFFSET: ISLE 0xdeadbeef'),
+    (True,  False, '// OFFSET:ISLE 0xdeadbeef'),
+    (True,  False, '//  OFFSET: ISLE 0xdeadbeef'),
+    (True,  False, '// OFFSET:  ISLE 0xdeadbeef'),
+    (True,  False, '// OFFSET: ISLE  0xdeadbeef'),
+    (True,  False, '// OFFSET: ISLE 0xdeadbeef  STUB'),
+
+    # Must have 0x prefix for hex number
+    (True,  False, '// OFFSET: ISLE deadbeef'),
+
+    # Offset, module name, and STUB must be uppercase
+    (True,  False, '// offset: ISLE 0xdeadbeef'),
+    (True,  False, '// offset: isle 0xdeadbeef'),
+    (True,  False, '// OFFSET: LEGO1 0xdeadbeef stub'),
+
+    # Hex string must be lowercase
+    (True,  False, '// OFFSET: ISLE 0xDEADBEEF'),
+
+    # TODO: How flexible should we be with matching the module name?
+    (True,  True,  '// OFFSET: OMNI 0x12345678'),
+    (True,  True,  '// OFFSET: LEG01 0x12345678'),
+    (True,  False,  '// OFFSET: hello 0x12345678'),
+
+    # Not close enough to match
+    (False, False, '// OFFSET: ISLE0x12345678'),
+    (False, False, '// OFFSET: 0x12345678'),
+    (False, False, '// LEGO1: 0x12345678'),
+
+    # Hex string shorter than 8 characters
+    (True,  True,  '// OFFSET: LEGO1 0x1234'),
+
+    # TODO: These match but shouldn't.
+    # (False, False, '// OFFSET: LEGO1 0'),
+    # (False, False, '// OFFSET: LEGO1 0x'),
+]
+
+
+@pytest.mark.parametrize('match, exact, line', offset_comment_samples)
+def test_offset_match(line: str, match: bool, exact):
+    did_match = match_offset_comment(line) is not None
+    assert did_match is match
+
+
+@pytest.mark.parametrize('match, exact, line', offset_comment_samples)
+def test_exact_offset_comment(line: str, exact: bool, match):
+    assert is_exact_offset_comment(line) is exact
+
+
+# Helper for the next test: cut down version of OffsetMatch
+MiniOfs = namedtuple('MiniOfs', ['module', 'value'])
+
+distinct_by_module_samples = [
+    # empty set
+    ([], []),
+    # same module name
+    ([MiniOfs('TEST', 123), MiniOfs('TEST', 555)],
+     [MiniOfs('TEST', 123)]),
+    # same module name, case-insensitive
+    ([MiniOfs('test', 123), MiniOfs('TEST', 555)],
+     [MiniOfs('test', 123)]),
+    # duplicates, non-consecutive
+    ([MiniOfs('test', 123), MiniOfs('abc', 111), MiniOfs('TEST', 555)],
+     [MiniOfs('test', 123), MiniOfs('abc', 111)]),
+]
+
+
+@pytest.mark.parametrize('sample, expected', distinct_by_module_samples)
+def test_distinct_by_module(sample: List[MiniOfs], expected: List[MiniOfs]):
+    assert distinct_by_module(sample) == expected