Checkorder tool to keep functions in original binary order (#228)

* First commit of order tool
* More flexible match on module name. Bugfix on blank_or_comment
* Report inexact offset comments in verbose mode. Bugfix for exact regex
* Refactor checkorder into reusable isledecomp module
* Find bad comments in one pass, add awareness of TEMPLATE
* Refactor of state machine to prepare for reccmp integration
* Use isledecomp lib in reccmp
* Build isledecomp in GH actions, fix mypy complaint
* Ensure unit test cpp files will be ignored by reccmp
* Allow multiple offset markers, pep8 cleanup
* Remove unused variable
* Code style, remove unneeded module and TODO
* Final renaming and type hints
* Fix checkorder issues, add GH action and enforce (#2)
  * Fix checkorder issues
  * Add GH action
  * Test error case
  * Works
  * Fixes

---------

Co-authored-by: Christian Semmler <mail@csemmler.com>
2025-12-10 08:03:13 +00:00 · 2023-11-21 03:44:45 -05:00
parent 714d36b57d
commit 1ae3b07dc2
84 changed files with 4021 additions and 3209 deletions
--- a/tools/isledecomp/.gitignore
+++ b/tools/isledecomp/.gitignore
@@ -0,0 +1 @@
+isledecomp.egg-info/
--- a/tools/isledecomp/isledecomp/__init__.py
+++ b/tools/isledecomp/isledecomp/__init__.py
--- a/tools/isledecomp/isledecomp/dir.py
+++ b/tools/isledecomp/isledecomp/dir.py
@@ -0,0 +1,21 @@
+import os
+from typing import Iterator
+
+
+def is_file_cpp(filename: str) -> bool:
+    """Return True if the filename has a C++ header or source extension
+       (.h or .cpp). The comparison ignores case."""
+    extension = os.path.splitext(filename)[1].lower()
+    return extension == '.h' or extension == '.cpp'
+
+
+def walk_source_dir(source: str, recursive: bool = True) -> Iterator[str]:
+    """Generator that yields the absolute path of each C++ file (as judged
+       by is_file_cpp) found under the given directory. If recursive is
+       False, only the files directly inside the directory are reported."""
+
+    root = os.path.abspath(source)
+    for current_dir, _subdirs, filenames in os.walk(root):
+        yield from (os.path.join(current_dir, name)
+                    for name in filenames if is_file_cpp(name))
+
+        # os.walk is top-down by default, so the first directory reported
+        # is the root itself. Stop after it when we are not recursing.
+        if not recursive:
+            return
--- a/tools/isledecomp/isledecomp/parser/__init__.py
+++ b/tools/isledecomp/isledecomp/parser/__init__.py
@@ -0,0 +1 @@
+from .parser import find_code_blocks
--- a/tools/isledecomp/isledecomp/parser/parser.py
+++ b/tools/isledecomp/isledecomp/parser/parser.py
@@ -0,0 +1,142 @@
+# C++ file parser
+
+from typing import List, TextIO
+from enum import Enum
+from .util import (
+    CodeBlock,
+    OffsetMatch,
+    is_blank_or_comment,
+    match_offset_comment,
+    is_exact_offset_comment,
+    get_template_function_name,
+    remove_trailing_comment,
+    distinct_by_module,
+)
+
+
+class ReaderState(Enum):
+    """States of the line-by-line reader in find_code_blocks."""
+    # Looking for an offset comment that begins a new code block
+    WANT_OFFSET = 0
+    # Offset comment seen; the next "code line" is taken as the signature
+    WANT_SIG = 1
+    # Opening curly brace seen; reading lines of the function body
+    IN_FUNC = 2
+    # TEMPLATE offset comment seen; the next line holds the signature
+    IN_TEMPLATE = 3
+    # Signature seen without a brace; waiting for a line that is only '{'
+    WANT_CURLY = 4
+    # End of function found; blocks are emitted at the top of the next pass
+    FUNCTION_DONE = 5
+
+
+def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
+    """Read the IO stream (file) line-by-line and give the following report:
+       For each code block (function) in the file, what are its starting and
+       ending line numbers, and what is the given offset in the original
+       binary. We expect the result to be ordered by line number because we
+       are reading the file from start to finish.
+
+       One CodeBlock is produced for each distinct module named in the
+       offset comments above a function, so a single function can yield
+       several blocks that share the same signature and line numbers."""
+
+    blocks: List[CodeBlock] = []
+
+    # Offset comments collected for the function currently being read
+    offset_matches: List[OffsetMatch] = []
+
+    function_sig = None
+    start_line = None
+    end_line = None
+    state = ReaderState.WANT_OFFSET
+
+    # 1-based to match cvdump and your text editor
+    # I know it says 0, but we will increment before each readline()
+    line_no = 0
+    # When False, the next loop pass looks at the current line again
+    # instead of reading a new one.
+    can_seek = True
+
+    while True:
+        # Do this before reading again so that an EOF will not
+        # cause us to miss the last function of the file.
+        if state == ReaderState.FUNCTION_DONE:
+            # Our list of offset marks could have duplicates on
+            # module name, so we'll eliminate those now.
+            for offset_match in distinct_by_module(offset_matches):
+                block = CodeBlock(offset=offset_match.address,
+                                  signature=function_sig,
+                                  start_line=start_line,
+                                  end_line=end_line,
+                                  offset_comment=offset_match.comment,
+                                  module=offset_match.module,
+                                  is_template=offset_match.is_template,
+                                  is_stub=offset_match.is_stub)
+                blocks.append(block)
+            offset_matches = []
+            state = ReaderState.WANT_OFFSET
+
+        if can_seek:
+            line_no += 1
+            line = stream.readline()
+            # readline() gives an empty string only at EOF
+            if line == '':
+                break
+
+        # Offset comments are checked first, whatever state we are in
+        new_match = match_offset_comment(line)
+        if new_match is not None:
+            # We will allow multiple offsets if we have just begun
+            # the code block, but not after we hit the curly brace.
+            if state in (ReaderState.WANT_OFFSET, ReaderState.IN_TEMPLATE,
+                         ReaderState.WANT_SIG):
+                # If we detected an offset marker unexpectedly,
+                # we are handling it here so we can continue seeking.
+                can_seek = True
+
+                offset_matches.append(new_match)
+
+                if new_match.is_template:
+                    state = ReaderState.IN_TEMPLATE
+                else:
+                    state = ReaderState.WANT_SIG
+            else:
+                # We hit another offset unexpectedly.
+                # We can recover easily by just ending the function here.
+                # NOTE(review): start_line is not touched here. If we arrive
+                # from WANT_CURLY it still holds the value from the previous
+                # function (or None) -- confirm this is acceptable.
+                end_line = line_no - 1
+                state = ReaderState.FUNCTION_DONE
+
+                # Pause reading here so we handle the offset marker
+                # on the next loop iteration
+                can_seek = False
+
+        elif state == ReaderState.IN_TEMPLATE:
+            # TEMPLATE functions are a special case. The signature is
+            # given on the next line (in a // comment)
+            function_sig = get_template_function_name(line)
+            start_line = line_no
+            end_line = line_no
+            state = ReaderState.FUNCTION_DONE
+
+        elif state == ReaderState.WANT_SIG:
+            # Skip blank lines or comments that come after the offset
+            # marker. There is not a formal procedure for this, so just
+            # assume the next "code line" is the function signature
+            if not is_blank_or_comment(line):
+                # Inline functions may end with a comment. Strip that out
+                # to help parsing.
+                function_sig = remove_trailing_comment(line.strip())
+
+                # Now check to see if the opening curly bracket is on the
+                # same line. clang-format should prevent this (BraceWrapping)
+                # but it is easy to detect.
+                # If the entire function is on one line, handle that too.
+                if function_sig.endswith('{'):
+                    start_line = line_no
+                    state = ReaderState.IN_FUNC
+                elif (function_sig.endswith('}') or
+                        function_sig.endswith('};')):
+                    start_line = line_no
+                    end_line = line_no
+                    state = ReaderState.FUNCTION_DONE
+                else:
+                    state = ReaderState.WANT_CURLY
+
+        elif state == ReaderState.WANT_CURLY:
+            # The brace may be indented, but must be alone on its line
+            if line.strip() == '{':
+                start_line = line_no
+                state = ReaderState.IN_FUNC
+
+        elif state == ReaderState.IN_FUNC:
+            # Naive but reasonable assumption that functions will end with
+            # a curly brace on its own line with no prepended spaces.
+            if line.startswith('}'):
+                end_line = line_no
+                state = ReaderState.FUNCTION_DONE
+
+    # NOTE(review): if EOF is reached in any state other than FUNCTION_DONE,
+    # whatever is left in offset_matches is dropped without a block, e.g. a
+    # last function whose closing brace never starts a line.
+    return blocks
--- a/tools/isledecomp/isledecomp/parser/util.py
+++ b/tools/isledecomp/isledecomp/parser/util.py
@@ -0,0 +1,97 @@
+# C++ Parser utility functions and data structures
+from __future__ import annotations  # python <3.10 compatibility
+import re
+from typing import List
+from collections import namedtuple
+
+
+# One function as reported by the parser: the address from its offset
+# comment, the signature text, the 1-based start and end line numbers,
+# the stripped offset comment itself, the module it names and the
+# TEMPLATE / STUB flags.
+CodeBlock = namedtuple('CodeBlock',
+                       ['offset', 'signature', 'start_line', 'end_line',
+                        'offset_comment', 'module', 'is_template', 'is_stub'])
+
+# The result of parsing a single offset comment (see match_offset_comment).
+# 'address' is an int and 'comment' is the stripped text of the line.
+OffsetMatch = namedtuple('OffsetMatch', ['module', 'address', 'is_template',
+                                         'is_stub', 'comment'])
+
+# This has not been formally established, but considering that "STUB"
+# is a temporary state for a function, we assume it will appear last,
+# after any other modifiers (i.e. TEMPLATE)
+
+# To match a reasonable variance of formatting for the offset comment
+# Groups: 1 = module name, 2 = hex digits of the address (0x prefix is
+# optional and not captured), 3 = TEMPLATE marker, 4 = STUB marker.
+# Case is ignored.
+offsetCommentRegex = re.compile(r'\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?',  # nopep8
+                                flags=re.I)
+
+# To match the exact syntax (text upper case, hex lower case, with spaces)
+# that is used in most places
+# Anchored at both ends, so leading or trailing whitespace is rejected.
+offsetCommentExactRegex = re.compile(r'^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$')  # nopep8
+
+
+# The goal here is to just read whatever is on the next line, so some
+# flexibility in the formatting seems OK
+# Group 1 is the text that follows the // and its whitespace.
+templateCommentRegex = re.compile(r'\s*//\s+(.*)')
+
+
+# To remove any comment (//) or block comment (/*) and its leading spaces
+# from the end of a code line
+trailingCommentRegex = re.compile(r'(\s*(?://|/\*).*)$')
+
+
+def get_template_function_name(line: str) -> str:
+    """Read the signature of a special TEMPLATE function from the comment
+       line that follows its offset marker. If the line is not in the
+       expected // comment form, it is returned unchanged."""
+    found = templateCommentRegex.match(line)
+    return line if found is None else found.group(1)
+
+
+def remove_trailing_comment(line: str) -> str:
+    """Return the line with everything from the first // or /* (plus the
+       whitespace in front of it) to the end of the line removed.
+       A line without a comment is returned unchanged."""
+    return trailingCommentRegex.sub('', line)
+
+
+def is_blank_or_comment(line: str) -> bool:
+    """Used while reading ahead from an offset comment to the function
+       signature. True for a line that is empty (ignoring whitespace),
+       that begins with // or /*, or that ends with */. Such lines are
+       skipped by the parser."""
+    text = line.strip()
+    if not text:
+        return True
+
+    return text.startswith(('//', '/*')) or text.endswith('*/')
+
+
+def is_exact_offset_comment(line: str) -> bool:
+    """Check the line against our (unofficial) strict syntax for offset
+       comments. A comment that parses but fails this check is one we
+       may want to ask the user to fix for style points."""
+    exact_match = offsetCommentExactRegex.match(line)
+    return exact_match is not None
+
+
+def match_offset_comment(line: str) -> OffsetMatch | None:
+    """Parse an offset comment using the lenient syntax. Returns None if
+       the line is not an offset comment. Otherwise returns an OffsetMatch
+       with the module name, the address (read as hexadecimal), the
+       TEMPLATE and STUB flags, and the stripped text of the line."""
+    found = offsetCommentRegex.match(line)
+    if found is None:
+        return None
+
+    # The optional markers come back as None when they are absent
+    module, hex_digits, template_mark, stub_mark = found.groups()
+    return OffsetMatch(module=module,
+                       address=int(hex_digits, 16),
+                       is_template=template_mark is not None,
+                       is_stub=stub_mark is not None,
+                       comment=line.strip())
+
+
+def distinct_by_module(offsets: List) -> List:
+    """Reduce a list of offset markers to one marker per module name.
+       Module names are compared case-insensitively and the marker that
+       appears first wins. The markers that remain keep their order."""
+
+    # Nothing to deduplicate: hand back the very list we were given.
+    if len(offsets) < 2:
+        return offsets
+
+    # Dict maintains insertion order in python >=3.7
+    first_by_module = {}
+    for marker in offsets:
+        # setdefault stores the marker only if the key is not present yet
+        first_by_module.setdefault(marker.module.upper(), marker)
+
+    return list(first_by_module.values())
--- a/tools/isledecomp/setup.py
+++ b/tools/isledecomp/setup.py
@@ -0,0 +1,9 @@
+from setuptools import setup, find_packages
+
+# Packaging metadata for the isledecomp Python tools
+setup(
+    name='isledecomp',
+    version='0.1.0',
+    description='Python tools for the isledecomp project',
+    # Let setuptools discover the packages instead of listing them by hand
+    packages=find_packages(),
+    # NOTE(review): tests_require is reported as deprecated by recent
+    # setuptools releases -- confirm whether this should become an extra.
+    tests_require=['pytest'],
+)
--- a/tools/isledecomp/tests/__init__.py
+++ b/tools/isledecomp/tests/__init__.py
--- a/tools/isledecomp/tests/samples/basic_class.cpp
+++ b/tools/isledecomp/tests/samples/basic_class.cpp
@@ -0,0 +1,29 @@
+// Sample for python unit tests
+// Not part of the decomp
+
+// A very simple class
+
+class TestClass {
+public:
+  TestClass();
+  virtual ~TestClass() override;
+
+  virtual MxResult Tickle() override; // vtable+08
+
+  // OFFSET: TEST 0x12345678
+  inline const char* ClassName() const // vtable+0c
+  {
+    // 0xabcd1234
+    return "TestClass";
+  }
+
+  // OFFSET: TEST 0xdeadbeef
+  inline MxBool IsA(const char* name) const override // vtable+10
+  {
+    return !strcmp(name, TestClass::ClassName());
+  }
+
+private:
+  int m_hello;
+  int m_hiThere;
+};
--- a/tools/isledecomp/tests/samples/basic_file.cpp
+++ b/tools/isledecomp/tests/samples/basic_file.cpp
@@ -0,0 +1,22 @@
+// Sample for python unit tests
+// Not part of the decomp
+
+// A very simple well-formed code file
+
+// OFFSET: TEST 0x1234
+void function01()
+{
+  // TODO
+}
+
+// OFFSET: TEST 0x2345
+void function02()
+{
+  // TODO
+}
+
+// OFFSET: TEST 0x3456
+void function03()
+{
+  // TODO
+}
--- a/tools/isledecomp/tests/samples/inline.cpp
+++ b/tools/isledecomp/tests/samples/inline.cpp
@@ -0,0 +1,8 @@
+// Sample for python unit tests
+// Not part of the decomp
+
+// OFFSET: TEST 0x10000001
+inline const char* OneLineWithComment() const { return "MxDSObject"; }; // hi there
+
+// OFFSET: TEST 0x10000002
+inline const char* OneLine() const { return "MxDSObject"; };
--- a/tools/isledecomp/tests/samples/missing_offset.cpp
+++ b/tools/isledecomp/tests/samples/missing_offset.cpp
@@ -0,0 +1,16 @@
+// Sample for python unit tests
+// Not part of the decomp
+
+#include <stdio.h>
+
+int no_offset_comment()
+{
+  static int dummy = 123;
+  return -1;
+}
+
+// OFFSET: TEST 0xdeadbeef
+void regular_ole_function()
+{
+  printf("hi there");
+}
--- a/tools/isledecomp/tests/samples/multiple_offsets.cpp
+++ b/tools/isledecomp/tests/samples/multiple_offsets.cpp
@@ -0,0 +1,25 @@
+// Sample for python unit tests
+// Not part of the decomp
+
+// Handling multiple offset markers
+
+// OFFSET: TEST 0x1234
+// OFFSET: HELLO 0x5555
+void different_modules()
+{
+  // TODO
+}
+
+// OFFSET: TEST 0x2345
+// OFFSET: TEST 0x1234
+void same_module()
+{
+  // TODO
+}
+
+// OFFSET: TEST 0x2002
+// OFFSET: test 0x1001
+void same_case_insensitive()
+{
+  // TODO
+}
--- a/tools/isledecomp/tests/samples/oneline_function.cpp
+++ b/tools/isledecomp/tests/samples/oneline_function.cpp
@@ -0,0 +1,12 @@
+// Sample for python unit tests
+// Not part of the decomp
+
+// OFFSET: TEST 0x1234
+void short_function() { static char* msg = "oneliner"; }
+
+// OFFSET: TEST 0x5555
+void function_after_one_liner()
+{
+  // This function comes after the previous that is on a single line.
+  // Do we report the offset for this one correctly?
+}
--- a/tools/isledecomp/tests/samples/out_of_order.cpp
+++ b/tools/isledecomp/tests/samples/out_of_order.cpp
@@ -0,0 +1,20 @@
+// Sample for python unit tests
+// Not part of the decomp
+
+// OFFSET: TEST 0x1001
+void function_order01()
+{
+    // TODO
+}
+
+// OFFSET: TEST 0x1003
+void function_order03()
+{
+    // TODO
+}
+
+// OFFSET: TEST 0x1002
+void function_order02()
+{
+    // TODO
+}
--- a/tools/isledecomp/tests/samples/poorly_formatted.cpp
+++ b/tools/isledecomp/tests/samples/poorly_formatted.cpp
@@ -0,0 +1,23 @@
+// Sample for python unit tests
+// Not part of the decomp
+
+// While it's reasonable to expect a well-formed file (and clang-format
+// will make sure we get one), this will put the parser through its paces.
+
+// OFFSET: TEST 0x1234
+void curly_with_spaces()
+  {
+  static char* msg = "hello";
+  }
+
+// OFFSET: TEST 0x5555
+void weird_closing_curly()
+{
+  int x = 123; }
+
+// OFFSET: HELLO 0x5656
+void bad_indenting() {
+  if (0)
+{
+  int y = 5;
+}}
--- a/tools/isledecomp/tests/test_parser.py
+++ b/tools/isledecomp/tests/test_parser.py
@@ -0,0 +1,128 @@
+import os
+import pytest
+from typing import List, TextIO
+from isledecomp.parser import find_code_blocks
+from isledecomp.parser.util import CodeBlock
+
+SAMPLE_DIR = os.path.join(os.path.dirname(__file__), 'samples')
+
+
+def sample_file(filename: str) -> TextIO:
+    """Open one of the sample files for reading. The path is built from
+       SAMPLE_DIR so that it does not depend on the cwd where we run
+       the test. Closing the file is left to the caller."""
+    return open(os.path.join(SAMPLE_DIR, filename), 'r')
+
+
+def code_blocks_are_sorted(blocks: List[CodeBlock]) -> bool:
+    """True if the offsets never decrease from one block to the next.
+       Neighbors with equal offsets still count as sorted."""
+    return all(earlier.offset <= later.offset
+               for earlier, later in zip(blocks, blocks[1:]))
+
+
+# Tests are below #
+
+
+def test_sanity():
+    """Read a very basic file"""
+    with sample_file('basic_file.cpp') as f:
+        blocks = find_code_blocks(f)
+
+    # The sample has three functions with offsets in ascending order
+    assert len(blocks) == 3
+    assert code_blocks_are_sorted(blocks) is True
+    # n.b. The parser returns line numbers as 1-based
+    # Function starts when we see the opening curly brace
+    assert blocks[0].start_line == 8
+    assert blocks[0].end_line == 10
+
+
+def test_oneline():
+    """(Assuming clang-format permits this) This sample has a function
+    on a single line. This will test the end-of-function detection"""
+    with sample_file('oneline_function.cpp') as f:
+        blocks = find_code_blocks(f)
+
+    # The function that follows the one-liner must be reported too
+    assert len(blocks) == 2
+    # The one-liner is on line 5 of the sample, so it starts and ends there
+    assert blocks[0].start_line == 5
+    assert blocks[0].end_line == 5
+
+
+def test_missing_offset():
+    """What if the function doesn't have an offset comment?"""
+    with sample_file('missing_offset.cpp') as f:
+        blocks = find_code_blocks(f)
+
+    # The sample has two functions but only the second has an offset comment
+    # TODO: For now, the function without the offset will just be ignored.
+    # Would be the same outcome if the comment was present but mangled and
+    # we failed to match it. We should detect these cases in the future.
+    assert len(blocks) == 1
+
+
+def test_jumbled_case():
+    """The parser just reports what it sees. It is the responsibility of
+       the downstream tools to do something about a jumbled file.
+       Just verify that we are reading it correctly."""
+    with sample_file('out_of_order.cpp') as f:
+        blocks = find_code_blocks(f)
+
+    # The offsets appear in the file as 0x1001, 0x1003, 0x1002
+    assert len(blocks) == 3
+    assert code_blocks_are_sorted(blocks) is False
+
+
+def test_bad_file():
+    """The sample has indented braces, a closing brace that shares its
+       line with code, and an opening brace on the signature line.
+       All three functions should still be reported."""
+    with sample_file('poorly_formatted.cpp') as f:
+        blocks = find_code_blocks(f)
+
+    assert len(blocks) == 3
+
+
+def test_indented():
+    """Offsets for functions inside of a class will probably be indented."""
+    with sample_file('basic_class.cpp') as f:
+        blocks = find_code_blocks(f)
+
+    # TODO: We don't properly detect the end of these functions
+    # because the closing brace is indented. However... knowing where each
+    # function ends is less important (for now) than capturing
+    # all the functions that are there.
+
+    assert len(blocks) == 2
+    assert blocks[0].offset == int('0x12345678', 16)
+    # start_line is the line that holds the opening curly brace
+    assert blocks[0].start_line == 15
+    # assert blocks[0].end_line == 18
+
+    assert blocks[1].offset == int('0xdeadbeef', 16)
+    assert blocks[1].start_line == 22
+    # assert blocks[1].end_line == 24
+
+
+def test_inline():
+    """One-line inline functions, with and without a trailing comment,
+       should start and end on the same line."""
+    with sample_file('inline.cpp') as f:
+        blocks = find_code_blocks(f)
+
+    assert len(blocks) == 2
+    for block in blocks:
+        assert block.start_line is not None
+        assert block.start_line == block.end_line
+
+
+def test_multiple_offsets():
+    """If multiple offset marks appear before a code block, take them
+       all but ensure module name (case-insensitive) is distinct.
+       Use first module occurrence in case of duplicates."""
+    with sample_file('multiple_offsets.cpp') as f:
+        blocks = find_code_blocks(f)
+
+    # Three functions, but the first has two distinct modules
+    assert len(blocks) == 4
+    assert blocks[0].module == 'TEST'
+    assert blocks[0].start_line == 9
+
+    # Same function as blocks[0], reported again for the second module
+    assert blocks[1].module == 'HELLO'
+    assert blocks[1].start_line == 9
+
+    # Duplicate modules are ignored
+    assert blocks[2].start_line == 16
+    assert blocks[2].offset == 0x2345
+
+    # 'TEST' and 'test' count as the same module; the first one is kept
+    assert blocks[3].module == 'TEST'
+    assert blocks[3].offset == 0x2002
--- a/tools/isledecomp/tests/test_parser_util.py
+++ b/tools/isledecomp/tests/test_parser_util.py
@@ -0,0 +1,113 @@
+import pytest
+from collections import namedtuple
+from typing import List
+from isledecomp.parser.util import (
+    is_blank_or_comment,
+    match_offset_comment,
+    is_exact_offset_comment,
+    distinct_by_module,
+)
+
+
+blank_or_comment_param = [
+    (True,  ''),
+    (True,  '\t'),
+    (True,  '    '),
+    (False, '\tint abc=123;'),
+    (True,  '// OFFSET: LEGO1 0xdeadbeef'),
+    (True,  '   /* Block comment beginning'),
+    (True,  'Block comment ending */   '),
+
+    # TODO: does clang-format have anything to say about these cases?
+    (False, 'x++; // Comment folows'),
+    (False, 'x++; /* Block comment begins'),
+]
+
+
+@pytest.mark.parametrize('expected, line', blank_or_comment_param)
+def test_is_blank_or_comment(line: str, expected: bool):
+    """Each sample line should be classified as given in the table."""
+    assert is_blank_or_comment(line) is expected
+
+
+offset_comment_samples = [
+    # (can_parse: bool, exact_match: bool, line: str)
+    # Should match both expected modules with optional STUB marker
+    (True,  True,  '// OFFSET: LEGO1 0xdeadbeef'),
+    (True,  True,  '// OFFSET: LEGO1 0xdeadbeef STUB'),
+    (True,  True,  '// OFFSET: ISLE 0x12345678'),
+    (True,  True,  '// OFFSET: ISLE 0x12345678 STUB'),
+
+    # No trailing spaces allowed
+    (True,  False, '// OFFSET: LEGO1 0xdeadbeef  '),
+    (True,  False, '// OFFSET: LEGO1 0xdeadbeef STUB '),
+
+    # Must have exactly one space between elements
+    (True,  False, '//OFFSET: ISLE 0xdeadbeef'),
+    (True,  False, '// OFFSET:ISLE 0xdeadbeef'),
+    (True,  False, '//  OFFSET: ISLE 0xdeadbeef'),
+    (True,  False, '// OFFSET:  ISLE 0xdeadbeef'),
+    (True,  False, '// OFFSET: ISLE  0xdeadbeef'),
+    (True,  False, '// OFFSET: ISLE 0xdeadbeef  STUB'),
+
+    # Must have 0x prefix for hex number
+    (True,  False, '// OFFSET: ISLE deadbeef'),
+
+    # Offset, module name, and STUB must be uppercase
+    (True,  False, '// offset: ISLE 0xdeadbeef'),
+    (True,  False, '// offset: isle 0xdeadbeef'),
+    (True,  False, '// OFFSET: LEGO1 0xdeadbeef stub'),
+
+    # Hex string must be lowercase
+    (True,  False, '// OFFSET: ISLE 0xDEADBEEF'),
+
+    # TODO: How flexible should we be with matching the module name?
+    (True,  True,  '// OFFSET: OMNI 0x12345678'),
+    (True,  True,  '// OFFSET: LEG01 0x12345678'),
+    (True,  False,  '// OFFSET: hello 0x12345678'),
+
+    # Not close enough to match
+    (False, False, '// OFFSET: ISLE0x12345678'),
+    (False, False, '// OFFSET: 0x12345678'),
+    (False, False, '// LEGO1: 0x12345678'),
+
+    # Hex string shorter than 8 characters
+    (True,  True,  '// OFFSET: LEGO1 0x1234'),
+
+    # TODO: These match but shouldn't.
+    # (False, False, '// OFFSET: LEGO1 0'),
+    # (False, False, '// OFFSET: LEGO1 0x'),
+]
+
+
+@pytest.mark.parametrize('match, exact, line', offset_comment_samples)
+def test_offset_match(line: str, match: bool, exact):
+    """The lenient parser should accept or reject each sample as given
+       in the 'match' column. The 'exact' column is not used here; it is
+       accepted only because the sample table is shared."""
+    did_match = match_offset_comment(line) is not None
+    assert did_match is match
+
+
+@pytest.mark.parametrize('match, exact, line', offset_comment_samples)
+def test_exact_offset_comment(line: str, exact: bool, match):
+    """The strict syntax check should agree with the 'exact' column.
+       The 'match' column is not used here; it is accepted only because
+       the sample table is shared."""
+    assert is_exact_offset_comment(line) is exact
+
+
+# Helper for the next test: cut down version of OffsetMatch
+MiniOfs = namedtuple('MiniOfs', ['module', 'value'])
+
+distinct_by_module_samples = [
+    # empty set
+    ([], []),
+    # same module name
+    ([MiniOfs('TEST', 123), MiniOfs('TEST', 555)],
+     [MiniOfs('TEST', 123)]),
+    # same module name, case-insensitive
+    ([MiniOfs('test', 123), MiniOfs('TEST', 555)],
+     [MiniOfs('test', 123)]),
+    # duplicates, non-consecutive
+    ([MiniOfs('test', 123), MiniOfs('abc', 111), MiniOfs('TEST', 555)],
+     [MiniOfs('test', 123), MiniOfs('abc', 111)]),
+]
+
+
+@pytest.mark.parametrize('sample, expected', distinct_by_module_samples)
+def test_distinct_by_module(sample: List[MiniOfs], expected: List[MiniOfs]):
+    """Only the first marker for each module name (case-insensitive)
+       should remain, in the original order."""
+    assert distinct_by_module(sample) == expected