Python Linting and Code Formatting (#298)

* Create common print_diff function

* Add pylint and black

* Fix linting, move classes to utils

* Add black/pylint to github actions

* Fix linting

* Move Bin and SymInfo into their own files

* Split out format

* Tidy up workflows and pip, add readme

* Lint tests, add tests to readme
This commit is contained in:
Thomas Phillips
2023-11-26 07:27:42 +13:00
committed by GitHub
parent fb0d1ccb62
commit b14116cc93
22 changed files with 1675 additions and 789 deletions

View File

@@ -0,0 +1,5 @@
from .bin import *
from .dir import *
from .parser import *
from .syminfo import *
from .utils import *

View File

@@ -0,0 +1,47 @@
import struct
# Declare a class that can automatically convert virtual executable addresses
# to file addresses
class Bin:
    """Read-only view of an executable that maps virtual addresses to file offsets.

    Use it as a context manager: entering opens the file and reads ImageBase
    plus the .text section's VirtualAddress and PointerToRawData; leaving
    closes the file again.
    """

    # HACK: Strictly, we should be parsing the header, but we know where
    # everything is in these two files so we just jump straight there
    _IMAGEBASE_OFFSET = 0xB4
    _TEXT_VIRT_OFFSET = 0x184
    _TEXT_RAW_OFFSET = 0x18C

    def __init__(self, filename, logger):
        """Remember `filename` and `logger`; nothing is opened until __enter__."""
        self.logger = logger
        self.logger.debug('Parsing headers of "%s"... ', filename)
        self.filename = filename
        self.file = None
        self.imagebase = None
        self.textvirt = None
        self.textraw = None

    def __enter__(self):
        """Open the file, read the three header fields and return self.

        Raises:
            OSError: if the file cannot be opened.
            struct.error: if the file is too short to contain a field.
        """
        self.logger.debug("Bin %s Enter", self.filename)
        self.file = open(self.filename, "rb")
        try:
            self.imagebase = self._read_u32(self._IMAGEBASE_OFFSET)
            self.textvirt = self._read_u32(self._TEXT_VIRT_OFFSET)
            self.textraw = self._read_u32(self._TEXT_RAW_OFFSET)
        except Exception:
            # __exit__ is not invoked when __enter__ raises, so the handle
            # has to be released here or it would leak.
            self.file.close()
            self.file = None
            raise
        self.logger.debug("... Parsing finished")
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Close the file if it was opened; exceptions are not suppressed."""
        self.logger.debug("Bin %s Exit", self.filename)
        if self.file:
            self.file.close()

    def _read_u32(self, file_offset):
        """Read one little-endian unsigned 32-bit value at `file_offset`."""
        self.file.seek(file_offset)
        # Addresses are unsigned: "<I" keeps values >= 0x80000000 positive,
        # whereas "<i" would turn them negative and corrupt get_addr().
        (value,) = struct.unpack("<I", self.file.read(4))
        return value

    def get_addr(self, virt):
        """Convert the virtual address `virt` to an offset within the file."""
        return virt - self.imagebase - self.textvirt + self.textraw

    def read(self, offset, size):
        """Return `size` bytes located at the virtual address `offset`."""
        self.file.seek(self.get_addr(offset))
        return self.file.read(size)

View File

@@ -1,21 +1,63 @@
import os
import subprocess
import sys
from typing import Iterator
class WinePathConverter:
    """Translate paths between the Unix side and the Wine (Windows) side.

    Paths at or below the working directory are rewritten with plain string
    operations; anything else is handed to the `winepath` command.
    """

    def __init__(self, unix_cwd):
        """Store `unix_cwd` and ask `winepath` for its Windows equivalent."""
        self.unix_cwd = unix_cwd
        self.win_cwd = self._call_winepath_unix2win(self.unix_cwd)

    def get_wine_path(self, unix_fn: str) -> str:
        """Return the Windows path for the Unix path `unix_fn`."""
        if unix_fn.startswith("./"):
            return self.win_cwd + "\\" + unix_fn[2:].replace("/", "\\")
        relative = self._strip_dir_prefix(unix_fn, self.unix_cwd, "/")
        if relative is not None:
            return self.win_cwd + "\\" + relative.replace("/", "\\")
        return self._call_winepath_unix2win(unix_fn)

    def get_unix_path(self, win_fn: str) -> str:
        """Return the Unix path for the Windows path `win_fn`."""
        if win_fn.startswith(".\\") or win_fn.startswith("./"):
            return self.unix_cwd + "/" + win_fn[2:].replace("\\", "/")
        relative = self._strip_dir_prefix(win_fn, self.win_cwd, "\\")
        if relative is not None:
            return self.unix_cwd + "/" + relative.replace("\\", "/")
        return self._call_winepath_win2unix(win_fn)

    @staticmethod
    def _strip_dir_prefix(path: str, directory: str, sep: str):
        """Return `path` relative to `directory`, or None if it is not inside.

        A plain startswith() check would also accept sibling directories that
        merely share a name prefix (e.g. "/proj2" for "/proj"), so the prefix
        must be followed by `sep` (or be the whole path). Leading separators
        are removed from the result so callers never produce a doubled one.
        """
        if not path.startswith(directory):
            return None
        remainder = path[len(directory) :]
        if remainder and not (remainder.startswith(sep) or directory.endswith(sep)):
            return None
        return remainder.lstrip(sep)

    @staticmethod
    def _call_winepath_unix2win(fn: str) -> str:
        """Convert `fn` to a Windows path by running `winepath -w`."""
        return subprocess.check_output(["winepath", "-w", fn], text=True).strip()

    @staticmethod
    def _call_winepath_win2unix(fn: str) -> str:
        """Convert `fn` to a Unix path by running `winepath`."""
        return subprocess.check_output(["winepath", fn], text=True).strip()
def is_file_cpp(filename: str) -> bool:
    """Return True if `filename` ends in .h or .cpp (case-insensitive)."""
    (_, ext) = os.path.splitext(filename)
    return ext.lower() in (".h", ".cpp")
def walk_source_dir(source: str, recursive: bool = True) -> Iterator[str]:
    """Generator to walk the given directory recursively and return
    any C++ files found.

    Yields the joined path of every file accepted by is_file_cpp(). With
    `recursive` set to False only the top-level directory is scanned."""
    source = os.path.abspath(source)
    for subdir, _, files in os.walk(source):
        for file in files:
            if is_file_cpp(file):
                yield os.path.join(subdir, file)

        # os.walk yields the top directory first, so stopping after the
        # first iteration restricts the scan to `source` itself.
        if not recursive:
            break
def get_file_in_script_dir(fn):
    """Return `fn` joined onto the directory that holds the running script.

    The script location is taken from sys.argv[0] and made absolute first.
    """
    script_path = os.path.abspath(sys.argv[0])
    script_dir = os.path.dirname(script_path)
    return os.path.join(script_dir, fn)

View File

@@ -7,7 +7,6 @@ from .util import (
OffsetMatch,
is_blank_or_comment,
match_offset_comment,
is_exact_offset_comment,
get_template_function_name,
remove_trailing_comment,
distinct_by_module,
@@ -25,10 +24,10 @@ class ReaderState(Enum):
def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
"""Read the IO stream (file) line-by-line and give the following report:
Foreach code block (function) in the file, what are its starting and
ending line numbers, and what is the given offset in the original
binary. We expect the result to be ordered by line number because we
are reading the file from start to finish."""
Foreach code block (function) in the file, what are its starting and
ending line numbers, and what is the given offset in the original
binary. We expect the result to be ordered by line number because we
are reading the file from start to finish."""
blocks: List[CodeBlock] = []
@@ -51,14 +50,16 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
# Our list of offset marks could have duplicates on
# module name, so we'll eliminate those now.
for offset_match in distinct_by_module(offset_matches):
block = CodeBlock(offset=offset_match.address,
signature=function_sig,
start_line=start_line,
end_line=end_line,
offset_comment=offset_match.comment,
module=offset_match.module,
is_template=offset_match.is_template,
is_stub=offset_match.is_stub)
block = CodeBlock(
offset=offset_match.address,
signature=function_sig,
start_line=start_line,
end_line=end_line,
offset_comment=offset_match.comment,
module=offset_match.module,
is_template=offset_match.is_template,
is_stub=offset_match.is_stub,
)
blocks.append(block)
offset_matches = []
state = ReaderState.WANT_OFFSET
@@ -66,15 +67,18 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
if can_seek:
line_no += 1
line = stream.readline()
if line == '':
if line == "":
break
new_match = match_offset_comment(line)
if new_match is not None:
# We will allow multiple offsets if we have just begun
# the code block, but not after we hit the curly brace.
if state in (ReaderState.WANT_OFFSET, ReaderState.IN_TEMPLATE,
ReaderState.WANT_SIG):
if state in (
ReaderState.WANT_OFFSET,
ReaderState.IN_TEMPLATE,
ReaderState.WANT_SIG,
):
# If we detected an offset marker unexpectedly,
# we are handling it here so we can continue seeking.
can_seek = True
@@ -116,11 +120,10 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
# same line. clang-format should prevent this (BraceWrapping)
# but it is easy to detect.
# If the entire function is on one line, handle that too.
if function_sig.endswith('{'):
if function_sig.endswith("{"):
start_line = line_no
state = ReaderState.IN_FUNC
elif (function_sig.endswith('}') or
function_sig.endswith('};')):
elif function_sig.endswith("}") or function_sig.endswith("};"):
start_line = line_no
end_line = line_no
state = ReaderState.FUNCTION_DONE
@@ -128,14 +131,14 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
state = ReaderState.WANT_CURLY
elif state == ReaderState.WANT_CURLY:
if line.strip() == '{':
if line.strip() == "{":
start_line = line_no
state = ReaderState.IN_FUNC
elif state == ReaderState.IN_FUNC:
# Naive but reasonable assumption that functions will end with
# a curly brace on its own line with no prepended spaces.
if line.startswith('}'):
if line.startswith("}"):
end_line = line_no
state = ReaderState.FUNCTION_DONE

View File

@@ -5,34 +5,49 @@ from typing import List
from collections import namedtuple
# One record per function found in a source file.
CodeBlock = namedtuple(
    "CodeBlock",
    [
        "offset",
        "signature",
        "start_line",
        "end_line",
        "offset_comment",
        "module",
        "is_template",
        "is_stub",
    ],
)

# One record per parsed offset comment.
OffsetMatch = namedtuple(
    "OffsetMatch", ["module", "address", "is_template", "is_stub", "comment"]
)

# This has not been formally established, but considering that "STUB"
# is a temporary state for a function, we assume it will appear last,
# after any other modifiers (i.e. TEMPLATE)

# To match a reasonable variance of formatting for the offset comment
offsetCommentRegex = re.compile(
    r"\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?",  # nopep8
    flags=re.I,
)

# To match the exact syntax (text upper case, hex lower case, with spaces)
# that is used in most places
offsetCommentExactRegex = re.compile(
    r"^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$"
)  # nopep8

# The goal here is to just read whatever is on the next line, so some
# flexibility in the formatting seems OK
templateCommentRegex = re.compile(r"\s*//\s+(.*)")

# To remove any comment (//) or block comment (/*) and its leading spaces
# from the end of a code line
trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")
def get_template_function_name(line: str) -> str:
@@ -47,23 +62,25 @@ def get_template_function_name(line: str) -> str:
def remove_trailing_comment(line: str) -> str:
    """Return `line` with the text matched by trailingCommentRegex removed."""
    return trailingCommentRegex.sub("", line)
def is_blank_or_comment(line: str) -> bool:
    """Helper to read ahead after the offset comment is matched.
    There could be blank lines or other comments before the
    function signature, and we want to skip those.

    Returns True when the stripped line is empty, starts with // or /*,
    or ends with */."""
    line_strip = line.strip()
    return (
        not line_strip
        or line_strip.startswith(("//", "/*"))
        or line_strip.endswith("*/")
    )
def is_exact_offset_comment(line: str) -> bool:
    """If the offset comment does not match our (unofficial) syntax
    we may want to alert the user to fix it for style points.

    Returns True only when `line` matches offsetCommentExactRegex."""
    return offsetCommentExactRegex.match(line) is not None
@@ -72,17 +89,19 @@ def match_offset_comment(line: str) -> OffsetMatch | None:
if match is None:
return None
return OffsetMatch(module=match.group(1),
address=int(match.group(2), 16),
is_template=match.group(3) is not None,
is_stub=match.group(4) is not None,
comment=line.strip())
return OffsetMatch(
module=match.group(1),
address=int(match.group(2), 16),
is_template=match.group(3) is not None,
is_stub=match.group(4) is not None,
comment=line.strip(),
)
def distinct_by_module(offsets: List) -> List:
"""Given a list of offset markers, return a list with distinct
module names. If module names (case-insensitive) are repeated,
choose the offset that appears first."""
module names. If module names (case-insensitive) are repeated,
choose the offset that appears first."""
if len(offsets) < 2:
return offsets

View File

@@ -0,0 +1,138 @@
import os
import subprocess
from .utils import get_file_in_script_dir
class RecompiledInfo:
    """Plain record describing one function symbol.

    All four fields default to None at class level and are meant to be
    overwritten per instance by whoever creates the record.
    """

    # addr, size, name and start all begin unset.
    addr = size = name = start = None
# Declare a class that parses the output of cvdump for fast access later
class SymInfo:
    """Index of the function symbols and line records that cvdump reports.

    The constructor runs cvdump on a PDB and fills three lookup tables:

    * ``funcs`` -- RecompiledInfo keyed by the address parsed from each
      S_GPROC32 line.
    * ``names`` -- the same RecompiledInfo objects keyed by function name.
    * ``lines`` -- for each source path, a dict of line number -> address.
    """

    def __init__(self, pdb, sym_recompfile, sym_logger, sym_wine_path_converter=None):
        """Run cvdump on `pdb` and parse its SYMBOLS and LINES sections.

        Args:
            pdb: path of the PDB file passed to cvdump.
            sym_recompfile: object whose ``imagebase`` and ``textvirt`` are
                added to each symbol address to form ``RecompiledInfo.addr``.
            sym_logger: logger used for progress and error messages.
            sym_wine_path_converter: optional converter; when given, cvdump
                is started through wine and paths are translated with its
                get_wine_path()/get_unix_path().

        Raises:
            subprocess.CalledProcessError: if cvdump exits with an error.
        """
        self.logger = sym_logger

        # The tables live on the instance. As class attributes they would be
        # one shared dict per table, so a second SymInfo would see (and add
        # to) the symbols of the first.
        self.funcs = {}
        self.lines = {}
        self.names = {}

        call = [get_file_in_script_dir("cvdump.exe"), "-l", "-s"]

        if sym_wine_path_converter:
            # Run cvdump through wine and convert path to Windows-friendly wine path
            call.insert(0, "wine")
            call.append(sym_wine_path_converter.get_wine_path(pdb))
        else:
            call.append(pdb)

        self.logger.info("Parsing %s ...", pdb)
        self.logger.debug("Command = %s", call)
        line_dump = subprocess.check_output(call).decode("utf-8").split("\r\n")

        current_section = None

        self.logger.debug("Parsing output of cvdump.exe ...")

        for i, line in enumerate(line_dump):
            if line.startswith("***"):
                current_section = line[4:]

            if current_section == "SYMBOLS" and "S_GPROC32" in line:
                # The fields are read from fixed columns of the cvdump line.
                sym_addr = int(line[26:34], 16)

                info = RecompiledInfo()
                info.addr = (
                    sym_addr + sym_recompfile.imagebase + sym_recompfile.textvirt
                )

                use_dbg_offs = False
                if use_dbg_offs:
                    debug_offs = line_dump[i + 2]
                    debug_start = int(debug_offs[22:30], 16)
                    debug_end = int(debug_offs[43:], 16)

                    info.start = debug_start
                    info.size = debug_end - debug_start
                else:
                    info.start = 0
                    info.size = int(line[41:49], 16)

                info.name = line[77:]

                self.names[info.name] = info
                self.funcs[sym_addr] = info
            # NOTE(review): the two prefixes reached this file as the same
            # one-space string, which makes the test unsatisfiable. They are
            # restored here as "exactly two leading spaces" (starts with two,
            # not with three) -- confirm against real cvdump -l output.
            elif (
                current_section == "LINES"
                and line.startswith("  ")
                and not line.startswith("   ")
            ):
                sourcepath = line.split()[0]

                if sym_wine_path_converter:
                    # Convert filename to Unix path for file compare
                    sourcepath = sym_wine_path_converter.get_unix_path(sourcepath)

                file_lines = self.lines.setdefault(sourcepath, {})

                # The line/address pairs start two lines below the file
                # header and run until the first blank line. The bound check
                # stops the scan at the end of the dump instead of raising
                # IndexError when no blank line follows.
                j = i + 2
                while j < len(line_dump):
                    ll = line_dump[j].split()
                    if len(ll) == 0:
                        break

                    # Tokens alternate: decimal line number, hex address.
                    for k in range(0, len(ll), 2):
                        linenum = int(ll[k + 0])
                        address = int(ll[k + 1], 16)
                        # Keep the first address seen for a given line.
                        file_lines.setdefault(linenum, address)

                    j += 1

        self.logger.debug("... Parsing output of cvdump.exe finished")

    def get_recompiled_address(self, filename, line):
        """Return the RecompiledInfo for the function at `filename`:`line`.

        Returns None (after logging an error) when the file/line pair is
        unknown or when no function symbol has the address found for it.
        """
        recompiled_addr = None

        self.logger.debug("Looking for %s:%s", filename, line)
        filename_basename = os.path.basename(filename).lower()

        for fn in self.lines:
            # Sometimes a PDB is compiled with a relative path while we always have
            # an absolute path. Therefore we must compare the files themselves
            # (os.path.samefile) instead of the path strings; the cheap basename
            # comparison runs first to skip obvious mismatches.
            try:
                if os.path.basename(
                    fn
                ).lower() == filename_basename and os.path.samefile(fn, filename):
                    filename = fn
                    break
            except FileNotFoundError:
                # samefile() needs both paths to exist; skip entries that do not.
                continue

        if filename in self.lines and line in self.lines[filename]:
            recompiled_addr = self.lines[filename][line]

            if recompiled_addr in self.funcs:
                return self.funcs[recompiled_addr]
            self.logger.error(
                "Failed to find function symbol with address: %x", recompiled_addr
            )
            return None
        self.logger.error(
            "Failed to find function symbol with filename and line: %s:%s",
            filename,
            line,
        )
        return None

    def get_recompiled_address_from_name(self, name):
        """Return the RecompiledInfo named `name`, or None (logged) if unknown."""
        self.logger.debug("Looking for %s", name)

        if name in self.names:
            return self.names[name]
        self.logger.error("Failed to find function symbol with name: %s", name)
        return None

View File

@@ -0,0 +1,42 @@
import os
import sys
import colorama
def print_diff(udiff, plain):
    """Print the brief view of a unified diff and report whether it had lines.

    Lines starting with "++", "--" or "@@" are left out of the output.
    Unless `plain` is set, lines starting with "+" are printed in green and
    lines starting with "-" in red, and the style is reset after every
    printed line.

    Returns True if `udiff` produced at least one line -- including lines
    that were left out of the output -- and False otherwise.
    """
    saw_any_line = False
    # Skip unneeded parts of the diff for the brief view
    skipped_prefixes = ("++", "@@", "--")

    for entry in udiff:
        saw_any_line = True
        if entry.startswith(skipped_prefixes):
            continue

        # Work out color if we are printing color
        if plain:
            prefix = ""
        elif entry.startswith("+"):
            prefix = colorama.Fore.GREEN
        elif entry.startswith("-"):
            prefix = colorama.Fore.RED
        else:
            prefix = ""

        print(prefix + entry)

        # Reset color if we're printing in color
        if not plain:
            print(colorama.Style.RESET_ALL, end="")

    return saw_any_line
def get_file_in_script_dir(fn):
    """Return `fn` joined onto the directory that holds the running script.

    The script location is taken from sys.argv[0] and made absolute first.
    """
    script_path = os.path.abspath(sys.argv[0])
    script_dir = os.path.dirname(script_path)
    return os.path.join(script_dir, fn)
class OffsetPlaceholderGenerator:
    """Hands out ``<OFFSETn>`` placeholders, one per distinct address.

    `counter` holds the number of placeholders issued so far and
    `replacements` maps each address to the placeholder it was given.
    """

    def __init__(self):
        self.counter = 0
        self.replacements = {}

    def get(self, replace_addr):
        """Return the placeholder for `replace_addr`, creating it on first use."""
        try:
            return self.replacements[replace_addr]
        except KeyError:
            pass

        # First sighting of this address: number it and remember the result.
        self.counter += 1
        placeholder = f"<OFFSET{self.counter}>"
        self.replacements[replace_addr] = placeholder
        return placeholder

View File

@@ -1,9 +1,9 @@
from setuptools import setup, find_packages
# Package metadata; every keyword is passed exactly once.
setup(
    name="isledecomp",
    version="0.1.0",
    description="Python tools for the isledecomp project",
    packages=find_packages(),
    tests_require=["pytest"],
)

View File

@@ -1,17 +1,16 @@
import os
import pytest
from typing import List, TextIO
from isledecomp.parser import find_code_blocks
from isledecomp.parser.util import CodeBlock
# Directory holding the sample sources, resolved relative to this file.
SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples")


def sample_file(filename: str) -> TextIO:
    """Wrapper for opening the samples from the directory that does not
    depend on the cwd where we run the test.

    The file is opened for reading as UTF-8 text; the caller closes it."""
    full_path = os.path.join(SAMPLE_DIR, filename)
    return open(full_path, "r", encoding="utf-8")
def code_blocks_are_sorted(blocks: List[CodeBlock]) -> bool:
@@ -25,7 +24,7 @@ def code_blocks_are_sorted(blocks: List[CodeBlock]) -> bool:
def test_sanity():
"""Read a very basic file"""
with sample_file('basic_file.cpp') as f:
with sample_file("basic_file.cpp") as f:
blocks = find_code_blocks(f)
assert len(blocks) == 3
@@ -39,7 +38,7 @@ def test_sanity():
def test_oneline():
"""(Assuming clang-format permits this) This sample has a function
on a single line. This will test the end-of-function detection"""
with sample_file('oneline_function.cpp') as f:
with sample_file("oneline_function.cpp") as f:
blocks = find_code_blocks(f)
assert len(blocks) == 2
@@ -49,7 +48,7 @@ def test_oneline():
def test_missing_offset():
"""What if the function doesn't have an offset comment?"""
with sample_file('missing_offset.cpp') as f:
with sample_file("missing_offset.cpp") as f:
blocks = find_code_blocks(f)
# TODO: For now, the function without the offset will just be ignored.
@@ -60,9 +59,9 @@ def test_missing_offset():
def test_jumbled_case():
"""The parser just reports what it sees. It is the responsibility of
the downstream tools to do something about a jumbled file.
Just verify that we are reading it correctly."""
with sample_file('out_of_order.cpp') as f:
the downstream tools to do something about a jumbled file.
Just verify that we are reading it correctly."""
with sample_file("out_of_order.cpp") as f:
blocks = find_code_blocks(f)
assert len(blocks) == 3
@@ -70,7 +69,7 @@ def test_jumbled_case():
def test_bad_file():
with sample_file('poorly_formatted.cpp') as f:
with sample_file("poorly_formatted.cpp") as f:
blocks = find_code_blocks(f)
assert len(blocks) == 3
@@ -78,7 +77,7 @@ def test_bad_file():
def test_indented():
"""Offsets for functions inside of a class will probably be indented."""
with sample_file('basic_class.cpp') as f:
with sample_file("basic_class.cpp") as f:
blocks = find_code_blocks(f)
# TODO: We don't properly detect the end of these functions
@@ -87,17 +86,17 @@ def test_indented():
# all the functions that are there.
assert len(blocks) == 2
assert blocks[0].offset == int('0x12345678', 16)
assert blocks[0].offset == int("0x12345678", 16)
assert blocks[0].start_line == 15
# assert blocks[0].end_line == 18
assert blocks[1].offset == int('0xdeadbeef', 16)
assert blocks[1].offset == int("0xdeadbeef", 16)
assert blocks[1].start_line == 22
# assert blocks[1].end_line == 24
def test_inline():
with sample_file('inline.cpp') as f:
with sample_file("inline.cpp") as f:
blocks = find_code_blocks(f)
assert len(blocks) == 2
@@ -108,21 +107,21 @@ def test_inline():
def test_multiple_offsets():
"""If multiple offset marks appear before for a code block, take them
all but ensure module name (case-insensitive) is distinct.
Use first module occurrence in case of duplicates."""
with sample_file('multiple_offsets.cpp') as f:
all but ensure module name (case-insensitive) is distinct.
Use first module occurrence in case of duplicates."""
with sample_file("multiple_offsets.cpp") as f:
blocks = find_code_blocks(f)
assert len(blocks) == 4
assert blocks[0].module == 'TEST'
assert blocks[0].module == "TEST"
assert blocks[0].start_line == 9
assert blocks[1].module == 'HELLO'
assert blocks[1].module == "HELLO"
assert blocks[1].start_line == 9
# Duplicate modules are ignored
assert blocks[2].start_line == 16
assert blocks[2].offset == 0x2345
assert blocks[3].module == 'TEST'
assert blocks[3].module == "TEST"
assert blocks[3].offset == 0x2002

View File

@@ -1,6 +1,6 @@
import pytest
from collections import namedtuple
from typing import List
import pytest
from isledecomp.parser.util import (
is_blank_or_comment,
match_offset_comment,
@@ -10,21 +10,20 @@ from isledecomp.parser.util import (
blank_or_comment_param = [
(True, ''),
(True, '\t'),
(True, ' '),
(False, '\tint abc=123;'),
(True, '// OFFSET: LEGO1 0xdeadbeef'),
(True, ' /* Block comment beginning'),
(True, 'Block comment ending */ '),
(True, ""),
(True, "\t"),
(True, " "),
(False, "\tint abc=123;"),
(True, "// OFFSET: LEGO1 0xdeadbeef"),
(True, " /* Block comment beginning"),
(True, "Block comment ending */ "),
# TODO: does clang-format have anything to say about these cases?
(False, 'x++; // Comment folows'),
(False, 'x++; /* Block comment begins'),
(False, "x++; // Comment folows"),
(False, "x++; /* Block comment begins"),
]
@pytest.mark.parametrize("expected, line", blank_or_comment_param)
def test_is_blank_or_comment(line: str, expected: bool):
    """Each sample line must be classified exactly as its expected flag says."""
    assert is_blank_or_comment(line) is expected
@@ -32,82 +31,73 @@ def test_is_blank_or_comment(line: str, expected: bool):
offset_comment_samples = [
# (can_parse: bool, exact_match: bool, line: str)
# Should match both expected modules with optional STUB marker
(True, True, '// OFFSET: LEGO1 0xdeadbeef'),
(True, True, '// OFFSET: LEGO1 0xdeadbeef STUB'),
(True, True, '// OFFSET: ISLE 0x12345678'),
(True, True, '// OFFSET: ISLE 0x12345678 STUB'),
(True, True, "// OFFSET: LEGO1 0xdeadbeef"),
(True, True, "// OFFSET: LEGO1 0xdeadbeef STUB"),
(True, True, "// OFFSET: ISLE 0x12345678"),
(True, True, "// OFFSET: ISLE 0x12345678 STUB"),
# No trailing spaces allowed
(True, False, '// OFFSET: LEGO1 0xdeadbeef '),
(True, False, '// OFFSET: LEGO1 0xdeadbeef STUB '),
(True, False, "// OFFSET: LEGO1 0xdeadbeef "),
(True, False, "// OFFSET: LEGO1 0xdeadbeef STUB "),
# Must have exactly one space between elements
(True, False, '//OFFSET: ISLE 0xdeadbeef'),
(True, False, '// OFFSET:ISLE 0xdeadbeef'),
(True, False, '// OFFSET: ISLE 0xdeadbeef'),
(True, False, '// OFFSET: ISLE 0xdeadbeef'),
(True, False, '// OFFSET: ISLE 0xdeadbeef'),
(True, False, '// OFFSET: ISLE 0xdeadbeef STUB'),
(True, False, "//OFFSET: ISLE 0xdeadbeef"),
(True, False, "// OFFSET:ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef STUB"),
# Must have 0x prefix for hex number
(True, False, '// OFFSET: ISLE deadbeef'),
(True, False, "// OFFSET: ISLE deadbeef"),
# Offset, module name, and STUB must be uppercase
(True, False, '// offset: ISLE 0xdeadbeef'),
(True, False, '// offset: isle 0xdeadbeef'),
(True, False, '// OFFSET: LEGO1 0xdeadbeef stub'),
(True, False, "// offset: ISLE 0xdeadbeef"),
(True, False, "// offset: isle 0xdeadbeef"),
(True, False, "// OFFSET: LEGO1 0xdeadbeef stub"),
# Hex string must be lowercase
(True, False, '// OFFSET: ISLE 0xDEADBEEF'),
(True, False, "// OFFSET: ISLE 0xDEADBEEF"),
# TODO: How flexible should we be with matching the module name?
(True, True, '// OFFSET: OMNI 0x12345678'),
(True, True, '// OFFSET: LEG01 0x12345678'),
(True, False, '// OFFSET: hello 0x12345678'),
(True, True, "// OFFSET: OMNI 0x12345678"),
(True, True, "// OFFSET: LEG01 0x12345678"),
(True, False, "// OFFSET: hello 0x12345678"),
# Not close enough to match
(False, False, '// OFFSET: ISLE0x12345678'),
(False, False, '// OFFSET: 0x12345678'),
(False, False, '// LEGO1: 0x12345678'),
(False, False, "// OFFSET: ISLE0x12345678"),
(False, False, "// OFFSET: 0x12345678"),
(False, False, "// LEGO1: 0x12345678"),
# Hex string shorter than 8 characters
(True, True, '// OFFSET: LEGO1 0x1234'),
(True, True, "// OFFSET: LEGO1 0x1234"),
# TODO: These match but shouldn't.
# (False, False, '// OFFSET: LEGO1 0'),
# (False, False, '// OFFSET: LEGO1 0x'),
]
@pytest.mark.parametrize("match, _, line", offset_comment_samples)
def test_offset_match(line: str, match: bool, _):
    """The lenient matcher must accept exactly the samples flagged as parseable."""
    did_match = match_offset_comment(line) is not None
    assert did_match is match
@pytest.mark.parametrize("_, exact, line", offset_comment_samples)
def test_exact_offset_comment(line: str, exact: bool, _):
    """The strict matcher must accept exactly the samples flagged as exact."""
    assert is_exact_offset_comment(line) is exact
# Helper for the next test: cut down version of OffsetMatch
MiniOfs = namedtuple('MiniOfs', ['module', 'value'])
MiniOfs = namedtuple("MiniOfs", ["module", "value"])
distinct_by_module_samples = [
# empty set
([], []),
# same module name
([MiniOfs('TEST', 123), MiniOfs('TEST', 555)],
[MiniOfs('TEST', 123)]),
([MiniOfs("TEST", 123), MiniOfs("TEST", 555)], [MiniOfs("TEST", 123)]),
# same module name, case-insensitive
([MiniOfs('test', 123), MiniOfs('TEST', 555)],
[MiniOfs('test', 123)]),
([MiniOfs("test", 123), MiniOfs("TEST", 555)], [MiniOfs("test", 123)]),
# duplicates, non-consecutive
([MiniOfs('test', 123), MiniOfs('abc', 111), MiniOfs('TEST', 555)],
[MiniOfs('test', 123), MiniOfs('abc', 111)]),
(
[MiniOfs("test", 123), MiniOfs("abc", 111), MiniOfs("TEST", 555)],
[MiniOfs("test", 123), MiniOfs("abc", 111)],
),
]
@pytest.mark.parametrize("sample, expected", distinct_by_module_samples)
def test_distinct_by_module(sample: List[MiniOfs], expected: List[MiniOfs]):
    """distinct_by_module must return the expected list for every sample."""
    assert distinct_by_module(sample) == expected