mirror of
https://github.com/isledecomp/isle.git
synced 2025-10-23 00:14:22 +00:00
(Discussion/Proposals) Consistency regarding annotations of header-implemented functions (#316)
* Open discussion * Move annotations of header-implemented functions back to `.h` files * Adjust `README.md` * Relocate annotation * linter * Comment markers in headers only, rename script, update github actions * type hint compat * Rename github action, better argparse for linter * Type hints, working test for byname ignore * Move annotation * CI rename and enable warnfail, enforce mode always on * Two step linting * or one step * continue on error * two jobs instead * Fixes --------- Co-authored-by: disinvite <disinvite@users.noreply.github.com>
This commit is contained in:

committed by
GitHub

parent
4dd0d60dec
commit
3b155bfe38
@@ -12,7 +12,7 @@ We are continually working on extending the capabilities of our "decompilation l
|
||||
|
||||
All non-inlined functions in the code base with the exception of [3rd party code](/3rdparty) must be annotated with one of the following markers, which include the module name and address of the function as found in the original binaries. This information is then used to compare the recompiled assembly with the original assembly, resulting in an accuracy score. Functions in a given compilation unit must be ordered by their address in ascending order.
|
||||
|
||||
The annotations can be attached to the function implementation, which is the most common case, or use the "comment" syntax (see examples below) for functions that cannot be referred to directly (such as templated, synthetic or non-inlined inline functions). They should appear exclusively in `.cpp` files.
|
||||
The annotations can be attached to the function implementation, which is the most common case, or use the "comment" syntax (see examples below) for functions that cannot be referred to directly (such as templated, synthetic or non-inlined inline functions). The latter should only ever appear in `.h` files.
|
||||
|
||||
### `FUNCTION`
|
||||
|
||||
|
@@ -1,127 +0,0 @@
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
from isledecomp.dir import walk_source_dir, is_file_cpp
|
||||
from isledecomp.parser import DecompParser
|
||||
|
||||
|
||||
def sig_truncate(sig: str) -> str:
|
||||
"""Helper to truncate function names to 50 chars and append ellipsis
|
||||
if needed. Goal is to stay under 80 columns for tool output."""
|
||||
return f"{sig[:47]}{'...' if len(sig) >= 50 else ''}"
|
||||
|
||||
|
||||
def check_file(filename: str, verbose: bool = False) -> bool:
|
||||
"""Open and read the given file, then check whether the code blocks
|
||||
are in order. If verbose, print each block."""
|
||||
|
||||
parser = DecompParser()
|
||||
with open(filename, "r", encoding="utf-8") as f:
|
||||
parser.read_lines(f)
|
||||
|
||||
just_offsets = [block.offset for block in parser.functions]
|
||||
sorted_offsets = sorted(just_offsets)
|
||||
file_out_of_order = just_offsets != sorted_offsets
|
||||
|
||||
# TODO: When we add parser error severity, actual errors that obstruct
|
||||
# parsing should probably be shown here regardless of verbose mode
|
||||
|
||||
# If we detect inexact comments, don't print anything unless we are
|
||||
# in verbose mode. If the file is out of order, we always print the
|
||||
# file name.
|
||||
should_report = (len(parser.alerts) > 0 and verbose) or file_out_of_order
|
||||
|
||||
if not should_report and not file_out_of_order:
|
||||
return False
|
||||
|
||||
# Else: we are alerting to some problem in this file
|
||||
print(filename)
|
||||
if verbose:
|
||||
if file_out_of_order:
|
||||
order_lookup = {k: i for i, k in enumerate(sorted_offsets)}
|
||||
prev_offset = 0
|
||||
|
||||
for fun in parser.functions:
|
||||
msg = " ".join(
|
||||
[
|
||||
" " if fun.offset > prev_offset else "!",
|
||||
f"{fun.offset:08x}",
|
||||
f"{fun.end_line - fun.line_number:4} lines",
|
||||
f"{order_lookup[fun.offset]:3}",
|
||||
" ",
|
||||
sig_truncate(fun.name),
|
||||
]
|
||||
)
|
||||
print(msg)
|
||||
prev_offset = fun.offset
|
||||
|
||||
for alert in parser.alerts:
|
||||
print(f"* line {alert.line_number:4} {alert.code} ({alert.line})")
|
||||
|
||||
print()
|
||||
|
||||
return file_out_of_order
|
||||
|
||||
|
||||
def parse_args(test_args: list | None = None) -> dict:
|
||||
p = argparse.ArgumentParser(
|
||||
description="Checks the source files to make sure the function offset comments are in order",
|
||||
)
|
||||
p.add_argument("target", help="The file or directory to check.")
|
||||
p.add_argument(
|
||||
"--enforce",
|
||||
action=argparse.BooleanOptionalAction,
|
||||
default=False,
|
||||
help="Fail with error code if target is out of order.",
|
||||
)
|
||||
p.add_argument(
|
||||
"--verbose",
|
||||
action=argparse.BooleanOptionalAction,
|
||||
default=False,
|
||||
help=(
|
||||
"Display each code block in the file and show "
|
||||
"where each consecutive run of blocks is broken."
|
||||
),
|
||||
)
|
||||
|
||||
if test_args is None:
|
||||
args = p.parse_args()
|
||||
else:
|
||||
args = p.parse_args(test_args)
|
||||
|
||||
return vars(args)
|
||||
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
|
||||
if os.path.isdir(args["target"]):
|
||||
files_to_check = list(walk_source_dir(args["target"]))
|
||||
elif os.path.isfile(args["target"]) and is_file_cpp(args["target"]):
|
||||
files_to_check = [args["target"]]
|
||||
else:
|
||||
sys.exit("Invalid target")
|
||||
|
||||
files_out_of_order = 0
|
||||
|
||||
for file in files_to_check:
|
||||
is_jumbled = check_file(file, args["verbose"])
|
||||
if is_jumbled:
|
||||
files_out_of_order += 1
|
||||
|
||||
if files_out_of_order > 0:
|
||||
error_message = " ".join(
|
||||
[
|
||||
str(files_out_of_order),
|
||||
"files are" if files_out_of_order > 1 else "file is",
|
||||
"out of order",
|
||||
]
|
||||
)
|
||||
print(error_message)
|
||||
|
||||
if files_out_of_order > 0 and args["enforce"]:
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
99
tools/decomplint/decomplint.py
Normal file
99
tools/decomplint/decomplint.py
Normal file
@@ -0,0 +1,99 @@
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import colorama
|
||||
from isledecomp.dir import walk_source_dir, is_file_cpp
|
||||
from isledecomp.parser import DecompLinter
|
||||
|
||||
colorama.init()
|
||||
|
||||
|
||||
def display_errors(alerts, filename):
|
||||
sorted_alerts = sorted(alerts, key=lambda a: a.line_number)
|
||||
|
||||
for alert in sorted_alerts:
|
||||
error_type = (
|
||||
f"{colorama.Fore.RED}error: "
|
||||
if alert.is_error()
|
||||
else f"{colorama.Fore.YELLOW}warning: "
|
||||
)
|
||||
components = [
|
||||
colorama.Fore.LIGHTWHITE_EX,
|
||||
filename,
|
||||
":",
|
||||
str(alert.line_number),
|
||||
" : ",
|
||||
error_type,
|
||||
colorama.Fore.LIGHTWHITE_EX,
|
||||
alert.code.name.lower(),
|
||||
]
|
||||
print("".join(components))
|
||||
|
||||
if alert.line is not None:
|
||||
print(f"{colorama.Fore.WHITE} {alert.line}")
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
p = argparse.ArgumentParser(
|
||||
description="Syntax checking and linting for decomp annotation markers."
|
||||
)
|
||||
p.add_argument("target", help="The file or directory to check.")
|
||||
p.add_argument(
|
||||
"--module",
|
||||
required=False,
|
||||
type=str,
|
||||
help="If present, run targeted checks for markers from the given module.",
|
||||
)
|
||||
p.add_argument(
|
||||
"--warnfail",
|
||||
action=argparse.BooleanOptionalAction,
|
||||
default=False,
|
||||
help="Fail if syntax warnings are found.",
|
||||
)
|
||||
|
||||
(args, _) = p.parse_known_args()
|
||||
return args
|
||||
|
||||
|
||||
def process_files(files, module=None):
|
||||
warning_count = 0
|
||||
error_count = 0
|
||||
|
||||
linter = DecompLinter()
|
||||
for filename in files:
|
||||
success = linter.check_file(filename, module)
|
||||
|
||||
warnings = [a for a in linter.alerts if a.is_warning()]
|
||||
errors = [a for a in linter.alerts if a.is_error()]
|
||||
|
||||
error_count += len(errors)
|
||||
warning_count += len(warnings)
|
||||
|
||||
if not success:
|
||||
display_errors(linter.alerts, filename)
|
||||
print()
|
||||
|
||||
return (warning_count, error_count)
|
||||
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
|
||||
if os.path.isdir(args.target):
|
||||
files_to_check = list(walk_source_dir(args.target))
|
||||
elif os.path.isfile(args.target) and is_file_cpp(args.target):
|
||||
files_to_check = [args.target]
|
||||
else:
|
||||
sys.exit("Invalid target")
|
||||
|
||||
(warning_count, error_count) = process_files(files_to_check, module=args.module)
|
||||
|
||||
print(colorama.Style.RESET_ALL, end="")
|
||||
|
||||
would_fail = error_count > 0 or (warning_count > 0 and args.warnfail)
|
||||
if would_fail:
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@@ -1 +1,2 @@
|
||||
from .parser import DecompParser
|
||||
from .linter import DecompLinter
|
||||
|
@@ -1,6 +1,9 @@
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
# TODO: poorly chosen name, should be AlertType or AlertCode or something
|
||||
class ParserError(Enum):
|
||||
# WARN: Stub function exceeds some line number threshold
|
||||
UNLIKELY_STUB = 100
|
||||
@@ -29,6 +32,16 @@ class ParserError(Enum):
|
||||
# and the start of the function. We can ignore it, but the line shouldn't be there
|
||||
UNEXPECTED_BLANK_LINE = 107
|
||||
|
||||
# WARN: We called the finish() method for the parser but had not reached the starting
|
||||
# state of SEARCH
|
||||
UNEXPECTED_END_OF_FILE = 108
|
||||
|
||||
# WARN: We found a marker to be referenced by name outside of a header file.
|
||||
BYNAME_FUNCTION_IN_CPP = 109
|
||||
|
||||
# This code or higher is an error, not a warning
|
||||
DECOMP_ERROR_START = 200
|
||||
|
||||
# ERROR: We found a marker unexpectedly
|
||||
UNEXPECTED_MARKER = 200
|
||||
|
||||
@@ -39,3 +52,20 @@ class ParserError(Enum):
|
||||
|
||||
# ERROR: The line following a synthetic marker was not a comment
|
||||
BAD_SYNTHETIC = 202
|
||||
|
||||
# ERROR: This function offset comes before the previous offset from the same module
|
||||
# This hopefully gives some hint about which functions need to be rearranged.
|
||||
FUNCTION_OUT_OF_ORDER = 203
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParserAlert:
|
||||
code: ParserError
|
||||
line_number: int
|
||||
line: Optional[str] = None
|
||||
|
||||
def is_warning(self) -> bool:
|
||||
return self.code.value < ParserError.DECOMP_ERROR_START.value
|
||||
|
||||
def is_error(self) -> bool:
|
||||
return self.code.value >= ParserError.DECOMP_ERROR_START.value
|
||||
|
99
tools/isledecomp/isledecomp/parser/linter.py
Normal file
99
tools/isledecomp/isledecomp/parser/linter.py
Normal file
@@ -0,0 +1,99 @@
|
||||
from typing import List, Optional
|
||||
from .parser import DecompParser
|
||||
from .error import ParserAlert, ParserError
|
||||
|
||||
|
||||
def get_checkorder_filter(module):
|
||||
"""Return a filter function on implemented functions in the given module"""
|
||||
return lambda fun: fun.module == module and not fun.lookup_by_name
|
||||
|
||||
|
||||
class DecompLinter:
|
||||
def __init__(self) -> None:
|
||||
self.alerts: List[ParserAlert] = []
|
||||
self._parser = DecompParser()
|
||||
self._filename: str = ""
|
||||
self._module: Optional[str] = None
|
||||
|
||||
def reset(self):
|
||||
self.alerts = []
|
||||
self._parser.reset()
|
||||
self._filename = ""
|
||||
self._module = None
|
||||
|
||||
def file_is_header(self):
|
||||
return self._filename.lower().endswith(".h")
|
||||
|
||||
def _check_function_order(self):
|
||||
"""Rules:
|
||||
1. Only markers that are implemented in the file are considered. This means we
|
||||
only look at markers that are cross-referenced with cvdump output by their line
|
||||
number. Markers with the lookup_by_name flag set are ignored because we cannot
|
||||
directly influence their order.
|
||||
|
||||
2. Order should be considered for a single module only. If we have multiple
|
||||
markers for a single function (i.e. for LEGO1 functions linked statically to
|
||||
ISLE) then the virtual address space will be very different. If we don't check
|
||||
for one module only, we would incorrectly report that the file is out of order.
|
||||
"""
|
||||
|
||||
if self._module is None:
|
||||
return
|
||||
|
||||
checkorder_filter = get_checkorder_filter(self._module)
|
||||
last_offset = None
|
||||
for fun in filter(checkorder_filter, self._parser.functions):
|
||||
if last_offset is not None:
|
||||
if fun.offset < last_offset:
|
||||
self.alerts.append(
|
||||
ParserAlert(
|
||||
code=ParserError.FUNCTION_OUT_OF_ORDER,
|
||||
line_number=fun.line_number,
|
||||
)
|
||||
)
|
||||
|
||||
last_offset = fun.offset
|
||||
|
||||
def _check_offset_uniqueness(self):
|
||||
# TODO
|
||||
pass
|
||||
|
||||
def _check_byname_allowed(self):
|
||||
if self.file_is_header():
|
||||
return
|
||||
|
||||
for fun in self._parser.functions:
|
||||
if fun.lookup_by_name:
|
||||
self.alerts.append(
|
||||
ParserAlert(
|
||||
code=ParserError.BYNAME_FUNCTION_IN_CPP,
|
||||
line_number=fun.line_number,
|
||||
)
|
||||
)
|
||||
|
||||
def check_lines(self, lines, filename, module=None):
|
||||
"""`lines` is a generic iterable to allow for testing with a list of strings.
|
||||
We assume lines has the entire contents of the compilation unit."""
|
||||
|
||||
self.reset()
|
||||
self._filename = filename
|
||||
self._module = module
|
||||
|
||||
self._parser.read_lines(lines)
|
||||
|
||||
self._parser.finish()
|
||||
self.alerts = self._parser.alerts[::]
|
||||
|
||||
if self._module is not None:
|
||||
self._check_byname_allowed()
|
||||
self._check_offset_uniqueness()
|
||||
|
||||
if not self.file_is_header():
|
||||
self._check_function_order()
|
||||
|
||||
return len(self.alerts) == 0
|
||||
|
||||
def check_file(self, filename, module=None):
|
||||
"""Convenience method for decomplint cli tool"""
|
||||
with open(filename, "r", encoding="utf-8") as f:
|
||||
return self.check_lines(f, filename, module)
|
@@ -6,12 +6,6 @@ class ParserNode:
|
||||
line_number: int
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParserAlert(ParserNode):
|
||||
code: int
|
||||
line: str
|
||||
|
||||
|
||||
@dataclass
|
||||
class ParserSymbol(ParserNode):
|
||||
module: str
|
||||
|
@@ -12,12 +12,11 @@ from .util import (
|
||||
remove_trailing_comment,
|
||||
)
|
||||
from .node import (
|
||||
ParserAlert,
|
||||
ParserFunction,
|
||||
ParserVariable,
|
||||
ParserVtable,
|
||||
)
|
||||
from .error import ParserError
|
||||
from .error import ParserAlert, ParserError
|
||||
|
||||
|
||||
class ReaderState(Enum):
|
||||
@@ -29,6 +28,7 @@ class ReaderState(Enum):
|
||||
IN_GLOBAL = 5
|
||||
IN_FUNC_GLOBAL = 6
|
||||
IN_VTABLE = 7
|
||||
DONE = 100
|
||||
|
||||
|
||||
def marker_is_stub(marker: DecompMarker) -> bool:
|
||||
@@ -56,7 +56,7 @@ def marker_is_vtable(marker: DecompMarker) -> bool:
|
||||
|
||||
|
||||
class MarkerDict:
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
self.markers: dict = {}
|
||||
|
||||
def insert(self, marker: DecompMarker) -> bool:
|
||||
@@ -80,7 +80,7 @@ class DecompParser:
|
||||
# pylint: disable=too-many-instance-attributes
|
||||
# Could combine output lists into a single list to get under the limit,
|
||||
# but not right now
|
||||
def __init__(self):
|
||||
def __init__(self) -> None:
|
||||
# The lists to be populated as we parse
|
||||
self.functions: List[ParserFunction] = []
|
||||
self.vtables: List[ParserVtable] = []
|
||||
@@ -306,6 +306,9 @@ class DecompParser:
|
||||
self._syntax_warning(ParserError.BOGUS_MARKER)
|
||||
|
||||
def read_line(self, line: str):
|
||||
if self.state == ReaderState.DONE:
|
||||
return
|
||||
|
||||
self.last_line = line # TODO: Useful or hack for error reporting?
|
||||
self.line_number += 1
|
||||
|
||||
@@ -392,3 +395,9 @@ class DecompParser:
|
||||
def read_lines(self, lines: Iterable):
|
||||
for line in lines:
|
||||
self.read_line(line)
|
||||
|
||||
def finish(self):
|
||||
if self.state != ReaderState.SEARCH:
|
||||
self._syntax_warning(ParserError.UNEXPECTED_END_OF_FILE)
|
||||
|
||||
self.state = ReaderState.DONE
|
||||
|
85
tools/isledecomp/tests/test_linter.py
Normal file
85
tools/isledecomp/tests/test_linter.py
Normal file
@@ -0,0 +1,85 @@
|
||||
import pytest
|
||||
from isledecomp.parser import DecompLinter
|
||||
from isledecomp.parser.error import ParserError
|
||||
|
||||
|
||||
@pytest.fixture(name="linter")
|
||||
def fixture_linter():
|
||||
return DecompLinter()
|
||||
|
||||
|
||||
def test_simple_in_order(linter):
|
||||
lines = [
|
||||
"// FUNCTION: TEST 0x1000",
|
||||
"void function1() {}",
|
||||
"// FUNCTION: TEST 0x2000",
|
||||
"void function2() {}",
|
||||
"// FUNCTION: TEST 0x3000",
|
||||
"void function3() {}",
|
||||
]
|
||||
assert linter.check_lines(lines, "test.cpp", "TEST") is True
|
||||
|
||||
|
||||
def test_simple_not_in_order(linter):
|
||||
lines = [
|
||||
"// FUNCTION: TEST 0x1000",
|
||||
"void function1() {}",
|
||||
"// FUNCTION: TEST 0x3000",
|
||||
"void function3() {}",
|
||||
"// FUNCTION: TEST 0x2000",
|
||||
"void function2() {}",
|
||||
]
|
||||
assert linter.check_lines(lines, "test.cpp", "TEST") is False
|
||||
assert len(linter.alerts) == 1
|
||||
|
||||
assert linter.alerts[0].code == ParserError.FUNCTION_OUT_OF_ORDER
|
||||
# N.B. Line number given is the start of the function, not the marker
|
||||
assert linter.alerts[0].line_number == 6
|
||||
|
||||
|
||||
def test_byname_ignored(linter):
|
||||
"""Should ignore lookup-by-name markers when checking order."""
|
||||
lines = [
|
||||
"// FUNCTION: TEST 0x1000",
|
||||
"void function1() {}",
|
||||
"// FUNCTION: TEST 0x3000",
|
||||
"// MyClass::MyMethod",
|
||||
"// FUNCTION: TEST 0x2000",
|
||||
"void function2() {}",
|
||||
]
|
||||
# This will fail because byname lookup does not belong in the cpp file
|
||||
assert linter.check_lines(lines, "test.cpp", "TEST") is False
|
||||
# but it should not fail for function order.
|
||||
assert all(
|
||||
alert.code != ParserError.FUNCTION_OUT_OF_ORDER for alert in linter.alerts
|
||||
)
|
||||
|
||||
|
||||
def test_module_isolation(linter):
|
||||
"""Should check the order of markers from a single module only."""
|
||||
lines = [
|
||||
"// FUNCTION: ALPHA 0x0001",
|
||||
"// FUNCTION: TEST 0x1000",
|
||||
"void function1() {}",
|
||||
"// FUNCTION: ALPHA 0x0002",
|
||||
"// FUNCTION: TEST 0x2000",
|
||||
"void function2() {}",
|
||||
"// FUNCTION: ALPHA 0x0003",
|
||||
"// FUNCTION: TEST 0x3000",
|
||||
"void function3() {}",
|
||||
]
|
||||
|
||||
assert linter.check_lines(lines, "test.cpp", "TEST") is True
|
||||
assert linter.check_lines(lines, "test.cpp", "ALPHA") is True
|
||||
|
||||
|
||||
def test_byname_headers_only(linter):
|
||||
"""Markers that ar referenced by name with cvdump belong in header files only."""
|
||||
lines = [
|
||||
"// FUNCTION: TEST 0x1000",
|
||||
"// MyClass::~MyClass",
|
||||
]
|
||||
|
||||
assert linter.check_lines(lines, "test.h", "TEST") is True
|
||||
assert linter.check_lines(lines, "test.cpp", "TEST") is False
|
||||
assert linter.alerts[0].code == ParserError.BYNAME_FUNCTION_IN_CPP
|
@@ -358,3 +358,20 @@ def test_function_is_commented(parser):
|
||||
)
|
||||
|
||||
assert len(parser.functions) == 0
|
||||
|
||||
|
||||
def test_unexpected_eof(parser):
|
||||
"""If a decomp marker finds its way to the last line of the file,
|
||||
report that we could not get anything from it."""
|
||||
parser.read_lines(
|
||||
[
|
||||
"// FUNCTION: TEST 0x1234",
|
||||
"// Cls::Method",
|
||||
"// FUNCTION: TEST 0x5555",
|
||||
]
|
||||
)
|
||||
parser.finish()
|
||||
|
||||
assert len(parser.functions) == 1
|
||||
assert len(parser.alerts) == 1
|
||||
assert parser.alerts[0].code == ParserError.UNEXPECTED_END_OF_FILE
|
||||
|
Reference in New Issue
Block a user