(Discussion/Proposals) Consistency regarding annotations of header-implemented functions (#316)

* Open discussion

* Move annotations of header-implemented functions back to `.h` files

* Adjust `README.md`

* Relocate annotation

* linter

* Comment markers in headers only, rename script, update github actions

* type hint compat

* Rename github action, better argparse for linter

* Type hints, working test for byname ignore

* Move annotation

* CI rename and enable warnfail, enforce mode always on

* Two step linting

* or one step

* continue on error

* two jobs instead

* Fixes

---------

Co-authored-by: disinvite <disinvite@users.noreply.github.com>
This commit is contained in:
Christian Semmler
2023-12-12 14:27:17 -05:00
committed by GitHub
parent 4dd0d60dec
commit 3b155bfe38
73 changed files with 838 additions and 650 deletions

View File

@@ -12,7 +12,7 @@ We are continually working on extending the capabilities of our "decompilation l
All non-inlined functions in the code base with the exception of [3rd party code](/3rdparty) must be annotated with one of the following markers, which include the module name and address of the function as found in the original binaries. This information is then used to compare the recompiled assembly with the original assembly, resulting in an accuracy score. Functions in a given compilation unit must be ordered by their address in ascending order.
The annotations can be attached to the function implementation, which is the most common case, or use the "comment" syntax (see examples below) for functions that cannot be referred to directly (such as templated, synthetic or non-inlined inline functions). They should appear exclusively in `.cpp` files.
The annotations can be attached to the function implementation, which is the most common case, or use the "comment" syntax (see examples below) for functions that cannot be referred to directly (such as templated, synthetic or non-inlined inline functions). The latter should only ever appear in `.h` files.
### `FUNCTION`

View File

@@ -1,127 +0,0 @@
import os
import sys
import argparse
from isledecomp.dir import walk_source_dir, is_file_cpp
from isledecomp.parser import DecompParser
def sig_truncate(sig: str) -> str:
"""Helper to truncate function names to 50 chars and append ellipsis
if needed. Goal is to stay under 80 columns for tool output."""
return f"{sig[:47]}{'...' if len(sig) >= 50 else ''}"
def check_file(filename: str, verbose: bool = False) -> bool:
"""Open and read the given file, then check whether the code blocks
are in order. If verbose, print each block."""
parser = DecompParser()
with open(filename, "r", encoding="utf-8") as f:
parser.read_lines(f)
just_offsets = [block.offset for block in parser.functions]
sorted_offsets = sorted(just_offsets)
file_out_of_order = just_offsets != sorted_offsets
# TODO: When we add parser error severity, actual errors that obstruct
# parsing should probably be shown here regardless of verbose mode
# If we detect inexact comments, don't print anything unless we are
# in verbose mode. If the file is out of order, we always print the
# file name.
should_report = (len(parser.alerts) > 0 and verbose) or file_out_of_order
if not should_report and not file_out_of_order:
return False
# Else: we are alerting to some problem in this file
print(filename)
if verbose:
if file_out_of_order:
order_lookup = {k: i for i, k in enumerate(sorted_offsets)}
prev_offset = 0
for fun in parser.functions:
msg = " ".join(
[
" " if fun.offset > prev_offset else "!",
f"{fun.offset:08x}",
f"{fun.end_line - fun.line_number:4} lines",
f"{order_lookup[fun.offset]:3}",
" ",
sig_truncate(fun.name),
]
)
print(msg)
prev_offset = fun.offset
for alert in parser.alerts:
print(f"* line {alert.line_number:4} {alert.code} ({alert.line})")
print()
return file_out_of_order
def parse_args(test_args: list | None = None) -> dict:
p = argparse.ArgumentParser(
description="Checks the source files to make sure the function offset comments are in order",
)
p.add_argument("target", help="The file or directory to check.")
p.add_argument(
"--enforce",
action=argparse.BooleanOptionalAction,
default=False,
help="Fail with error code if target is out of order.",
)
p.add_argument(
"--verbose",
action=argparse.BooleanOptionalAction,
default=False,
help=(
"Display each code block in the file and show "
"where each consecutive run of blocks is broken."
),
)
if test_args is None:
args = p.parse_args()
else:
args = p.parse_args(test_args)
return vars(args)
def main():
args = parse_args()
if os.path.isdir(args["target"]):
files_to_check = list(walk_source_dir(args["target"]))
elif os.path.isfile(args["target"]) and is_file_cpp(args["target"]):
files_to_check = [args["target"]]
else:
sys.exit("Invalid target")
files_out_of_order = 0
for file in files_to_check:
is_jumbled = check_file(file, args["verbose"])
if is_jumbled:
files_out_of_order += 1
if files_out_of_order > 0:
error_message = " ".join(
[
str(files_out_of_order),
"files are" if files_out_of_order > 1 else "file is",
"out of order",
]
)
print(error_message)
if files_out_of_order > 0 and args["enforce"]:
sys.exit(1)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,99 @@
import os
import sys
import argparse
import colorama
from isledecomp.dir import walk_source_dir, is_file_cpp
from isledecomp.parser import DecompLinter
colorama.init()
def display_errors(alerts, filename):
sorted_alerts = sorted(alerts, key=lambda a: a.line_number)
for alert in sorted_alerts:
error_type = (
f"{colorama.Fore.RED}error: "
if alert.is_error()
else f"{colorama.Fore.YELLOW}warning: "
)
components = [
colorama.Fore.LIGHTWHITE_EX,
filename,
":",
str(alert.line_number),
" : ",
error_type,
colorama.Fore.LIGHTWHITE_EX,
alert.code.name.lower(),
]
print("".join(components))
if alert.line is not None:
print(f"{colorama.Fore.WHITE} {alert.line}")
def parse_args() -> argparse.Namespace:
p = argparse.ArgumentParser(
description="Syntax checking and linting for decomp annotation markers."
)
p.add_argument("target", help="The file or directory to check.")
p.add_argument(
"--module",
required=False,
type=str,
help="If present, run targeted checks for markers from the given module.",
)
p.add_argument(
"--warnfail",
action=argparse.BooleanOptionalAction,
default=False,
help="Fail if syntax warnings are found.",
)
(args, _) = p.parse_known_args()
return args
def process_files(files, module=None):
warning_count = 0
error_count = 0
linter = DecompLinter()
for filename in files:
success = linter.check_file(filename, module)
warnings = [a for a in linter.alerts if a.is_warning()]
errors = [a for a in linter.alerts if a.is_error()]
error_count += len(errors)
warning_count += len(warnings)
if not success:
display_errors(linter.alerts, filename)
print()
return (warning_count, error_count)
def main():
args = parse_args()
if os.path.isdir(args.target):
files_to_check = list(walk_source_dir(args.target))
elif os.path.isfile(args.target) and is_file_cpp(args.target):
files_to_check = [args.target]
else:
sys.exit("Invalid target")
(warning_count, error_count) = process_files(files_to_check, module=args.module)
print(colorama.Style.RESET_ALL, end="")
would_fail = error_count > 0 or (warning_count > 0 and args.warnfail)
if would_fail:
sys.exit(1)
if __name__ == "__main__":
main()

View File

@@ -1 +1,2 @@
from .parser import DecompParser
from .linter import DecompLinter

View File

@@ -1,6 +1,9 @@
from enum import Enum
from typing import Optional
from dataclasses import dataclass
# TODO: poorly chosen name, should be AlertType or AlertCode or something
class ParserError(Enum):
# WARN: Stub function exceeds some line number threshold
UNLIKELY_STUB = 100
@@ -29,6 +32,16 @@ class ParserError(Enum):
# and the start of the function. We can ignore it, but the line shouldn't be there
UNEXPECTED_BLANK_LINE = 107
# WARN: We called the finish() method for the parser but had not reached the starting
# state of SEARCH
UNEXPECTED_END_OF_FILE = 108
# WARN: We found a marker to be referenced by name outside of a header file.
BYNAME_FUNCTION_IN_CPP = 109
# This code or higher is an error, not a warning
DECOMP_ERROR_START = 200
# ERROR: We found a marker unexpectedly
UNEXPECTED_MARKER = 200
@@ -39,3 +52,20 @@ class ParserError(Enum):
# ERROR: The line following a synthetic marker was not a comment
BAD_SYNTHETIC = 202
# ERROR: This function offset comes before the previous offset from the same module
# This hopefully gives some hint about which functions need to be rearranged.
FUNCTION_OUT_OF_ORDER = 203
@dataclass
class ParserAlert:
code: ParserError
line_number: int
line: Optional[str] = None
def is_warning(self) -> bool:
return self.code.value < ParserError.DECOMP_ERROR_START.value
def is_error(self) -> bool:
return self.code.value >= ParserError.DECOMP_ERROR_START.value

View File

@@ -0,0 +1,99 @@
from typing import List, Optional
from .parser import DecompParser
from .error import ParserAlert, ParserError
def get_checkorder_filter(module):
"""Return a filter function on implemented functions in the given module"""
return lambda fun: fun.module == module and not fun.lookup_by_name
class DecompLinter:
def __init__(self) -> None:
self.alerts: List[ParserAlert] = []
self._parser = DecompParser()
self._filename: str = ""
self._module: Optional[str] = None
def reset(self):
self.alerts = []
self._parser.reset()
self._filename = ""
self._module = None
def file_is_header(self):
return self._filename.lower().endswith(".h")
def _check_function_order(self):
"""Rules:
1. Only markers that are implemented in the file are considered. This means we
only look at markers that are cross-referenced with cvdump output by their line
number. Markers with the lookup_by_name flag set are ignored because we cannot
directly influence their order.
2. Order should be considered for a single module only. If we have multiple
markers for a single function (i.e. for LEGO1 functions linked statically to
ISLE) then the virtual address space will be very different. If we don't check
for one module only, we would incorrectly report that the file is out of order.
"""
if self._module is None:
return
checkorder_filter = get_checkorder_filter(self._module)
last_offset = None
for fun in filter(checkorder_filter, self._parser.functions):
if last_offset is not None:
if fun.offset < last_offset:
self.alerts.append(
ParserAlert(
code=ParserError.FUNCTION_OUT_OF_ORDER,
line_number=fun.line_number,
)
)
last_offset = fun.offset
def _check_offset_uniqueness(self):
# TODO
pass
def _check_byname_allowed(self):
if self.file_is_header():
return
for fun in self._parser.functions:
if fun.lookup_by_name:
self.alerts.append(
ParserAlert(
code=ParserError.BYNAME_FUNCTION_IN_CPP,
line_number=fun.line_number,
)
)
def check_lines(self, lines, filename, module=None):
"""`lines` is a generic iterable to allow for testing with a list of strings.
We assume lines has the entire contents of the compilation unit."""
self.reset()
self._filename = filename
self._module = module
self._parser.read_lines(lines)
self._parser.finish()
self.alerts = self._parser.alerts[::]
if self._module is not None:
self._check_byname_allowed()
self._check_offset_uniqueness()
if not self.file_is_header():
self._check_function_order()
return len(self.alerts) == 0
def check_file(self, filename, module=None):
"""Convenience method for decomplint cli tool"""
with open(filename, "r", encoding="utf-8") as f:
return self.check_lines(f, filename, module)

View File

@@ -6,12 +6,6 @@ class ParserNode:
line_number: int
@dataclass
class ParserAlert(ParserNode):
code: int
line: str
@dataclass
class ParserSymbol(ParserNode):
module: str

View File

@@ -12,12 +12,11 @@ from .util import (
remove_trailing_comment,
)
from .node import (
ParserAlert,
ParserFunction,
ParserVariable,
ParserVtable,
)
from .error import ParserError
from .error import ParserAlert, ParserError
class ReaderState(Enum):
@@ -29,6 +28,7 @@ class ReaderState(Enum):
IN_GLOBAL = 5
IN_FUNC_GLOBAL = 6
IN_VTABLE = 7
DONE = 100
def marker_is_stub(marker: DecompMarker) -> bool:
@@ -56,7 +56,7 @@ def marker_is_vtable(marker: DecompMarker) -> bool:
class MarkerDict:
def __init__(self):
def __init__(self) -> None:
self.markers: dict = {}
def insert(self, marker: DecompMarker) -> bool:
@@ -80,7 +80,7 @@ class DecompParser:
# pylint: disable=too-many-instance-attributes
# Could combine output lists into a single list to get under the limit,
# but not right now
def __init__(self):
def __init__(self) -> None:
# The lists to be populated as we parse
self.functions: List[ParserFunction] = []
self.vtables: List[ParserVtable] = []
@@ -306,6 +306,9 @@ class DecompParser:
self._syntax_warning(ParserError.BOGUS_MARKER)
def read_line(self, line: str):
if self.state == ReaderState.DONE:
return
self.last_line = line # TODO: Useful or hack for error reporting?
self.line_number += 1
@@ -392,3 +395,9 @@ class DecompParser:
def read_lines(self, lines: Iterable):
for line in lines:
self.read_line(line)
def finish(self):
if self.state != ReaderState.SEARCH:
self._syntax_warning(ParserError.UNEXPECTED_END_OF_FILE)
self.state = ReaderState.DONE

View File

@@ -0,0 +1,85 @@
import pytest
from isledecomp.parser import DecompLinter
from isledecomp.parser.error import ParserError
@pytest.fixture(name="linter")
def fixture_linter():
return DecompLinter()
def test_simple_in_order(linter):
lines = [
"// FUNCTION: TEST 0x1000",
"void function1() {}",
"// FUNCTION: TEST 0x2000",
"void function2() {}",
"// FUNCTION: TEST 0x3000",
"void function3() {}",
]
assert linter.check_lines(lines, "test.cpp", "TEST") is True
def test_simple_not_in_order(linter):
lines = [
"// FUNCTION: TEST 0x1000",
"void function1() {}",
"// FUNCTION: TEST 0x3000",
"void function3() {}",
"// FUNCTION: TEST 0x2000",
"void function2() {}",
]
assert linter.check_lines(lines, "test.cpp", "TEST") is False
assert len(linter.alerts) == 1
assert linter.alerts[0].code == ParserError.FUNCTION_OUT_OF_ORDER
# N.B. Line number given is the start of the function, not the marker
assert linter.alerts[0].line_number == 6
def test_byname_ignored(linter):
"""Should ignore lookup-by-name markers when checking order."""
lines = [
"// FUNCTION: TEST 0x1000",
"void function1() {}",
"// FUNCTION: TEST 0x3000",
"// MyClass::MyMethod",
"// FUNCTION: TEST 0x2000",
"void function2() {}",
]
# This will fail because byname lookup does not belong in the cpp file
assert linter.check_lines(lines, "test.cpp", "TEST") is False
# but it should not fail for function order.
assert all(
alert.code != ParserError.FUNCTION_OUT_OF_ORDER for alert in linter.alerts
)
def test_module_isolation(linter):
"""Should check the order of markers from a single module only."""
lines = [
"// FUNCTION: ALPHA 0x0001",
"// FUNCTION: TEST 0x1000",
"void function1() {}",
"// FUNCTION: ALPHA 0x0002",
"// FUNCTION: TEST 0x2000",
"void function2() {}",
"// FUNCTION: ALPHA 0x0003",
"// FUNCTION: TEST 0x3000",
"void function3() {}",
]
assert linter.check_lines(lines, "test.cpp", "TEST") is True
assert linter.check_lines(lines, "test.cpp", "ALPHA") is True
def test_byname_headers_only(linter):
"""Markers that ar referenced by name with cvdump belong in header files only."""
lines = [
"// FUNCTION: TEST 0x1000",
"// MyClass::~MyClass",
]
assert linter.check_lines(lines, "test.h", "TEST") is True
assert linter.check_lines(lines, "test.cpp", "TEST") is False
assert linter.alerts[0].code == ParserError.BYNAME_FUNCTION_IN_CPP

View File

@@ -358,3 +358,20 @@ def test_function_is_commented(parser):
)
assert len(parser.functions) == 0
def test_unexpected_eof(parser):
"""If a decomp marker finds its way to the last line of the file,
report that we could not get anything from it."""
parser.read_lines(
[
"// FUNCTION: TEST 0x1234",
"// Cls::Method",
"// FUNCTION: TEST 0x5555",
]
)
parser.finish()
assert len(parser.functions) == 1
assert len(parser.alerts) == 1
assert parser.alerts[0].code == ParserError.UNEXPECTED_END_OF_FILE