mirror of
https://github.com/isledecomp/isle.git
synced 2025-10-27 18:34:06 +00:00
Python Linting and Code Formatting (#298)
* Create common print_diff function * Add pylint and black * Fix linting, move classes to utils * Add black/pylint to github actions * Fix linting * Move Bin and SymInfo into their own files * Split out format * Tidy up workflows and pip, add readme * Lint tests, add tests to readme
This commit is contained in:
@@ -7,7 +7,6 @@ from .util import (
|
||||
OffsetMatch,
|
||||
is_blank_or_comment,
|
||||
match_offset_comment,
|
||||
is_exact_offset_comment,
|
||||
get_template_function_name,
|
||||
remove_trailing_comment,
|
||||
distinct_by_module,
|
||||
@@ -25,10 +24,10 @@ class ReaderState(Enum):
|
||||
|
||||
def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
|
||||
"""Read the IO stream (file) line-by-line and give the following report:
|
||||
Foreach code block (function) in the file, what are its starting and
|
||||
ending line numbers, and what is the given offset in the original
|
||||
binary. We expect the result to be ordered by line number because we
|
||||
are reading the file from start to finish."""
|
||||
Foreach code block (function) in the file, what are its starting and
|
||||
ending line numbers, and what is the given offset in the original
|
||||
binary. We expect the result to be ordered by line number because we
|
||||
are reading the file from start to finish."""
|
||||
|
||||
blocks: List[CodeBlock] = []
|
||||
|
||||
@@ -51,14 +50,16 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
|
||||
# Our list of offset marks could have duplicates on
|
||||
# module name, so we'll eliminate those now.
|
||||
for offset_match in distinct_by_module(offset_matches):
|
||||
block = CodeBlock(offset=offset_match.address,
|
||||
signature=function_sig,
|
||||
start_line=start_line,
|
||||
end_line=end_line,
|
||||
offset_comment=offset_match.comment,
|
||||
module=offset_match.module,
|
||||
is_template=offset_match.is_template,
|
||||
is_stub=offset_match.is_stub)
|
||||
block = CodeBlock(
|
||||
offset=offset_match.address,
|
||||
signature=function_sig,
|
||||
start_line=start_line,
|
||||
end_line=end_line,
|
||||
offset_comment=offset_match.comment,
|
||||
module=offset_match.module,
|
||||
is_template=offset_match.is_template,
|
||||
is_stub=offset_match.is_stub,
|
||||
)
|
||||
blocks.append(block)
|
||||
offset_matches = []
|
||||
state = ReaderState.WANT_OFFSET
|
||||
@@ -66,15 +67,18 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
|
||||
if can_seek:
|
||||
line_no += 1
|
||||
line = stream.readline()
|
||||
if line == '':
|
||||
if line == "":
|
||||
break
|
||||
|
||||
new_match = match_offset_comment(line)
|
||||
if new_match is not None:
|
||||
# We will allow multiple offsets if we have just begun
|
||||
# the code block, but not after we hit the curly brace.
|
||||
if state in (ReaderState.WANT_OFFSET, ReaderState.IN_TEMPLATE,
|
||||
ReaderState.WANT_SIG):
|
||||
if state in (
|
||||
ReaderState.WANT_OFFSET,
|
||||
ReaderState.IN_TEMPLATE,
|
||||
ReaderState.WANT_SIG,
|
||||
):
|
||||
# If we detected an offset marker unexpectedly,
|
||||
# we are handling it here so we can continue seeking.
|
||||
can_seek = True
|
||||
@@ -116,11 +120,10 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
|
||||
# same line. clang-format should prevent this (BraceWrapping)
|
||||
# but it is easy to detect.
|
||||
# If the entire function is on one line, handle that too.
|
||||
if function_sig.endswith('{'):
|
||||
if function_sig.endswith("{"):
|
||||
start_line = line_no
|
||||
state = ReaderState.IN_FUNC
|
||||
elif (function_sig.endswith('}') or
|
||||
function_sig.endswith('};')):
|
||||
elif function_sig.endswith("}") or function_sig.endswith("};"):
|
||||
start_line = line_no
|
||||
end_line = line_no
|
||||
state = ReaderState.FUNCTION_DONE
|
||||
@@ -128,14 +131,14 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
|
||||
state = ReaderState.WANT_CURLY
|
||||
|
||||
elif state == ReaderState.WANT_CURLY:
|
||||
if line.strip() == '{':
|
||||
if line.strip() == "{":
|
||||
start_line = line_no
|
||||
state = ReaderState.IN_FUNC
|
||||
|
||||
elif state == ReaderState.IN_FUNC:
|
||||
# Naive but reasonable assumption that functions will end with
|
||||
# a curly brace on its own line with no prepended spaces.
|
||||
if line.startswith('}'):
|
||||
if line.startswith("}"):
|
||||
end_line = line_no
|
||||
state = ReaderState.FUNCTION_DONE
|
||||
|
||||
|
||||
@@ -5,34 +5,49 @@ from typing import List
|
||||
from collections import namedtuple
|
||||
|
||||
|
||||
CodeBlock = namedtuple('CodeBlock',
|
||||
['offset', 'signature', 'start_line', 'end_line',
|
||||
'offset_comment', 'module', 'is_template', 'is_stub'])
|
||||
CodeBlock = namedtuple(
|
||||
"CodeBlock",
|
||||
[
|
||||
"offset",
|
||||
"signature",
|
||||
"start_line",
|
||||
"end_line",
|
||||
"offset_comment",
|
||||
"module",
|
||||
"is_template",
|
||||
"is_stub",
|
||||
],
|
||||
)
|
||||
|
||||
OffsetMatch = namedtuple('OffsetMatch', ['module', 'address', 'is_template',
|
||||
'is_stub', 'comment'])
|
||||
OffsetMatch = namedtuple(
|
||||
"OffsetMatch", ["module", "address", "is_template", "is_stub", "comment"]
|
||||
)
|
||||
|
||||
# This has not been formally established, but considering that "STUB"
|
||||
# is a temporary state for a function, we assume it will appear last,
|
||||
# after any other modifiers (i.e. TEMPLATE)
|
||||
|
||||
# To match a reasonable variance of formatting for the offset comment
|
||||
offsetCommentRegex = re.compile(r'\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?', # nopep8
|
||||
flags=re.I)
|
||||
offsetCommentRegex = re.compile(
|
||||
r"\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?", # nopep8
|
||||
flags=re.I,
|
||||
)
|
||||
|
||||
# To match the exact syntax (text upper case, hex lower case, with spaces)
|
||||
# that is used in most places
|
||||
offsetCommentExactRegex = re.compile(r'^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$') # nopep8
|
||||
offsetCommentExactRegex = re.compile(
|
||||
r"^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$"
|
||||
) # nopep8
|
||||
|
||||
|
||||
# The goal here is to just read whatever is on the next line, so some
|
||||
# flexibility in the formatting seems OK
|
||||
templateCommentRegex = re.compile(r'\s*//\s+(.*)')
|
||||
templateCommentRegex = re.compile(r"\s*//\s+(.*)")
|
||||
|
||||
|
||||
# To remove any comment (//) or block comment (/*) and its leading spaces
|
||||
# from the end of a code line
|
||||
trailingCommentRegex = re.compile(r'(\s*(?://|/\*).*)$')
|
||||
trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")
|
||||
|
||||
|
||||
def get_template_function_name(line: str) -> str:
|
||||
@@ -47,23 +62,25 @@ def get_template_function_name(line: str) -> str:
|
||||
|
||||
|
||||
def remove_trailing_comment(line: str) -> str:
|
||||
return trailingCommentRegex.sub('', line)
|
||||
return trailingCommentRegex.sub("", line)
|
||||
|
||||
|
||||
def is_blank_or_comment(line: str) -> bool:
|
||||
"""Helper to read ahead after the offset comment is matched.
|
||||
There could be blank lines or other comments before the
|
||||
function signature, and we want to skip those."""
|
||||
There could be blank lines or other comments before the
|
||||
function signature, and we want to skip those."""
|
||||
line_strip = line.strip()
|
||||
return (len(line_strip) == 0
|
||||
or line_strip.startswith('//')
|
||||
or line_strip.startswith('/*')
|
||||
or line_strip.endswith('*/'))
|
||||
return (
|
||||
len(line_strip) == 0
|
||||
or line_strip.startswith("//")
|
||||
or line_strip.startswith("/*")
|
||||
or line_strip.endswith("*/")
|
||||
)
|
||||
|
||||
|
||||
def is_exact_offset_comment(line: str) -> bool:
|
||||
"""If the offset comment does not match our (unofficial) syntax
|
||||
we may want to alert the user to fix it for style points."""
|
||||
we may want to alert the user to fix it for style points."""
|
||||
return offsetCommentExactRegex.match(line) is not None
|
||||
|
||||
|
||||
@@ -72,17 +89,19 @@ def match_offset_comment(line: str) -> OffsetMatch | None:
|
||||
if match is None:
|
||||
return None
|
||||
|
||||
return OffsetMatch(module=match.group(1),
|
||||
address=int(match.group(2), 16),
|
||||
is_template=match.group(3) is not None,
|
||||
is_stub=match.group(4) is not None,
|
||||
comment=line.strip())
|
||||
return OffsetMatch(
|
||||
module=match.group(1),
|
||||
address=int(match.group(2), 16),
|
||||
is_template=match.group(3) is not None,
|
||||
is_stub=match.group(4) is not None,
|
||||
comment=line.strip(),
|
||||
)
|
||||
|
||||
|
||||
def distinct_by_module(offsets: List) -> List:
|
||||
"""Given a list of offset markers, return a list with distinct
|
||||
module names. If module names (case-insensitive) are repeated,
|
||||
choose the offset that appears first."""
|
||||
module names. If module names (case-insensitive) are repeated,
|
||||
choose the offset that appears first."""
|
||||
|
||||
if len(offsets) < 2:
|
||||
return offsets
|
||||
|
||||
Reference in New Issue
Block a user