Python Linting and Code Formatting (#298)

* Create common print_diff function

* Add pylint and black

* Fix linting, move classes to utils

* Add black/pylint to github actions

* Fix linting

* Move Bin and SymInfo into their own files

* Split out format

* Tidy up workflows and pip, add readme

* Lint tests, add tests to readme
This commit is contained in:
Thomas Phillips
2023-11-26 07:27:42 +13:00
committed by GitHub
parent fb0d1ccb62
commit b14116cc93
22 changed files with 1675 additions and 789 deletions

View File

@@ -7,7 +7,6 @@ from .util import (
OffsetMatch,
is_blank_or_comment,
match_offset_comment,
is_exact_offset_comment,
get_template_function_name,
remove_trailing_comment,
distinct_by_module,
@@ -25,10 +24,10 @@ class ReaderState(Enum):
def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
"""Read the IO stream (file) line-by-line and give the following report:
Foreach code block (function) in the file, what are its starting and
ending line numbers, and what is the given offset in the original
binary. We expect the result to be ordered by line number because we
are reading the file from start to finish."""
Foreach code block (function) in the file, what are its starting and
ending line numbers, and what is the given offset in the original
binary. We expect the result to be ordered by line number because we
are reading the file from start to finish."""
blocks: List[CodeBlock] = []
@@ -51,14 +50,16 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
# Our list of offset marks could have duplicates on
# module name, so we'll eliminate those now.
for offset_match in distinct_by_module(offset_matches):
block = CodeBlock(offset=offset_match.address,
signature=function_sig,
start_line=start_line,
end_line=end_line,
offset_comment=offset_match.comment,
module=offset_match.module,
is_template=offset_match.is_template,
is_stub=offset_match.is_stub)
block = CodeBlock(
offset=offset_match.address,
signature=function_sig,
start_line=start_line,
end_line=end_line,
offset_comment=offset_match.comment,
module=offset_match.module,
is_template=offset_match.is_template,
is_stub=offset_match.is_stub,
)
blocks.append(block)
offset_matches = []
state = ReaderState.WANT_OFFSET
@@ -66,15 +67,18 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
if can_seek:
line_no += 1
line = stream.readline()
if line == '':
if line == "":
break
new_match = match_offset_comment(line)
if new_match is not None:
# We will allow multiple offsets if we have just begun
# the code block, but not after we hit the curly brace.
if state in (ReaderState.WANT_OFFSET, ReaderState.IN_TEMPLATE,
ReaderState.WANT_SIG):
if state in (
ReaderState.WANT_OFFSET,
ReaderState.IN_TEMPLATE,
ReaderState.WANT_SIG,
):
# If we detected an offset marker unexpectedly,
# we are handling it here so we can continue seeking.
can_seek = True
@@ -116,11 +120,10 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
# same line. clang-format should prevent this (BraceWrapping)
# but it is easy to detect.
# If the entire function is on one line, handle that too.
if function_sig.endswith('{'):
if function_sig.endswith("{"):
start_line = line_no
state = ReaderState.IN_FUNC
elif (function_sig.endswith('}') or
function_sig.endswith('};')):
elif function_sig.endswith("}") or function_sig.endswith("};"):
start_line = line_no
end_line = line_no
state = ReaderState.FUNCTION_DONE
@@ -128,14 +131,14 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
state = ReaderState.WANT_CURLY
elif state == ReaderState.WANT_CURLY:
if line.strip() == '{':
if line.strip() == "{":
start_line = line_no
state = ReaderState.IN_FUNC
elif state == ReaderState.IN_FUNC:
# Naive but reasonable assumption that functions will end with
# a curly brace on its own line with no prepended spaces.
if line.startswith('}'):
if line.startswith("}"):
end_line = line_no
state = ReaderState.FUNCTION_DONE

View File

@@ -5,34 +5,49 @@ from typing import List
from collections import namedtuple
CodeBlock = namedtuple('CodeBlock',
['offset', 'signature', 'start_line', 'end_line',
'offset_comment', 'module', 'is_template', 'is_stub'])
CodeBlock = namedtuple(
"CodeBlock",
[
"offset",
"signature",
"start_line",
"end_line",
"offset_comment",
"module",
"is_template",
"is_stub",
],
)
OffsetMatch = namedtuple('OffsetMatch', ['module', 'address', 'is_template',
'is_stub', 'comment'])
OffsetMatch = namedtuple(
"OffsetMatch", ["module", "address", "is_template", "is_stub", "comment"]
)
# This has not been formally established, but considering that "STUB"
# is a temporary state for a function, we assume it will appear last,
# after any other modifiers (i.e. TEMPLATE)
# To match a reasonable variance of formatting for the offset comment
offsetCommentRegex = re.compile(r'\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?', # nopep8
flags=re.I)
offsetCommentRegex = re.compile(
r"\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?", # nopep8
flags=re.I,
)
# To match the exact syntax (text upper case, hex lower case, with spaces)
# that is used in most places
offsetCommentExactRegex = re.compile(r'^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$') # nopep8
offsetCommentExactRegex = re.compile(
r"^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$"
) # nopep8
# The goal here is to just read whatever is on the next line, so some
# flexibility in the formatting seems OK
templateCommentRegex = re.compile(r'\s*//\s+(.*)')
templateCommentRegex = re.compile(r"\s*//\s+(.*)")
# To remove any comment (//) or block comment (/*) and its leading spaces
# from the end of a code line
trailingCommentRegex = re.compile(r'(\s*(?://|/\*).*)$')
trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")
def get_template_function_name(line: str) -> str:
@@ -47,23 +62,25 @@ def get_template_function_name(line: str) -> str:
def remove_trailing_comment(line: str) -> str:
return trailingCommentRegex.sub('', line)
return trailingCommentRegex.sub("", line)
def is_blank_or_comment(line: str) -> bool:
"""Helper to read ahead after the offset comment is matched.
There could be blank lines or other comments before the
function signature, and we want to skip those."""
There could be blank lines or other comments before the
function signature, and we want to skip those."""
line_strip = line.strip()
return (len(line_strip) == 0
or line_strip.startswith('//')
or line_strip.startswith('/*')
or line_strip.endswith('*/'))
return (
len(line_strip) == 0
or line_strip.startswith("//")
or line_strip.startswith("/*")
or line_strip.endswith("*/")
)
def is_exact_offset_comment(line: str) -> bool:
"""If the offset comment does not match our (unofficial) syntax
we may want to alert the user to fix it for style points."""
we may want to alert the user to fix it for style points."""
return offsetCommentExactRegex.match(line) is not None
@@ -72,17 +89,19 @@ def match_offset_comment(line: str) -> OffsetMatch | None:
if match is None:
return None
return OffsetMatch(module=match.group(1),
address=int(match.group(2), 16),
is_template=match.group(3) is not None,
is_stub=match.group(4) is not None,
comment=line.strip())
return OffsetMatch(
module=match.group(1),
address=int(match.group(2), 16),
is_template=match.group(3) is not None,
is_stub=match.group(4) is not None,
comment=line.strip(),
)
def distinct_by_module(offsets: List) -> List:
"""Given a list of offset markers, return a list with distinct
module names. If module names (case-insensitive) are repeated,
choose the offset that appears first."""
module names. If module names (case-insensitive) are repeated,
choose the offset that appears first."""
if len(offsets) < 2:
return offsets