Mirror of https://github.com/isledecomp/isle.git (synced 2025-10-23 16:34:06 +00:00)

Commit: Merge from parser2 branch
Order-checking script (check_file):
@@ -2,8 +2,7 @@ import os
 import sys
 import argparse
 from isledecomp.dir import walk_source_dir, is_file_cpp
-from isledecomp.parser import find_code_blocks
-from isledecomp.parser.util import is_exact_offset_comment
+from isledecomp.parser import DecompParser


 def sig_truncate(sig: str) -> str:
@@ -16,23 +15,22 @@ def check_file(filename: str, verbose: bool = False) -> bool:
     """Open and read the given file, then check whether the code blocks
     are in order. If verbose, print each block."""

+    parser = DecompParser()
     with open(filename, "r", encoding="utf-8") as f:
-        code_blocks = find_code_blocks(f)
+        parser.read_lines(f)

-    bad_comments = [
-        (block.start_line, block.offset_comment)
-        for block in code_blocks
-        if not is_exact_offset_comment(block.offset_comment)
-    ]
-
-    just_offsets = [block.offset for block in code_blocks]
+    just_offsets = [block.offset for block in parser.functions]
     sorted_offsets = sorted(just_offsets)
     file_out_of_order = just_offsets != sorted_offsets

+    # TODO: When we add parser error severity, actual errors that obstruct
+    # parsing should probably be shown here regardless of verbose mode
+
     # If we detect inexact comments, don't print anything unless we are
     # in verbose mode. If the file is out of order, we always print the
     # file name.
-    should_report = (len(bad_comments) > 0 and verbose) or file_out_of_order
+    should_report = (len(parser.alerts) > 0 and verbose) or file_out_of_order

     if not should_report and not file_out_of_order:
         return False
@@ -44,22 +42,22 @@ def check_file(filename: str, verbose: bool = False) -> bool:
     order_lookup = {k: i for i, k in enumerate(sorted_offsets)}
     prev_offset = 0

-    for block in code_blocks:
+    for fun in parser.functions:
         msg = " ".join(
             [
-                " " if block.offset > prev_offset else "!",
+                " " if fun.offset > prev_offset else "!",
                 f"{block.offset:08x}",
-                f"{block.end_line - block.start_line:4} lines",
-                f"{order_lookup[block.offset]:3}",
+                f"{fun.end_line - fun.line_number:4} lines",
+                f"{order_lookup[fun.offset]:3}",
                 " ",
-                sig_truncate(block.signature),
+                sig_truncate(fun.signature),
             ]
         )
         print(msg)
-        prev_offset = block.offset
+        prev_offset = fun.offset

-    for line_no, line in bad_comments:
-        print(f"* line {line_no:3} bad offset comment ({line})")
+    for alert in parser.alerts:
+        print(f"* line {alert.line_number:4} {alert.code} ({alert.line})")

     print()

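The same ordering check can be reproduced against the new interface. A minimal sketch, assuming the isledecomp package from this commit is importable; the input path is only illustrative:

from isledecomp.parser import DecompParser

def file_is_in_order(filename: str) -> bool:
    # Same idea as check_file() above, without the verbose report
    parser = DecompParser()
    with open(filename, "r", encoding="utf-8") as f:
        parser.read_lines(f)
    offsets = [fun.offset for fun in parser.functions]
    return offsets == sorted(offsets)

print(file_is_in_order("LEGO1/legoomni.cpp"))  # hypothetical input file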
tools/isledecomp/isledecomp/parser/__init__.py:
@@ -1 +1 @@
-from .parser import find_code_blocks
+from .parser import DecompParser
tools/isledecomp/isledecomp/parser/error.py (new file, 33 lines)

from enum import Enum


class ParserError(Enum):
    # WARN: Stub function exceeds some line number threshold
    UNLIKELY_STUB = 100

    # WARN: Decomp marker is close enough to be recognized, but does not follow syntax exactly
    BAD_DECOMP_MARKER = 101

    # WARN: Multiple markers in sequence do not have distinct modules
    DUPLICATE_MODULE = 102

    # WARN: Detected a duplicate module/offset pair in the current file
    DUPLICATE_OFFSET = 103

    # WARN: We read a line that matches the decomp marker pattern, but we are not set up
    # to handle it
    BOGUS_MARKER = 104

    # WARN: Under a synthetic marker we expected a comment but found a code line instead
    SYNTHETIC_NOT_COMMENT = 110

    # WARN: New function marker appeared while we were inside a function
    MISSED_END_OF_FUNCTION = 117

    # ERROR: We found a marker unexpectedly
    UNEXPECTED_MARKER = 200

    # ERROR: We found a marker where we expected to find one, but it is incompatible
    # with the preceding markers.
    # For example, a GLOBAL cannot follow FUNCTION/STUB
    INCOMPATIBLE_MARKER = 201
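The numbering suggests a severity split: codes commented WARN sit in the 100s, codes commented ERROR start at 200. A sketch of how a caller might use that convention; the threshold of 200 is an assumption drawn from the comments above, not an API of this module:

from isledecomp.parser.error import ParserError

def is_warning(code: ParserError) -> bool:
    # Assumption: codes below 200 are warnings, 200 and up are errors
    return code.value < 200

print(is_warning(ParserError.BAD_DECOMP_MARKER))    # True  (101)
print(is_warning(ParserError.INCOMPATIBLE_MARKER))  # False (201)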
tools/isledecomp/isledecomp/parser/node.py (new file, 41 lines)

from dataclasses import dataclass
from enum import Enum


@dataclass
class ParserNode:
    line_number: int


@dataclass
class ParserAlert(ParserNode):
    code: int
    line: str


@dataclass
class ParserSymbol(ParserNode):
    module: str
    offset: int


@dataclass
class ParserFunction(ParserSymbol):
    name: str
    is_stub: bool = False
    is_synthetic: bool = False
    is_template: bool = False
    end_line: int = -1


@dataclass
class ParserVariable(ParserSymbol):
    name: str
    size: int = -1
    is_static: bool = False


@dataclass
class ParserVtable(ParserSymbol):
    class_name: str
    num_entries: int = -1
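For reference, this is roughly what one record looks like when filled in by hand; the values are made up and only illustrate the fields defined above:

from isledecomp.parser.node import ParserFunction

fun = ParserFunction(
    line_number=10,    # line where the function body starts
    module="LEGO1",    # hypothetical module name
    offset=0x10001234, # hypothetical address
    name="void SomeClass::SomeMethod()",
    end_line=13,
)
print(fun.end_line - fun.line_number, "lines")  # 3 lines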
tools/isledecomp/isledecomp/parser/parser.py
@@ -1,145 +1,346 @@
Removed: the previous find_code_blocks() implementation, a readline() loop that collected
OffsetMatch records, deduplicated them with distinct_by_module(), and emitted CodeBlock
tuples, using the old ReaderState values WANT_OFFSET and FUNCTION_DONE. The file now reads:

# C++ file parser

from typing import List, TextIO, Iterable
from enum import Enum
from .util import (
    DecompMarker,
    is_blank_or_comment,
    match_marker,
    is_marker_exact,
    get_template_function_name,
    remove_trailing_comment,
)
from .node import (
    ParserAlert,
    ParserNode,
    ParserFunction,
    ParserVariable,
    ParserVtable,
)
from .error import ParserError


class ReaderState(Enum):
    SEARCH = 0
    WANT_SIG = 1
    IN_FUNC = 2
    IN_TEMPLATE = 3
    WANT_CURLY = 4
    IN_GLOBAL = 5
    IN_FUNC_GLOBAL = 6
    IN_VTABLE = 7


def marker_is_stub(marker: DecompMarker) -> bool:
    return marker.type.upper() == "STUB"


def marker_is_variable(marker: DecompMarker) -> bool:
    return marker.type.upper() == "GLOBAL"


def marker_is_synthetic(marker: DecompMarker) -> bool:
    return marker.type.upper() in ("SYNTHETIC", "TEMPLATE")


def marker_is_function(marker: DecompMarker) -> bool:
    return marker.type.upper() in ("FUNCTION", "STUB")


def marker_is_vtable(marker: DecompMarker) -> bool:
    return marker.type.upper() == "VTABLE"


class MarkerDict:
    def __init__(self):
        self.markers: dict = {}

    def insert(self, marker: DecompMarker) -> bool:
        module = marker.module.upper()
        # Return True if this insert would overwrite
        if module in self.markers:
            return True

        self.markers[module] = (marker.type, marker.offset)
        return False

    def iter(self):
        for module in self.markers:
            (marker_type, offset) = self.markers[module]
            yield DecompMarker(marker_type, module, offset)

    def empty(self):
        self.markers = {}


class DecompParser:
    def __init__(self):
        self.fun_markers = MarkerDict()
        self.var_markers = MarkerDict()
        self.tbl_markers = MarkerDict()
        self.reset()

    def reset(self):
        # Output values
        self.functions = []
        self.vtables = []
        self.variables = []
        self.alerts = []

        # Internal state machine stuff
        self.line_number: int = 0
        self.state: ReaderState = ReaderState.SEARCH

        self.last_line: str = ""
        self.fun_markers.empty()
        self.var_markers.empty()
        self.tbl_markers.empty()
        self.function_start: int = 0
        self.function_sig: str = ""

    def _recover(self):
        """We hit a syntax error and need to reset temp structures"""
        self.state = ReaderState.SEARCH
        self.fun_markers.empty()
        self.var_markers.empty()
        self.tbl_markers.empty()

    def _syntax_warning(self, code):
        self.alerts.append(
            ParserAlert(
                line_number=self.line_number,
                code=code,
                line=self.last_line.strip(),
            )
        )

    def _syntax_error(self, code):
        self._syntax_warning(code)
        self._recover()

    def _function_starts_here(self):
        self.function_start = self.line_number

    def _function_marker(self, marker: DecompMarker):
        if self.fun_markers.insert(marker):
            self._syntax_warning(ParserError.DUPLICATE_MODULE)
        self.state = ReaderState.WANT_SIG

    def _synthetic_marker(self, marker: DecompMarker):
        if self.fun_markers.insert(marker):
            self._syntax_warning(ParserError.DUPLICATE_MODULE)
        self.state = ReaderState.IN_TEMPLATE

    def _function_done(self, unexpected: bool = False):
        end_line = self.line_number
        if unexpected:
            end_line -= -1

        for marker in self.fun_markers.iter():
            self.functions.append(
                ParserFunction(
                    line_number=self.function_start,
                    module=marker.module,
                    offset=marker.offset,
                    is_stub=marker_is_stub(marker),
                    is_template=marker_is_synthetic(marker),
                    name=self.function_sig,
                    end_line=end_line,
                )
            )

        self.fun_markers.empty()
        self.state = ReaderState.SEARCH

    def _vtable_marker(self, marker: DecompMarker):
        if self.tbl_markers.insert(marker):
            self._syntax_warning(ParserError.DUPLICATE_MODULE)
        self.state = ReaderState.IN_VTABLE

    def _vtable_done(self):
        for marker in self.tbl_markers.iter():
            self.vtables.append(
                ParserVtable(
                    line_number=self.line_number,
                    module=marker.module,
                    offset=marker.offset,
                    class_name=self.last_line.strip(),
                )
            )

        self.tbl_markers.empty()
        self.state = ReaderState.SEARCH

    def _variable_marker(self, marker: DecompMarker):
        if self.var_markers.insert(marker):
            self._syntax_warning(ParserError.DUPLICATE_MODULE)

        if self.state in (ReaderState.IN_FUNC, ReaderState.IN_FUNC_GLOBAL):
            self.state = ReaderState.IN_FUNC_GLOBAL
        else:
            self.state = ReaderState.IN_GLOBAL

    def _variable_done(self):
        for marker in self.var_markers.iter():
            self.variables.append(
                ParserVariable(
                    line_number=self.line_number,
                    module=marker.module,
                    offset=marker.offset,
                    name=self.last_line.strip(),
                )
            )

        self.var_markers.empty()
        if self.state == ReaderState.IN_FUNC_GLOBAL:
            self.state = ReaderState.IN_FUNC
        else:
            self.state = ReaderState.SEARCH

    def _handle_marker(self, marker: DecompMarker):
        # Cannot handle any markers between function sig and opening curly brace
        if self.state == ReaderState.WANT_CURLY:
            self._syntax_error(ParserError.UNEXPECTED_MARKER)
            return

        # TODO: How uncertain are we of detecting the end of a function
        # in a clang-formatted file? For now we assume we have missed the
        # end if we detect a non-GLOBAL marker while state is IN_FUNC.
        # Maybe these cases should be syntax errors instead

        if marker_is_function(marker):
            if self.state in (
                ReaderState.SEARCH,
                ReaderState.WANT_SIG,
            ):
                # We will allow multiple offsets if we have just begun
                # the code block, but not after we hit the curly brace.
                self._function_marker(marker)
            elif self.state == ReaderState.IN_FUNC:
                # We hit another offset unexpectedly.
                # We can recover easily by just ending the function here.
                self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
                self._function_done()

                # Start the next function right after so we can
                # read the next line.
                self._function_marker(marker)
            else:
                self._syntax_error(ParserError.INCOMPATIBLE_MARKER)

        elif marker_is_synthetic(marker):
            if self.state in (ReaderState.SEARCH, ReaderState.IN_TEMPLATE):
                self._synthetic_marker(marker)
            elif self.state == ReaderState.IN_FUNC:
                self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
                self._function_done()
                self._synthetic_marker(marker)
            else:
                self._syntax_error(ParserError.INCOMPATIBLE_MARKER)

        elif marker_is_variable(marker):
            if self.state in (
                ReaderState.SEARCH,
                ReaderState.IN_GLOBAL,
                ReaderState.IN_FUNC,
                ReaderState.IN_FUNC_GLOBAL,
            ):
                self._variable_marker(marker)
            else:
                self._syntax_error(ParserError.INCOMPATIBLE_MARKER)

        elif marker_is_vtable(marker):
            if self.state in (ReaderState.SEARCH, ReaderState.IN_VTABLE):
                self._vtable_marker(marker)
            elif self.state == ReaderState.IN_FUNC:
                self._syntax_warning(ParserError.MISSED_END_OF_FUNCTION)
                self._function_done()
                self._vtable_marker(marker)
            else:
                self._syntax_error(ParserError.INCOMPATIBLE_MARKER)

        else:
            self._syntax_warning(ParserError.BOGUS_MARKER)

    def read_line(self, line: str):
        self.last_line = line  # TODO: Useful or hack for error reporting?
        self.line_number += 1

        marker = match_marker(line)
        if marker is not None:
            # TODO: what's the best place for this?
            # Does it belong with reading or marker handling?
            if not is_marker_exact(self.last_line):
                self._syntax_warning(ParserError.BAD_DECOMP_MARKER)
            self._handle_marker(marker)
            return

        if self.state == ReaderState.IN_TEMPLATE:
            # TEMPLATE functions are a special case. The signature is
            # given on the next line (in a // comment)
            self.function_sig = get_template_function_name(line)
            self._function_starts_here()
            self._function_done()

        elif self.state == ReaderState.WANT_SIG:
            # Skip blank lines or comments that come after the offset
            # marker. There is not a formal procedure for this, so just
            # assume the next "code line" is the function signature
            if not is_blank_or_comment(line):
                # Inline functions may end with a comment. Strip that out
                # to help parsing.
                self.function_sig = remove_trailing_comment(line.strip())

                # Now check to see if the opening curly bracket is on the
                # same line. clang-format should prevent this (BraceWrapping)
                # but it is easy to detect.
                # If the entire function is on one line, handle that too.
                if self.function_sig.endswith("{"):
                    self._function_starts_here()
                    self.state = ReaderState.IN_FUNC
                elif self.function_sig.endswith("}") or self.function_sig.endswith(
                    "};"
                ):
                    self._function_starts_here()
                    self._function_done()
                else:
                    self.state = ReaderState.WANT_CURLY

        elif self.state == ReaderState.WANT_CURLY:
            if line.strip() == "{":
                self._function_starts_here()
                self.state = ReaderState.IN_FUNC

        elif self.state == ReaderState.IN_FUNC:
            # Naive but reasonable assumption that functions will end with
            # a curly brace on its own line with no prepended spaces.
            if line.startswith("}"):
                self._function_done()

        elif self.state in (ReaderState.IN_GLOBAL, ReaderState.IN_FUNC_GLOBAL):
            if not is_blank_or_comment(line):
                self._variable_done()

        elif self.state == ReaderState.IN_VTABLE:
            if not is_blank_or_comment(line):
                self._vtable_done()

    def read_lines(self, lines: Iterable):
        for line in lines:
            self.read_line(line)


def find_code_blocks(stream: TextIO) -> List[ParserNode]:
    """Read the IO stream (file) line-by-line and give the following report:
    Foreach code block (function) in the file, what are its starting and
    ending line numbers, and what is the given offset in the original
    binary. We expect the result to be ordered by line number because we
    are reading the file from start to finish."""

    # TODO: this will be replaced shortly. shim for now to avoid
    # making more changes elsewhere
    p = DecompParser()
    for line in stream:
        p.read_line(line)

    return p.functions
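A short sketch of driving the new class directly, assuming the package layout above; read_lines() accepts any iterable of strings, so a list works as well as an open file:

from isledecomp.parser import DecompParser

parser = DecompParser()
parser.read_lines(
    [
        "// FUNCTION: TEST 0x1001",
        "void hello()",
        "{",
        "}",
        "// GLOBAL: TEST 0x2002",
        "int g_value = 5;",
    ]
)
# Expect one ParserFunction, one ParserVariable, and no alerts
print(len(parser.functions), len(parser.variables), len(parser.alerts))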
tools/isledecomp/isledecomp/parser/util.py:
@@ -4,41 +4,15 @@ import re
 from typing import List
 from collections import namedtuple

-CodeBlock = namedtuple(
-    "CodeBlock",
-    [
-        "offset",
-        "signature",
-        "start_line",
-        "end_line",
-        "offset_comment",
-        "module",
-        "is_template",
-        "is_stub",
-    ],
-)
+DecompMarker = namedtuple("DecompMarker", ["type", "module", "offset"])

-OffsetMatch = namedtuple(
-    "OffsetMatch", ["module", "address", "is_template", "is_stub", "comment"]
-)
-
-# This has not been formally established, but considering that "STUB"
-# is a temporary state for a function, we assume it will appear last,
-# after any other modifiers (i.e. TEMPLATE)
-
-# To match a reasonable variance of formatting for the offset comment
-offsetCommentRegex = re.compile(
-    r"\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?",  # nopep8
+markerRegex = re.compile(
+    r"\s*//\s*(\w+):\s*(\w+)\s+((?:0x)?[a-f0-9]+)",
     flags=re.I,
 )

-# To match the exact syntax (text upper case, hex lower case, with spaces)
-# that is used in most places
-offsetCommentExactRegex = re.compile(
-    r"^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$"
-)  # nopep8
+markerExactRegex = re.compile(r"// ([A-Z]+): ([A-Z0-9]+) (0x[a-f0-9]+)$")

 # The goal here is to just read whatever is on the next line, so some
 # flexibility in the formatting seems OK
@@ -78,39 +52,15 @@ def is_blank_or_comment(line: str) -> bool:
     )


-def is_exact_offset_comment(line: str) -> bool:
-    """If the offset comment does not match our (unofficial) syntax
-    we may want to alert the user to fix it for style points."""
-    return offsetCommentExactRegex.match(line) is not None
-
-
-def match_offset_comment(line: str) -> OffsetMatch | None:
-    match = offsetCommentRegex.match(line)
+def match_marker(line: str) -> DecompMarker | None:
+    match = markerRegex.match(line)
     if match is None:
         return None

-    return OffsetMatch(
-        module=match.group(1),
-        address=int(match.group(2), 16),
-        is_template=match.group(3) is not None,
-        is_stub=match.group(4) is not None,
-        comment=line.strip(),
+    return DecompMarker(
+        type=match.group(1), module=match.group(2), offset=int(match.group(3), 16)
     )


-def distinct_by_module(offsets: List) -> List:
-    """Given a list of offset markers, return a list with distinct
-    module names. If module names (case-insensitive) are repeated,
-    choose the offset that appears first."""
-
-    if len(offsets) < 2:
-        return offsets
-
-    # Dict maintains insertion order in python >=3.7
-    offsets_dict = {}
-    for offset in offsets:
-        module_upper = offset.module.upper()
-        if module_upper not in offsets_dict:
-            offsets_dict[module_upper] = offset
-
-    return list(offsets_dict.values())
+def is_marker_exact(line: str) -> bool:
+    return markerExactRegex.match(line) is not None
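A self-contained demonstration of the two regular expressions above; the sample lines are made up:

import re

markerRegex = re.compile(
    r"\s*//\s*(\w+):\s*(\w+)\s+((?:0x)?[a-f0-9]+)", flags=re.I
)
markerExactRegex = re.compile(r"// ([A-Z]+): ([A-Z0-9]+) (0x[a-f0-9]+)$")

sloppy = "// function: lego1 0xdeadbeef"
exact = "// FUNCTION: LEGO1 0xdeadbeef"
print(markerRegex.match(sloppy).groups())      # ('function', 'lego1', '0xdeadbeef')
print(markerExactRegex.match(sloppy) is None)  # True: close, but not the exact syntax
print(markerExactRegex.match(exact).groups())  # ('FUNCTION', 'LEGO1', '0xdeadbeef')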
tools/isledecomp/tests/samples/basic_class.cpp:
@@ -3,6 +3,7 @@

 // A very simple class

+// VTABLE: TEST 0x1001002
 class TestClass {
 public:
     TestClass();
@@ -10,14 +11,14 @@ public:

     virtual MxResult Tickle() override; // vtable+08

-    // OFFSET: TEST 0x12345678
+    // FUNCTION: TEST 0x12345678
     inline const char* ClassName() const // vtable+0c
     {
         // 0xabcd1234
         return "TestClass";
     }

-    // OFFSET: TEST 0xdeadbeef
+    // FUNCTION: TEST 0xdeadbeef
     inline MxBool IsA(const char* name) const override // vtable+10
     {
         return !strcmp(name, TestClass::ClassName());
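The new VTABLE marker in this sample produces a ParserVtable record. A sketch of the same input fed to the parser directly; note that class_name is simply the stripped line that follows the marker:

from isledecomp.parser import DecompParser

parser = DecompParser()
parser.read_lines(["// VTABLE: TEST 0x1001002", "class TestClass {"])
vtable = parser.vtables[0]
print(vtable.module, hex(vtable.offset), repr(vtable.class_name))
# TEST 0x1001002 'class TestClass {'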
tools/isledecomp/tests/samples/basic_file.cpp:
@@ -3,19 +3,19 @@

 // A very simple well-formed code file

-// OFFSET: TEST 0x1234
+// FUNCTION: TEST 0x1234
 void function01()
 {
     // TODO
 }

-// OFFSET: TEST 0x2345
+// FUNCTION: TEST 0x2345
 void function02()
 {
     // TODO
 }

-// OFFSET: TEST 0x3456
+// FUNCTION: TEST 0x3456
 void function03()
 {
     // TODO
tools/isledecomp/tests/samples/global_variables.cpp (new file)

// Sample for python unit tests
// Not part of the decomp

// Global variables inside and outside of functions

// GLOBAL: TEST 0x1000
const char *g_message = "test";

// FUNCTION: TEST 0x1234
void function01()
{
    // GLOBAL: TEST 0x5555
    static int g_hello = 123;
}
tools/isledecomp/tests/samples/inline.cpp:
@@ -1,8 +1,8 @@
 // Sample for python unit tests
 // Not part of the decomp

-// OFFSET: TEST 0x10000001
+// FUNCTION: TEST 0x10000001
 inline const char* OneLineWithComment() const { return "MxDSObject"; }; // hi there

-// OFFSET: TEST 0x10000002
+// FUNCTION: TEST 0x10000002
 inline const char* OneLine() const { return "MxDSObject"; };
tools/isledecomp/tests/samples/missing_offset.cpp:
@@ -9,7 +9,7 @@ int no_offset_comment()
     return -1;
 }

-// OFFSET: TEST 0xdeadbeef
+// FUNCTION: TEST 0xdeadbeef
 void regular_ole_function()
 {
     printf("hi there");
tools/isledecomp/tests/samples/multiple_offsets.cpp:
@@ -3,22 +3,22 @@

 // Handling multiple offset markers

-// OFFSET: TEST 0x1234
-// OFFSET: HELLO 0x5555
+// FUNCTION: TEST 0x1234
+// FUNCTION: HELLO 0x5555
 void different_modules()
 {
     // TODO
 }

-// OFFSET: TEST 0x2345
-// OFFSET: TEST 0x1234
+// FUNCTION: TEST 0x2345
+// FUNCTION: TEST 0x1234
 void same_module()
 {
     // TODO
 }

-// OFFSET: TEST 0x2002
-// OFFSET: test 0x1001
+// FUNCTION: TEST 0x2002
+// FUNCTION: test 0x1001
 void same_case_insensitive()
 {
     // TODO
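As a sketch, the first case in this sample (two markers with different modules ahead of one function) is reported once per module by the new parser:

from isledecomp.parser import DecompParser

parser = DecompParser()
parser.read_lines(
    [
        "// FUNCTION: TEST 0x1234",
        "// FUNCTION: HELLO 0x5555",
        "void different_modules()",
        "{",
        "}",
    ]
)
print([(f.module, hex(f.offset)) for f in parser.functions])
# [('TEST', '0x1234'), ('HELLO', '0x5555')]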
tools/isledecomp/tests/samples/oneline_function.cpp:
@@ -1,10 +1,10 @@
 // Sample for python unit tests
 // Not part of the decomp

-// OFFSET: TEST 0x1234
+// FUNCTION: TEST 0x1234
 void short_function() { static char* msg = "oneliner"; }

-// OFFSET: TEST 0x5555
+// FUNCTION: TEST 0x5555
 void function_after_one_liner()
 {
     // This function comes after the previous that is on a single line.
tools/isledecomp/tests/samples/out_of_order.cpp:
@@ -1,19 +1,19 @@
 // Sample for python unit tests
 // Not part of the decomp

-// OFFSET: TEST 0x1001
+// FUNCTION: TEST 0x1001
 void function_order01()
 {
     // TODO
 }

-// OFFSET: TEST 0x1003
+// FUNCTION: TEST 0x1003
 void function_order03()
 {
     // TODO
 }

-// OFFSET: TEST 0x1002
+// FUNCTION: TEST 0x1002
 void function_order02()
 {
     // TODO
tools/isledecomp/tests/samples/poorly_formatted.cpp:
@@ -4,18 +4,18 @@
 // While it's reasonable to expect a well-formed file (and clang-format
 // will make sure we get one), this will put the parser through its paces.

-// OFFSET: TEST 0x1234
+// FUNCTION: TEST 0x1234
 void curly_with_spaces()
     {
     static char* msg = "hello";
     }

-// OFFSET: TEST 0x5555
+// FUNCTION: TEST 0x5555
 void weird_closing_curly()
 {
     int x = 123; }

-// OFFSET: HELLO 0x5656
+// FUNCTION: HELLO 0x5656
 void bad_indenting() {
     if (0)
     {
Parser unit tests:
@@ -1,127 +1,170 @@
Removed: the previous sample-file tests (test_sanity, test_oneline, test_missing_offset,
test_jumbled_case, test_bad_file, test_indented, test_inline, test_multiple_offsets),
which ran find_code_blocks over the files in tests/samples/; DecompParser-based versions
of these appear in test_parser_samples.py below. The file now contains unit tests that
feed marker lines to the parser directly:

import pytest
from isledecomp.parser.parser import (
    ReaderState,
    DecompParser,
)
from isledecomp.parser.util import DecompMarker
from isledecomp.parser.error import ParserError


@pytest.fixture
def parser():
    return DecompParser()


@pytest.mark.skip(reason="todo")
def test_missing_sig(parser):
    """Bad syntax: function signature is missing"""
    parser.read_lines(["// FUNCTION: TEST 0x1234", "{"])
    assert parser.state == ReaderState.IN_FUNC
    assert len(parser.alerts) == 1
    parser.read_line("}")
    assert len(parser.functions) == 1
    assert parser.functions[0] != "{"


def test_not_exact_syntax(parser):
    """Alert to inexact syntax right here in the parser instead of kicking it downstream.
    Doing this means we don't have to save the actual text."""
    parser.read_line("// function: test 1234")
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.BAD_DECOMP_MARKER


def test_invalid_marker(parser):
    """We matched a decomp marker, but it's not one we care about"""
    parser.read_line("// BANANA: TEST 0x1234")
    assert parser.state == ReaderState.SEARCH
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.BOGUS_MARKER


def test_unexpected_marker(parser):
    parser.read_lines(
        [
            "// FUNCTION: TEST 0x1234",
            "// GLOBAL: TEST 0x5000",
        ]
    )
    assert parser.state == ReaderState.SEARCH
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER


def test_variable(parser):
    parser.read_lines(
        [
            "// GLOBAL: HELLO 0x1234",
            "int g_value = 5;",
        ]
    )
    assert len(parser.variables) == 1


def test_synthetic_plus_marker(parser):
    """Should fail with error and not log the synthetic"""
    parser.read_lines(
        [
            "// SYNTHETIC: HEY 0x555",
            "// FUNCTION: HOWDY 0x1234",
        ]
    )
    assert len(parser.functions) == 0
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER


def test_different_markers_different_module(parser):
    """Does it make any sense for a function to be a stub in one module,
    but not in another? I don't know. But it's no problem for us."""
    parser.read_lines(
        [
            "// FUNCTION: HOWDY 0x1234",
            "// STUB: SUP 0x5555",
            "void interesting_function() {",
            "}",
        ]
    )

    assert len(parser.alerts) == 0
    assert len(parser.functions) == 2


def test_different_markers_same_module(parser):
    """Now, if something is a regular function but then a stub,
    what do we say about that?"""
    parser.read_lines(
        [
            "// FUNCTION: HOWDY 0x1234",
            "// STUB: HOWDY 0x5555",
            "void interesting_function() {",
            "}",
        ]
    )

    # Use first marker declaration, don't replace
    assert len(parser.functions) == 1
    assert parser.functions[0].is_stub is False

    # Should alert to this
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.DUPLICATE_MODULE


def test_unexpected_synthetic(parser):
    """FUNCTION then SYNTHETIC should fail to report either one"""
    parser.read_lines(
        [
            "// FUNCTION: HOWDY 0x1234",
            "// SYNTHETIC: HOWDY 0x5555",
            "void interesting_function() {",
            "}",
        ]
    )

    assert parser.state == ReaderState.SEARCH
    assert len(parser.functions) == 0
    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.INCOMPATIBLE_MARKER


@pytest.mark.skip(reason="not implemented yet")
def test_duplicate_offset(parser):
    """Repeating the same module/offset in the same file is probably a typo"""
    parser.read_lines(
        [
            "// GLOBAL: HELLO 0x1234",
            "int x = 1;",
            "// GLOBAL: HELLO 0x1234",
            "int y = 2;",
        ]
    )

    assert len(parser.alerts) == 1
    assert parser.alerts[0].code == ParserError.DUPLICATE_OFFSET


def test_multiple_variables(parser):
    """Theoretically the same global variable can appear in multiple modules"""
    parser.read_lines(
        [
            "// GLOBAL: HELLO 0x1234",
            "// GLOBAL: WUZZUP 0x555",
            "const char *g_greeting;",
        ]
    )
    assert len(parser.alerts) == 0
    assert len(parser.variables) == 2


def test_multiple_vtables(parser):
    parser.read_lines(
        [
            "// VTABLE: HELLO 0x1234",
            "// VTABLE: TEST 0x5432",
            "class MxString : public MxCore {",
        ]
    )
    assert len(parser.alerts) == 0
    assert len(parser.vtables) == 2
tools/isledecomp/tests/test_parser_samples.py (new file, 141 lines)

import os
import pytest
from typing import List, TextIO
from isledecomp.parser import DecompParser
from isledecomp.parser.node import ParserSymbol

SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples")


def sample_file(filename: str) -> TextIO:
    """Wrapper for opening the samples from the directory that does not
    depend on the cwd where we run the test"""
    full_path = os.path.join(SAMPLE_DIR, filename)
    return open(full_path, "r", encoding="utf-8")


def code_blocks_are_sorted(blocks: List[ParserSymbol]) -> bool:
    """Helper to make this more idiomatic"""
    just_offsets = [block.offset for block in blocks]
    return just_offsets == sorted(just_offsets)


@pytest.fixture
def parser():
    return DecompParser()


# Tests are below #


def test_sanity(parser):
    """Read a very basic file"""
    with sample_file("basic_file.cpp") as f:
        parser.read_lines(f)

    assert len(parser.functions) == 3
    assert code_blocks_are_sorted(parser.functions) is True
    # n.b. The parser returns line numbers as 1-based
    # Function starts when we see the opening curly brace
    assert parser.functions[0].line_number == 8
    assert parser.functions[0].end_line == 10


def test_oneline(parser):
    """(Assuming clang-format permits this) This sample has a function
    on a single line. This will test the end-of-function detection"""
    with sample_file("oneline_function.cpp") as f:
        parser.read_lines(f)

    assert len(parser.functions) == 2
    assert parser.functions[0].line_number == 5
    assert parser.functions[0].end_line == 5


def test_missing_offset(parser):
    """What if the function doesn't have an offset comment?"""
    with sample_file("missing_offset.cpp") as f:
        parser.read_lines(f)

    # TODO: For now, the function without the offset will just be ignored.
    # Would be the same outcome if the comment was present but mangled and
    # we failed to match it. We should detect these cases in the future.
    assert len(parser.functions) == 1


def test_jumbled_case(parser):
    """The parser just reports what it sees. It is the responsibility of
    the downstream tools to do something about a jumbled file.
    Just verify that we are reading it correctly."""
    with sample_file("out_of_order.cpp") as f:
        parser.read_lines(f)

    assert len(parser.functions) == 3
    assert code_blocks_are_sorted(parser.functions) is False


def test_bad_file(parser):
    with sample_file("poorly_formatted.cpp") as f:
        parser.read_lines(f)

    assert len(parser.functions) == 3


def test_indented(parser):
    """Offsets for functions inside of a class will probably be indented."""
    with sample_file("basic_class.cpp") as f:
        parser.read_lines(f)

    # TODO: We don't properly detect the end of these functions
    # because the closing brace is indented. However... knowing where each
    # function ends is less important (for now) than capturing
    # all the functions that are there.

    assert len(parser.functions) == 2
    assert parser.functions[0].offset == int("0x12345678", 16)
    assert parser.functions[0].line_number == 16
    # assert parser.functions[0].end_line == 19

    assert parser.functions[1].offset == int("0xdeadbeef", 16)
    assert parser.functions[1].line_number == 23
    # assert parser.functions[1].end_line == 25


def test_inline(parser):
    with sample_file("inline.cpp") as f:
        parser.read_lines(f)

    assert len(parser.functions) == 2
    for fun in parser.functions:
        assert fun.line_number is not None
        assert fun.line_number == fun.end_line


def test_multiple_offsets(parser):
    """If multiple offset marks appear before a code block, take them
    all but ensure module name (case-insensitive) is distinct.
    Use first module occurrence in case of duplicates."""
    with sample_file("multiple_offsets.cpp") as f:
        parser.read_lines(f)

    assert len(parser.functions) == 4
    assert parser.functions[0].module == "TEST"
    assert parser.functions[0].line_number == 9

    assert parser.functions[1].module == "HELLO"
    assert parser.functions[1].line_number == 9

    # Duplicate modules are ignored
    assert parser.functions[2].line_number == 16
    assert parser.functions[2].offset == 0x2345

    assert parser.functions[3].module == "TEST"
    assert parser.functions[3].offset == 0x2002


def test_variables(parser):
    with sample_file("global_variables.cpp") as f:
        parser.read_lines(f)

    assert len(parser.functions) == 1
    assert len(parser.variables) == 2
tools/isledecomp/tests/test_parser_statechange.py (new file, 150 lines)

import pytest
from isledecomp.parser.parser import (
    ReaderState as _rs,
    DecompParser,
)
from isledecomp.parser.util import DecompMarker
from isledecomp.parser.error import ParserError as _pe

# fmt: off
state_change_marker_cases = [
    (_rs.SEARCH, "FUNCTION", _rs.WANT_SIG, None),
    (_rs.SEARCH, "GLOBAL", _rs.IN_GLOBAL, None),
    (_rs.SEARCH, "STUB", _rs.WANT_SIG, None),
    (_rs.SEARCH, "SYNTHETIC", _rs.IN_TEMPLATE, None),
    (_rs.SEARCH, "TEMPLATE", _rs.IN_TEMPLATE, None),
    (_rs.SEARCH, "VTABLE", _rs.IN_VTABLE, None),

    (_rs.WANT_SIG, "FUNCTION", _rs.WANT_SIG, None),
    (_rs.WANT_SIG, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.WANT_SIG, "STUB", _rs.WANT_SIG, None),
    (_rs.WANT_SIG, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.WANT_SIG, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.WANT_SIG, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),

    (_rs.IN_FUNC, "FUNCTION", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
    (_rs.IN_FUNC, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
    (_rs.IN_FUNC, "STUB", _rs.WANT_SIG, _pe.MISSED_END_OF_FUNCTION),
    (_rs.IN_FUNC, "SYNTHETIC", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
    (_rs.IN_FUNC, "TEMPLATE", _rs.IN_TEMPLATE, _pe.MISSED_END_OF_FUNCTION),
    (_rs.IN_FUNC, "VTABLE", _rs.IN_VTABLE, _pe.MISSED_END_OF_FUNCTION),

    (_rs.IN_TEMPLATE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_TEMPLATE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_TEMPLATE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_TEMPLATE, "SYNTHETIC", _rs.IN_TEMPLATE, None),
    (_rs.IN_TEMPLATE, "TEMPLATE", _rs.IN_TEMPLATE, None),
    (_rs.IN_TEMPLATE, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),

    (_rs.WANT_CURLY, "FUNCTION", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
    (_rs.WANT_CURLY, "GLOBAL", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
    (_rs.WANT_CURLY, "STUB", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
    (_rs.WANT_CURLY, "SYNTHETIC", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
    (_rs.WANT_CURLY, "TEMPLATE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),
    (_rs.WANT_CURLY, "VTABLE", _rs.SEARCH, _pe.UNEXPECTED_MARKER),

    (_rs.IN_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_GLOBAL, "GLOBAL", _rs.IN_GLOBAL, None),
    (_rs.IN_GLOBAL, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),

    (_rs.IN_FUNC_GLOBAL, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_FUNC_GLOBAL, "GLOBAL", _rs.IN_FUNC_GLOBAL, None),
    (_rs.IN_FUNC_GLOBAL, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_FUNC_GLOBAL, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_FUNC_GLOBAL, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_FUNC_GLOBAL, "VTABLE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),

    (_rs.IN_VTABLE, "FUNCTION", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_VTABLE, "GLOBAL", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_VTABLE, "STUB", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_VTABLE, "SYNTHETIC", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_VTABLE, "TEMPLATE", _rs.SEARCH, _pe.INCOMPATIBLE_MARKER),
    (_rs.IN_VTABLE, "VTABLE", _rs.IN_VTABLE, None),
]
# fmt: on


@pytest.mark.parametrize(
    "state, marker_type, new_state, expected_error", state_change_marker_cases
)
def test_state_change_by_marker(
    state: _rs, marker_type: str, new_state: _rs, expected_error: None | _pe
):
    p = DecompParser()
    p.state = state
    p._handle_marker(DecompMarker(marker_type, "TEST", 0x1234))
    assert p.state == new_state

    if expected_error is not None:
        assert len(p.alerts) > 0
        assert p.alerts[0].code == expected_error


# Reading any of these lines should have no effect in ReaderState.SEARCH
search_lines_no_effect = [
    "",
    "\t",
    " ",
    "int x = 0;",
    "// Comment",
    "/*",
    "*/",
    "/* Block comment */",
    "{",
    "}",
]


@pytest.mark.parametrize("line", search_lines_no_effect)
def test_state_search_line(line: str):
    p = DecompParser()
    p.read_line(line)
    assert p.state == _rs.SEARCH
    assert len(p.alerts) == 0


global_lines = [
    ("// A comment", _rs.IN_GLOBAL),
    ("", _rs.IN_GLOBAL),
    ("\t", _rs.IN_GLOBAL),
    (" ", _rs.IN_GLOBAL),
    # TODO: no check for "likely" variable declaration so these all count
    ("void function()", _rs.SEARCH),
    ("int x = 123;", _rs.SEARCH),
    ("just some text", _rs.SEARCH),
]


@pytest.mark.parametrize("line, new_state", global_lines)
def test_state_global_line(line: str, new_state: _rs):
    p = DecompParser()
    p.read_line("// GLOBAL: TEST 0x1234")
    assert p.state == _rs.IN_GLOBAL
    p.read_line(line)
    assert p.state == new_state


# mostly same as above
in_func_global_lines = [
    ("// A comment", _rs.IN_FUNC_GLOBAL),
    ("", _rs.IN_FUNC_GLOBAL),
    ("\t", _rs.IN_FUNC_GLOBAL),
    (" ", _rs.IN_FUNC_GLOBAL),
    # TODO: no check for "likely" variable declaration so these all count
    ("void function()", _rs.IN_FUNC),
    ("int x = 123;", _rs.IN_FUNC),
    ("just some text", _rs.IN_FUNC),
]


@pytest.mark.parametrize("line, new_state", in_func_global_lines)
def test_state_in_func_global_line(line: str, new_state: _rs):
    p = DecompParser()
    p.state = _rs.IN_FUNC
    p.read_line("// GLOBAL: TEST 0x1234")
    assert p.state == _rs.IN_FUNC_GLOBAL
    p.read_line(line)
    assert p.state == new_state
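One row of the table above, replayed by hand to show what the parametrized test checks: a GLOBAL marker seen while inside a function body moves the reader to IN_FUNC_GLOBAL without raising an alert.

from isledecomp.parser.parser import DecompParser, ReaderState
from isledecomp.parser.util import DecompMarker

p = DecompParser()
p.state = ReaderState.IN_FUNC
p._handle_marker(DecompMarker("GLOBAL", "TEST", 0x1234))
print(p.state)        # ReaderState.IN_FUNC_GLOBAL
print(len(p.alerts))  # 0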
@@ -1,11 +1,12 @@
 from collections import namedtuple
 from typing import List

 import pytest

+from isledecomp.parser.parser import MarkerDict
 from isledecomp.parser.util import (
+    DecompMarker,
     is_blank_or_comment,
-    match_offset_comment,
-    is_exact_offset_comment,
-    distinct_by_module,
+    match_marker,
+    is_marker_exact,
 )
@@ -28,76 +29,72 @@ def test_is_blank_or_comment(line: str, expected: bool):
     assert is_blank_or_comment(line) is expected


-offset_comment_samples = [
+marker_samples = [
     # (can_parse: bool, exact_match: bool, line: str)
-    # Should match both expected modules with optional STUB marker
-    (True, True, "// OFFSET: LEGO1 0xdeadbeef"),
-    (True, True, "// OFFSET: LEGO1 0xdeadbeef STUB"),
-    (True, True, "// OFFSET: ISLE 0x12345678"),
-    (True, True, "// OFFSET: ISLE 0x12345678 STUB"),
+    (True, True, "// FUNCTION: LEGO1 0xdeadbeef"),
+    (True, True, "// FUNCTION: ISLE 0x12345678"),
     # No trailing spaces allowed
-    (True, False, "// OFFSET: LEGO1 0xdeadbeef "),
-    (True, False, "// OFFSET: LEGO1 0xdeadbeef STUB "),
+    (True, False, "// FUNCTION: LEGO1 0xdeadbeef "),
     # Must have exactly one space between elements
-    (True, False, "//OFFSET: ISLE 0xdeadbeef"),
-    (True, False, "// OFFSET:ISLE 0xdeadbeef"),
-    (True, False, "// OFFSET: ISLE 0xdeadbeef"),
-    (True, False, "// OFFSET: ISLE 0xdeadbeef"),
-    (True, False, "// OFFSET: ISLE 0xdeadbeef"),
-    (True, False, "// OFFSET: ISLE 0xdeadbeef STUB"),
+    (True, False, "//FUNCTION: ISLE 0xdeadbeef"),
+    (True, False, "// FUNCTION:ISLE 0xdeadbeef"),
+    (True, False, "// FUNCTION: ISLE 0xdeadbeef"),
+    (True, False, "// FUNCTION: ISLE 0xdeadbeef"),
+    (True, False, "// FUNCTION: ISLE 0xdeadbeef"),
     # Must have 0x prefix for hex number
-    (True, False, "// OFFSET: ISLE deadbeef"),
+    (True, False, "// FUNCTION: ISLE deadbeef"),
     # Offset, module name, and STUB must be uppercase
-    (True, False, "// offset: ISLE 0xdeadbeef"),
-    (True, False, "// offset: isle 0xdeadbeef"),
-    (True, False, "// OFFSET: LEGO1 0xdeadbeef stub"),
+    (True, False, "// function: ISLE 0xdeadbeef"),
+    (True, False, "// function: isle 0xdeadbeef"),
     # Hex string must be lowercase
-    (True, False, "// OFFSET: ISLE 0xDEADBEEF"),
+    (True, False, "// FUNCTION: ISLE 0xDEADBEEF"),
     # TODO: How flexible should we be with matching the module name?
-    (True, True, "// OFFSET: OMNI 0x12345678"),
-    (True, True, "// OFFSET: LEG01 0x12345678"),
-    (True, False, "// OFFSET: hello 0x12345678"),
+    (True, True, "// FUNCTION: OMNI 0x12345678"),
+    (True, True, "// FUNCTION: LEG01 0x12345678"),
+    (True, False, "// FUNCTION: hello 0x12345678"),
     # Not close enough to match
-    (False, False, "// OFFSET: ISLE0x12345678"),
-    (False, False, "// OFFSET: 0x12345678"),
+    (False, False, "// FUNCTION: ISLE0x12345678"),
+    (False, False, "// FUNCTION: 0x12345678"),
     (False, False, "// LEGO1: 0x12345678"),
     # Hex string shorter than 8 characters
-    (True, True, "// OFFSET: LEGO1 0x1234"),
+    (True, True, "// FUNCTION: LEGO1 0x1234"),
     # TODO: These match but shouldn't.
-    # (False, False, '// OFFSET: LEGO1 0'),
-    # (False, False, '// OFFSET: LEGO1 0x'),
+    # (False, False, '// FUNCTION: LEGO1 0'),
+    # (False, False, '// FUNCTION: LEGO1 0x'),
 ]


-@pytest.mark.parametrize("match, _, line", offset_comment_samples)
-def test_offset_match(line: str, match: bool, _):
-    did_match = match_offset_comment(line) is not None
+@pytest.mark.parametrize("match, _, line", marker_samples)
+def test_marker_match(line: str, match: bool, _):
+    did_match = match_marker(line) is not None
     assert did_match is match


-@pytest.mark.parametrize("_, exact, line", offset_comment_samples)
-def test_exact_offset_comment(line: str, exact: bool, _):
-    assert is_exact_offset_comment(line) is exact
+@pytest.mark.parametrize("_, exact, line", marker_samples)
+def test_marker_exact(line: str, exact: bool, _):
+    assert is_marker_exact(line) is exact
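
Both tests above share the sample table: match_marker() answers whether a line can be parsed as a marker at all, while is_marker_exact() additionally requires the canonical formatting. A short sketch of that distinction, reusing lines from the samples:

    from isledecomp.parser.util import match_marker, is_marker_exact

    line = "// function: isle 0xdeadbeef"       # lowercase variant from the table
    assert match_marker(line) is not None       # close enough to parse...
    assert is_marker_exact(line) is False       # ...but not in canonical form

    assert is_marker_exact("// FUNCTION: LEGO1 0xdeadbeef") is True
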
-# Helper for the next test: cut down version of OffsetMatch
-MiniOfs = namedtuple("MiniOfs", ["module", "value"])
-
-distinct_by_module_samples = [
-    # empty set
-    ([], []),
-    # same module name
-    ([MiniOfs("TEST", 123), MiniOfs("TEST", 555)], [MiniOfs("TEST", 123)]),
-    # same module name, case-insensitive
-    ([MiniOfs("test", 123), MiniOfs("TEST", 555)], [MiniOfs("test", 123)]),
-    # duplicates, non-consecutive
-    (
-        [MiniOfs("test", 123), MiniOfs("abc", 111), MiniOfs("TEST", 555)],
-        [MiniOfs("test", 123), MiniOfs("abc", 111)],
-    ),
-]
-
-
-@pytest.mark.parametrize("sample, expected", distinct_by_module_samples)
-def test_distinct_by_module(sample: List[MiniOfs], expected: List[MiniOfs]):
-    assert distinct_by_module(sample) == expected
+def test_marker_dict_simple():
+    d = MarkerDict()
+    d.insert(DecompMarker("FUNCTION", "TEST", 0x1234))
+    markers = list(d.iter())
+    assert len(markers) == 1
+
+
+def test_marker_dict_ofs_replace():
+    d = MarkerDict()
+    d.insert(DecompMarker("FUNCTION", "TEST", 0x1234))
+    d.insert(DecompMarker("FUNCTION", "TEST", 0x555))
+    markers = list(d.iter())
+    assert len(markers) == 1
+    assert markers[0].offset == 0x1234
+
+
+def test_marker_dict_type_replace():
+    d = MarkerDict()
+    d.insert(DecompMarker("FUNCTION", "TEST", 0x1234))
+    d.insert(DecompMarker("STUB", "TEST", 0x1234))
+    markers = list(d.iter())
+    assert len(markers) == 1
+    assert markers[0].type == "FUNCTION"
@@ -10,7 +10,7 @@ import re

 from isledecomp import (
     Bin,
-    find_code_blocks,
+    DecompParser,
     get_file_in_script_dir,
     OffsetPlaceholderGenerator,
     print_diff,
@@ -313,18 +313,20 @@ if __name__ == "__main__":
     # Generate basename of original file, used in locating OFFSET lines
     basename = os.path.basename(os.path.splitext(original)[0])

+    parser = DecompParser()
     for srcfilename in walk_source_dir(source):
+        parser.reset()
         with open(srcfilename, "r", encoding="utf-8") as srcfile:
-            blocks = find_code_blocks(srcfile)
+            parser.read_lines(srcfile)

-        for block in blocks:
-            if block.is_stub:
+        for fun in parser.functions:
+            if fun.is_stub:
                 continue

-            if block.module != basename:
+            if fun.module != basename:
                 continue

-            addr = block.offset
+            addr = fun.offset
             # Verbose flag handling
             if verbose:
                 if addr == verbose:
@@ -332,13 +334,13 @@ if __name__ == "__main__":
                 else:
                     continue

-            if block.is_template:
-                recinfo = syminfo.get_recompiled_address_from_name(block.signature)
+            if fun.is_template:
+                recinfo = syminfo.get_recompiled_address_from_name(fun.name)
                 if not recinfo:
                     continue
             else:
                 recinfo = syminfo.get_recompiled_address(
-                    srcfilename, block.start_line
+                    srcfilename, fun.line_number
                 )
                 if not recinfo:
                     continue
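
The reccmp changes above replace find_code_blocks() with a single DecompParser that is reset() between files. A condensed sketch of that loop; the source directory path is hypothetical:

    from isledecomp.dir import walk_source_dir
    from isledecomp.parser import DecompParser

    parser = DecompParser()
    for srcfilename in walk_source_dir("./LEGO1"):  # hypothetical source tree
        parser.reset()                              # drop functions/alerts from the previous file
        with open(srcfilename, "r", encoding="utf-8") as srcfile:
            parser.read_lines(srcfile)

        for fun in parser.functions:
            if fun.is_stub:
                continue
            print(f"{srcfilename}:{fun.line_number} 0x{fun.offset:08x}")
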