mirror of
https://github.com/isledecomp/isle.git
synced 2025-10-24 08:54:15 +00:00
Adjustments to "decomp" language
This commit is contained in:
@@ -13,7 +13,7 @@ pip install -r tools/requirements.txt
|
||||
|
||||
* `reccmp`: Compares the original EXE or DLL with a recompiled EXE or DLL, provided a PDB file
|
||||
* `verexp`: Verifies exports by comparing the exports of the original DLL and the recompiled DLL
|
||||
* `checkorder`: Checks `OFFSET` declarations, ensuring they appear in ascending order within a unit
|
||||
* `checkorder`: Checks `FUNCTION` declarations, ensuring they appear in ascending order within a unit
|
||||
* `isledecomp`: A library used by the above scripts; it provides a collection of useful classes and functions
|
||||
|
||||
## Testing
|
||||
|
||||
@@ -57,7 +57,7 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
|
||||
end_line=end_line,
|
||||
offset_comment=offset_match.comment,
|
||||
module=offset_match.module,
|
||||
is_template=offset_match.is_template,
|
||||
is_synthetic=offset_match.is_synthetic,
|
||||
is_stub=offset_match.is_stub,
|
||||
)
|
||||
blocks.append(block)
|
||||
@@ -85,7 +85,7 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
|
||||
|
||||
offset_matches.append(new_match)
|
||||
|
||||
if new_match.is_template:
|
||||
if new_match.is_synthetic:
|
||||
state = ReaderState.IN_TEMPLATE
|
||||
else:
|
||||
state = ReaderState.WANT_SIG
|
||||
@@ -100,7 +100,7 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
|
||||
can_seek = False
|
||||
|
||||
elif state == ReaderState.IN_TEMPLATE:
|
||||
# TEMPLATE functions are a special case. The signature is
|
||||
# SYNTHETIC functions are a special case. The signature is
|
||||
# given on the next line (in a // comment)
|
||||
function_sig = get_template_function_name(line)
|
||||
start_line = line_no
|
||||
|
||||
@@ -14,29 +14,29 @@ CodeBlock = namedtuple(
|
||||
"end_line",
|
||||
"offset_comment",
|
||||
"module",
|
||||
"is_template",
|
||||
"is_synthetic",
|
||||
"is_stub",
|
||||
],
|
||||
)
|
||||
|
||||
OffsetMatch = namedtuple(
|
||||
"OffsetMatch", ["module", "address", "is_template", "is_stub", "comment"]
|
||||
"OffsetMatch", ["module", "address", "is_synthetic", "is_stub", "comment"]
|
||||
)
|
||||
|
||||
# This has not been formally established, but considering that "STUB"
|
||||
# is a temporary state for a function, we assume it will appear last,
|
||||
# after any other modifiers (i.e. TEMPLATE)
|
||||
# after any other modifiers (i.e. SYNTHETIC)
|
||||
|
||||
# To match a reasonable variance of formatting for the offset comment
|
||||
offsetCommentRegex = re.compile(
|
||||
r"\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?", # nopep8
|
||||
r"\s*//\s*FUNCTION:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+SYNTHETIC)?(\s+STUB)?", # nopep8
|
||||
flags=re.I,
|
||||
)
|
||||
|
||||
# To match the exact syntax (text upper case, hex lower case, with spaces)
|
||||
# that is used in most places
|
||||
offsetCommentExactRegex = re.compile(
|
||||
r"^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$"
|
||||
r"^// FUNCTION: [A-Z0-9]+ (0x[a-f0-9]+)( SYNTHETIC)?( STUB)?$"
|
||||
) # nopep8
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@ trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")
|
||||
|
||||
|
||||
def get_template_function_name(line: str) -> str:
|
||||
"""Parse function signature for special TEMPLATE functions"""
|
||||
"""Parse function signature for special SYNTHETIC functions"""
|
||||
template_match = templateCommentRegex.match(line)
|
||||
|
||||
# If we don't match, you get whatever is on the line as the signature
|
||||
@@ -92,7 +92,7 @@ def match_offset_comment(line: str) -> OffsetMatch | None:
|
||||
return OffsetMatch(
|
||||
module=match.group(1),
|
||||
address=int(match.group(2), 16),
|
||||
is_template=match.group(3) is not None,
|
||||
is_synthetic=match.group(3) is not None,
|
||||
is_stub=match.group(4) is not None,
|
||||
comment=line.strip(),
|
||||
)
|
||||
|
||||
@@ -5,25 +5,25 @@
|
||||
|
||||
class TestClass {
|
||||
public:
|
||||
TestClass();
|
||||
virtual ~TestClass() override;
|
||||
TestClass();
|
||||
virtual ~TestClass() override;
|
||||
|
||||
virtual MxResult Tickle() override; // vtable+08
|
||||
virtual MxResult Tickle() override; // vtable+08
|
||||
|
||||
// OFFSET: TEST 0x12345678
|
||||
inline const char* ClassName() const // vtable+0c
|
||||
{
|
||||
// 0xabcd1234
|
||||
return "TestClass";
|
||||
}
|
||||
// FUNCTION: TEST 0x12345678
|
||||
inline const char* ClassName() const // vtable+0c
|
||||
{
|
||||
// 0xabcd1234
|
||||
return "TestClass";
|
||||
}
|
||||
|
||||
// OFFSET: TEST 0xdeadbeef
|
||||
inline MxBool IsA(const char* name) const override // vtable+10
|
||||
{
|
||||
return !strcmp(name, TestClass::ClassName());
|
||||
}
|
||||
// FUNCTION: TEST 0xdeadbeef
|
||||
inline MxBool IsA(const char* name) const override // vtable+10
|
||||
{
|
||||
return !strcmp(name, TestClass::ClassName());
|
||||
}
|
||||
|
||||
private:
|
||||
int m_hello;
|
||||
int m_hiThere;
|
||||
int m_hello;
|
||||
int m_hiThere;
|
||||
};
|
||||
|
||||
@@ -3,20 +3,20 @@
|
||||
|
||||
// A very simple well-formed code file
|
||||
|
||||
// OFFSET: TEST 0x1234
|
||||
// FUNCTION: TEST 0x1234
|
||||
void function01()
|
||||
{
|
||||
// TODO
|
||||
// TODO
|
||||
}
|
||||
|
||||
// OFFSET: TEST 0x2345
|
||||
// FUNCTION: TEST 0x2345
|
||||
void function02()
|
||||
{
|
||||
// TODO
|
||||
// TODO
|
||||
}
|
||||
|
||||
// OFFSET: TEST 0x3456
|
||||
// FUNCTION: TEST 0x3456
|
||||
void function03()
|
||||
{
|
||||
// TODO
|
||||
// TODO
|
||||
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
// Sample for python unit tests
|
||||
// Not part of the decomp
|
||||
|
||||
// OFFSET: TEST 0x10000001
|
||||
// FUNCTION: TEST 0x10000001
|
||||
inline const char* OneLineWithComment() const { return "MxDSObject"; }; // hi there
|
||||
|
||||
// OFFSET: TEST 0x10000002
|
||||
// FUNCTION: TEST 0x10000002
|
||||
inline const char* OneLine() const { return "MxDSObject"; };
|
||||
|
||||
@@ -5,12 +5,12 @@
|
||||
|
||||
int no_offset_comment()
|
||||
{
|
||||
static int dummy = 123;
|
||||
return -1;
|
||||
static int dummy = 123;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// OFFSET: TEST 0xdeadbeef
|
||||
// FUNCTION: TEST 0xdeadbeef
|
||||
void regular_ole_function()
|
||||
{
|
||||
printf("hi there");
|
||||
printf("hi there");
|
||||
}
|
||||
|
||||
@@ -3,23 +3,23 @@
|
||||
|
||||
// Handling multiple offset markers
|
||||
|
||||
// OFFSET: TEST 0x1234
|
||||
// OFFSET: HELLO 0x5555
|
||||
// FUNCTION: TEST 0x1234
|
||||
// FUNCTION: HELLO 0x5555
|
||||
void different_modules()
|
||||
{
|
||||
// TODO
|
||||
// TODO
|
||||
}
|
||||
|
||||
// OFFSET: TEST 0x2345
|
||||
// OFFSET: TEST 0x1234
|
||||
// FUNCTION: TEST 0x2345
|
||||
// FUNCTION: TEST 0x1234
|
||||
void same_module()
|
||||
{
|
||||
// TODO
|
||||
// TODO
|
||||
}
|
||||
|
||||
// OFFSET: TEST 0x2002
|
||||
// OFFSET: test 0x1001
|
||||
// FUNCTION: TEST 0x2002
|
||||
// FUNCTION: test 0x1001
|
||||
void same_case_insensitive()
|
||||
{
|
||||
// TODO
|
||||
// TODO
|
||||
}
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
// Sample for python unit tests
|
||||
// Not part of the decomp
|
||||
|
||||
// OFFSET: TEST 0x1234
|
||||
// FUNCTION: TEST 0x1234
|
||||
void short_function() { static char* msg = "oneliner"; }
|
||||
|
||||
// OFFSET: TEST 0x5555
|
||||
// FUNCTION: TEST 0x5555
|
||||
void function_after_one_liner()
|
||||
{
|
||||
// This function comes after the previous that is on a single line.
|
||||
// Do we report the offset for this one correctly?
|
||||
// This function comes after the previous that is on a single line.
|
||||
// Do we report the offset for this one correctly?
|
||||
}
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
// Sample for python unit tests
|
||||
// Not part of the decomp
|
||||
|
||||
// OFFSET: TEST 0x1001
|
||||
// FUNCTION: TEST 0x1001
|
||||
void function_order01()
|
||||
{
|
||||
// TODO
|
||||
// TODO
|
||||
}
|
||||
|
||||
// OFFSET: TEST 0x1003
|
||||
// FUNCTION: TEST 0x1003
|
||||
void function_order03()
|
||||
{
|
||||
// TODO
|
||||
// TODO
|
||||
}
|
||||
|
||||
// OFFSET: TEST 0x1002
|
||||
// FUNCTION: TEST 0x1002
|
||||
void function_order02()
|
||||
{
|
||||
// TODO
|
||||
// TODO
|
||||
}
|
||||
|
||||
@@ -4,20 +4,22 @@
|
||||
// While it's reasonable to expect a well-formed file (and clang-format
|
||||
// will make sure we get one), this will put the parser through its paces.
|
||||
|
||||
// OFFSET: TEST 0x1234
|
||||
// FUNCTION: TEST 0x1234
|
||||
void curly_with_spaces()
|
||||
{
|
||||
static char* msg = "hello";
|
||||
}
|
||||
{
|
||||
static char* msg = "hello";
|
||||
}
|
||||
|
||||
// OFFSET: TEST 0x5555
|
||||
// FUNCTION: TEST 0x5555
|
||||
void weird_closing_curly()
|
||||
{
|
||||
int x = 123; }
|
||||
int x = 123;
|
||||
}
|
||||
|
||||
// OFFSET: HELLO 0x5656
|
||||
void bad_indenting() {
|
||||
if (0)
|
||||
// FUNCTION: HELLO 0x5656
|
||||
void bad_indenting()
|
||||
{
|
||||
int y = 5;
|
||||
}}
|
||||
if (0) {
|
||||
int y = 5;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@ blank_or_comment_param = [
|
||||
(True, "\t"),
|
||||
(True, " "),
|
||||
(False, "\tint abc=123;"),
|
||||
(True, "// OFFSET: LEGO1 0xdeadbeef"),
|
||||
(True, "// FUNCTION: LEGO1 0xdeadbeef"),
|
||||
(True, " /* Block comment beginning"),
|
||||
(True, "Block comment ending */ "),
|
||||
# TODO: does clang-format have anything to say about these cases?
|
||||
@@ -31,41 +31,41 @@ def test_is_blank_or_comment(line: str, expected: bool):
|
||||
offset_comment_samples = [
|
||||
# (can_parse: bool, exact_match: bool, line: str)
|
||||
# Should match both expected modules with optional STUB marker
|
||||
(True, True, "// OFFSET: LEGO1 0xdeadbeef"),
|
||||
(True, True, "// OFFSET: LEGO1 0xdeadbeef STUB"),
|
||||
(True, True, "// OFFSET: ISLE 0x12345678"),
|
||||
(True, True, "// OFFSET: ISLE 0x12345678 STUB"),
|
||||
(True, True, "// FUNCTION: LEGO1 0xdeadbeef"),
|
||||
(True, True, "// FUNCTION: LEGO1 0xdeadbeef STUB"),
|
||||
(True, True, "// FUNCTION: ISLE 0x12345678"),
|
||||
(True, True, "// FUNCTION: ISLE 0x12345678 STUB"),
|
||||
# No trailing spaces allowed
|
||||
(True, False, "// OFFSET: LEGO1 0xdeadbeef "),
|
||||
(True, False, "// OFFSET: LEGO1 0xdeadbeef STUB "),
|
||||
(True, False, "// FUNCTION: LEGO1 0xdeadbeef "),
|
||||
(True, False, "// FUNCTION: LEGO1 0xdeadbeef STUB "),
|
||||
# Must have exactly one space between elements
|
||||
(True, False, "//OFFSET: ISLE 0xdeadbeef"),
|
||||
(True, False, "// OFFSET:ISLE 0xdeadbeef"),
|
||||
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
|
||||
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
|
||||
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
|
||||
(True, False, "// OFFSET: ISLE 0xdeadbeef STUB"),
|
||||
(True, False, "//FUNCTION: ISLE 0xdeadbeef"),
|
||||
(True, False, "// FUNCTION:ISLE 0xdeadbeef"),
|
||||
(True, False, "// FUNCTION: ISLE 0xdeadbeef"),
|
||||
(True, False, "// FUNCTION: ISLE 0xdeadbeef"),
|
||||
(True, False, "// FUNCTION: ISLE 0xdeadbeef"),
|
||||
(True, False, "// FUNCTION: ISLE 0xdeadbeef STUB"),
|
||||
# Must have 0x prefix for hex number
|
||||
(True, False, "// OFFSET: ISLE deadbeef"),
|
||||
(True, False, "// FUNCTION: ISLE deadbeef"),
|
||||
# Marker keyword, module name, and STUB must be uppercase
|
||||
(True, False, "// offset: ISLE 0xdeadbeef"),
|
||||
(True, False, "// offset: isle 0xdeadbeef"),
|
||||
(True, False, "// OFFSET: LEGO1 0xdeadbeef stub"),
|
||||
(True, False, "// function: ISLE 0xdeadbeef"),
|
||||
(True, False, "// function: isle 0xdeadbeef"),
|
||||
(True, False, "// FUNCTION: LEGO1 0xdeadbeef stub"),
|
||||
# Hex string must be lowercase
|
||||
(True, False, "// OFFSET: ISLE 0xDEADBEEF"),
|
||||
(True, False, "// FUNCTION: ISLE 0xDEADBEEF"),
|
||||
# TODO: How flexible should we be with matching the module name?
|
||||
(True, True, "// OFFSET: OMNI 0x12345678"),
|
||||
(True, True, "// OFFSET: LEG01 0x12345678"),
|
||||
(True, False, "// OFFSET: hello 0x12345678"),
|
||||
(True, True, "// FUNCTION: OMNI 0x12345678"),
|
||||
(True, True, "// FUNCTION: LEG01 0x12345678"),
|
||||
(True, False, "// FUNCTION: hello 0x12345678"),
|
||||
# Not close enough to match
|
||||
(False, False, "// OFFSET: ISLE0x12345678"),
|
||||
(False, False, "// OFFSET: 0x12345678"),
|
||||
(False, False, "// FUNCTION: ISLE0x12345678"),
|
||||
(False, False, "// FUNCTION: 0x12345678"),
|
||||
(False, False, "// LEGO1: 0x12345678"),
|
||||
# Hex string shorter than 8 characters
|
||||
(True, True, "// OFFSET: LEGO1 0x1234"),
|
||||
(True, True, "// FUNCTION: LEGO1 0x1234"),
|
||||
# TODO: These match but shouldn't.
|
||||
# (False, False, '// OFFSET: LEGO1 0'),
|
||||
# (False, False, '// OFFSET: LEGO1 0x'),
|
||||
# (False, False, '// FUNCTION: LEGO1 0'),
|
||||
# (False, False, '// FUNCTION: LEGO1 0x'),
|
||||
]
|
||||
|
||||
|
||||
|
||||
@@ -310,7 +310,7 @@ if __name__ == "__main__":
|
||||
total_effective_accuracy = 0
|
||||
htmlinsert = []
|
||||
|
||||
# Generate basename of original file, used in locating OFFSET lines
|
||||
# Generate basename of original file, used in locating FUNCTION lines
|
||||
basename = os.path.basename(os.path.splitext(original)[0])
|
||||
|
||||
for srcfilename in walk_source_dir(source):
|
||||
@@ -332,7 +332,7 @@ if __name__ == "__main__":
|
||||
else:
|
||||
continue
|
||||
|
||||
if block.is_template:
|
||||
if block.is_synthetic:
|
||||
recinfo = syminfo.get_recompiled_address_from_name(block.signature)
|
||||
if not recinfo:
|
||||
continue
|
||||
|
||||
Reference in New Issue
Block a user