Adjustments to "decomp" language

This commit is contained in:
Christian Semmler
2023-11-28 09:00:57 -05:00
parent 3d48cdede1
commit 1ba3b7f0a7
394 changed files with 1946 additions and 1944 deletions

View File

@@ -13,7 +13,7 @@ pip install -r tools/requirements.txt
* `reccmp`: Compares the original EXE or DLL with a recompiled EXE or DLL, provided a PDB file
* `verexp`: Verifies exports by comparing the exports of the original DLL and the recompiled DLL
* `checkorder`: Checks `OFFSET` declarations, ensuring they appear in ascending order within a unit
* `checkorder`: Checks `FUNCTION` declarations, ensuring they appear in ascending order within a unit
* `isledecomp`: A library used by the above scripts; it provides a collection of useful classes and functions
## Testing

View File

@@ -57,7 +57,7 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
end_line=end_line,
offset_comment=offset_match.comment,
module=offset_match.module,
is_template=offset_match.is_template,
is_synthetic=offset_match.is_synthetic,
is_stub=offset_match.is_stub,
)
blocks.append(block)
@@ -85,7 +85,7 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
offset_matches.append(new_match)
if new_match.is_template:
if new_match.is_synthetic:
state = ReaderState.IN_TEMPLATE
else:
state = ReaderState.WANT_SIG
@@ -100,7 +100,7 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]:
can_seek = False
elif state == ReaderState.IN_TEMPLATE:
# TEMPLATE functions are a special case. The signature is
# SYNTHETIC functions are a special case. The signature is
# given on the next line (in a // comment)
function_sig = get_template_function_name(line)
start_line = line_no

View File

@@ -14,29 +14,29 @@ CodeBlock = namedtuple(
"end_line",
"offset_comment",
"module",
"is_template",
"is_synthetic",
"is_stub",
],
)
OffsetMatch = namedtuple(
"OffsetMatch", ["module", "address", "is_template", "is_stub", "comment"]
"OffsetMatch", ["module", "address", "is_synthetic", "is_stub", "comment"]
)
# This has not been formally established, but considering that "STUB"
# is a temporary state for a function, we assume it will appear last,
# after any other modifiers (i.e. TEMPLATE)
# after any other modifiers (i.e. SYNTHETIC)
# Lenient pattern: tolerates reasonable variation in the formatting of the offset comment
offsetCommentRegex = re.compile(
r"\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?", # nopep8
r"\s*//\s*FUNCTION:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+SYNTHETIC)?(\s+STUB)?", # nopep8
flags=re.I,
)
# To match the exact syntax (text upper case, hex lower case, with spaces)
# that is used in most places
offsetCommentExactRegex = re.compile(
r"^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$"
r"^// FUNCTION: [A-Z0-9]+ (0x[a-f0-9]+)( SYNTHETIC)?( STUB)?$"
) # nopep8
@@ -51,7 +51,7 @@ trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$")
def get_template_function_name(line: str) -> str:
"""Parse function signature for special TEMPLATE functions"""
"""Parse function signature for special SYNTHETIC functions"""
template_match = templateCommentRegex.match(line)
# If we don't match, you get whatever is on the line as the signature
@@ -92,7 +92,7 @@ def match_offset_comment(line: str) -> OffsetMatch | None:
return OffsetMatch(
module=match.group(1),
address=int(match.group(2), 16),
is_template=match.group(3) is not None,
is_synthetic=match.group(3) is not None,
is_stub=match.group(4) is not None,
comment=line.strip(),
)

View File

@@ -5,25 +5,25 @@
class TestClass {
public:
TestClass();
virtual ~TestClass() override;
TestClass();
virtual ~TestClass() override;
virtual MxResult Tickle() override; // vtable+08
virtual MxResult Tickle() override; // vtable+08
// OFFSET: TEST 0x12345678
inline const char* ClassName() const // vtable+0c
{
// 0xabcd1234
return "TestClass";
}
// FUNCTION: TEST 0x12345678
inline const char* ClassName() const // vtable+0c
{
// 0xabcd1234
return "TestClass";
}
// OFFSET: TEST 0xdeadbeef
inline MxBool IsA(const char* name) const override // vtable+10
{
return !strcmp(name, TestClass::ClassName());
}
// FUNCTION: TEST 0xdeadbeef
inline MxBool IsA(const char* name) const override // vtable+10
{
return !strcmp(name, TestClass::ClassName());
}
private:
int m_hello;
int m_hiThere;
int m_hello;
int m_hiThere;
};

View File

@@ -3,20 +3,20 @@
// A very simple well-formed code file
// OFFSET: TEST 0x1234
// FUNCTION: TEST 0x1234
void function01()
{
// TODO
// TODO
}
// OFFSET: TEST 0x2345
// FUNCTION: TEST 0x2345
void function02()
{
// TODO
// TODO
}
// OFFSET: TEST 0x3456
// FUNCTION: TEST 0x3456
void function03()
{
// TODO
// TODO
}

View File

@@ -1,8 +1,8 @@
// Sample for python unit tests
// Not part of the decomp
// OFFSET: TEST 0x10000001
// FUNCTION: TEST 0x10000001
inline const char* OneLineWithComment() const { return "MxDSObject"; }; // hi there
// OFFSET: TEST 0x10000002
// FUNCTION: TEST 0x10000002
inline const char* OneLine() const { return "MxDSObject"; };

View File

@@ -5,12 +5,12 @@
int no_offset_comment()
{
static int dummy = 123;
return -1;
static int dummy = 123;
return -1;
}
// OFFSET: TEST 0xdeadbeef
// FUNCTION: TEST 0xdeadbeef
void regular_ole_function()
{
printf("hi there");
printf("hi there");
}

View File

@@ -3,23 +3,23 @@
// Handling multiple offset markers
// OFFSET: TEST 0x1234
// OFFSET: HELLO 0x5555
// FUNCTION: TEST 0x1234
// FUNCTION: HELLO 0x5555
void different_modules()
{
// TODO
// TODO
}
// OFFSET: TEST 0x2345
// OFFSET: TEST 0x1234
// FUNCTION: TEST 0x2345
// FUNCTION: TEST 0x1234
void same_module()
{
// TODO
// TODO
}
// OFFSET: TEST 0x2002
// OFFSET: test 0x1001
// FUNCTION: TEST 0x2002
// FUNCTION: test 0x1001
void same_case_insensitive()
{
// TODO
// TODO
}

View File

@@ -1,12 +1,12 @@
// Sample for python unit tests
// Not part of the decomp
// OFFSET: TEST 0x1234
// FUNCTION: TEST 0x1234
void short_function() { static char* msg = "oneliner"; }
// OFFSET: TEST 0x5555
// FUNCTION: TEST 0x5555
void function_after_one_liner()
{
// This function comes after the previous that is on a single line.
// Do we report the offset for this one correctly?
// This function comes after the previous that is on a single line.
// Do we report the offset for this one correctly?
}

View File

@@ -1,20 +1,20 @@
// Sample for python unit tests
// Not part of the decomp
// OFFSET: TEST 0x1001
// FUNCTION: TEST 0x1001
void function_order01()
{
// TODO
// TODO
}
// OFFSET: TEST 0x1003
// FUNCTION: TEST 0x1003
void function_order03()
{
// TODO
// TODO
}
// OFFSET: TEST 0x1002
// FUNCTION: TEST 0x1002
void function_order02()
{
// TODO
// TODO
}

View File

@@ -4,20 +4,22 @@
// While it's reasonable to expect a well-formed file (and clang-format
// will make sure we get one), this will put the parser through its paces.
// OFFSET: TEST 0x1234
// FUNCTION: TEST 0x1234
void curly_with_spaces()
{
static char* msg = "hello";
}
{
static char* msg = "hello";
}
// OFFSET: TEST 0x5555
// FUNCTION: TEST 0x5555
void weird_closing_curly()
{
int x = 123; }
int x = 123;
}
// OFFSET: HELLO 0x5656
void bad_indenting() {
if (0)
// FUNCTION: HELLO 0x5656
void bad_indenting()
{
int y = 5;
}}
if (0) {
int y = 5;
}
}

View File

@@ -14,7 +14,7 @@ blank_or_comment_param = [
(True, "\t"),
(True, " "),
(False, "\tint abc=123;"),
(True, "// OFFSET: LEGO1 0xdeadbeef"),
(True, "// FUNCTION: LEGO1 0xdeadbeef"),
(True, " /* Block comment beginning"),
(True, "Block comment ending */ "),
# TODO: does clang-format have anything to say about these cases?
@@ -31,41 +31,41 @@ def test_is_blank_or_comment(line: str, expected: bool):
offset_comment_samples = [
# (can_parse: bool, exact_match: bool, line: str)
# Should match both expected modules with optional STUB marker
(True, True, "// OFFSET: LEGO1 0xdeadbeef"),
(True, True, "// OFFSET: LEGO1 0xdeadbeef STUB"),
(True, True, "// OFFSET: ISLE 0x12345678"),
(True, True, "// OFFSET: ISLE 0x12345678 STUB"),
(True, True, "// FUNCTION: LEGO1 0xdeadbeef"),
(True, True, "// FUNCTION: LEGO1 0xdeadbeef STUB"),
(True, True, "// FUNCTION: ISLE 0x12345678"),
(True, True, "// FUNCTION: ISLE 0x12345678 STUB"),
# No trailing spaces allowed
(True, False, "// OFFSET: LEGO1 0xdeadbeef "),
(True, False, "// OFFSET: LEGO1 0xdeadbeef STUB "),
(True, False, "// FUNCTION: LEGO1 0xdeadbeef "),
(True, False, "// FUNCTION: LEGO1 0xdeadbeef STUB "),
# Must have exactly one space between elements
(True, False, "//OFFSET: ISLE 0xdeadbeef"),
(True, False, "// OFFSET:ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef"),
(True, False, "// OFFSET: ISLE 0xdeadbeef STUB"),
(True, False, "//FUNCTION: ISLE 0xdeadbeef"),
(True, False, "// FUNCTION:ISLE 0xdeadbeef"),
(True, False, "// FUNCTION: ISLE 0xdeadbeef"),
(True, False, "// FUNCTION: ISLE 0xdeadbeef"),
(True, False, "// FUNCTION: ISLE 0xdeadbeef"),
(True, False, "// FUNCTION: ISLE 0xdeadbeef STUB"),
# Must have 0x prefix for hex number
(True, False, "// OFFSET: ISLE deadbeef"),
(True, False, "// FUNCTION: ISLE deadbeef"),
# Marker keyword, module name, and STUB must be uppercase
(True, False, "// offset: ISLE 0xdeadbeef"),
(True, False, "// offset: isle 0xdeadbeef"),
(True, False, "// OFFSET: LEGO1 0xdeadbeef stub"),
(True, False, "// function: ISLE 0xdeadbeef"),
(True, False, "// function: isle 0xdeadbeef"),
(True, False, "// FUNCTION: LEGO1 0xdeadbeef stub"),
# Hex string must be lowercase
(True, False, "// OFFSET: ISLE 0xDEADBEEF"),
(True, False, "// FUNCTION: ISLE 0xDEADBEEF"),
# TODO: How flexible should we be with matching the module name?
(True, True, "// OFFSET: OMNI 0x12345678"),
(True, True, "// OFFSET: LEG01 0x12345678"),
(True, False, "// OFFSET: hello 0x12345678"),
(True, True, "// FUNCTION: OMNI 0x12345678"),
(True, True, "// FUNCTION: LEG01 0x12345678"),
(True, False, "// FUNCTION: hello 0x12345678"),
# Not close enough to match
(False, False, "// OFFSET: ISLE0x12345678"),
(False, False, "// OFFSET: 0x12345678"),
(False, False, "// FUNCTION: ISLE0x12345678"),
(False, False, "// FUNCTION: 0x12345678"),
(False, False, "// LEGO1: 0x12345678"),
# Hex string shorter than 8 characters
(True, True, "// OFFSET: LEGO1 0x1234"),
(True, True, "// FUNCTION: LEGO1 0x1234"),
# TODO: These match but shouldn't.
# (False, False, '// OFFSET: LEGO1 0'),
# (False, False, '// OFFSET: LEGO1 0x'),
# (False, False, '// FUNCTION: LEGO1 0'),
# (False, False, '// FUNCTION: LEGO1 0x'),
]

View File

@@ -310,7 +310,7 @@ if __name__ == "__main__":
total_effective_accuracy = 0
htmlinsert = []
# Generate basename of original file, used in locating OFFSET lines
# Generate basename of original file, used in locating FUNCTION lines
basename = os.path.basename(os.path.splitext(original)[0])
for srcfilename in walk_source_dir(source):
@@ -332,7 +332,7 @@ if __name__ == "__main__":
else:
continue
if block.is_template:
if block.is_synthetic:
recinfo = syminfo.get_recompiled_address_from_name(block.signature)
if not recinfo:
continue