mirror of
https://github.com/isledecomp/isle.git
synced 2025-10-23 00:14:22 +00:00
Add Ghidra function import script (#909)
* Add draft for Ghidra function import script
* feature: Basic PDB analysis [skip ci]
  This is a draft with a lot of open questions left. Please do not merge
* Refactor: Introduce submodules and reload remedy
* refactor types and make them Python 3.9 compatible
* run black
* WIP: save progress
* fix types and small type safety violations
* fix another Python 3.9 syntax incompatibility
* Implement struct imports [skip ci]
  - This code is still in dire need of refactoring and tests
  - There are only single-digit issues left, and 2600 functions can be imported
  - The biggest remaining error is mismatched stacks
* Refactor, implement enums, fix lots of bugs
* fix Python 3.9 issue
* refactor: address review comments
  Not sure why VS Code suddenly decides to remove some empty spaces, but they don't make sense anyway
* add unit tests for new type parsers, fix linter issue
* refactor: db access from pdb_extraction.py
* Fix stack layout offset error
* fix: Undo incorrect reference change
* Fix CI issue
* Improve READMEs (fix typos, add information)

---------

Co-authored-by: jonschz <jonschz@users.noreply.github.com>
This commit is contained in:
@@ -9,6 +9,21 @@ from isledecomp.cvdump.types import (
|
||||
)
|
||||
|
||||
TEST_LINES = """
|
||||
0x1018 : Length = 18, Leaf = 0x1201 LF_ARGLIST argument count = 3
|
||||
list[0] = 0x100D
|
||||
list[1] = 0x1016
|
||||
list[2] = 0x1017
|
||||
|
||||
0x1019 : Length = 14, Leaf = 0x1008 LF_PROCEDURE
|
||||
Return type = T_LONG(0012), Call type = C Near
|
||||
Func attr = none
|
||||
# Parms = 3, Arg list type = 0x1018
|
||||
|
||||
0x101e : Length = 26, Leaf = 0x1009 LF_MFUNCTION
|
||||
Return type = T_CHAR(0010), Class type = 0x101A, This type = 0x101B,
|
||||
Call type = ThisCall, Func attr = none
|
||||
Parms = 2, Arg list type = 0x101d, This adjust = 0
|
||||
|
||||
0x1028 : Length = 10, Leaf = 0x1001 LF_MODIFIER
|
||||
const, modifies type T_REAL32(0040)
|
||||
|
||||
@@ -47,16 +62,16 @@ TEST_LINES = """
|
||||
Element type = T_UCHAR(0020)
|
||||
Index type = T_SHORT(0011)
|
||||
length = 8
|
||||
Name =
|
||||
Name =
|
||||
|
||||
0x10ea : Length = 14, Leaf = 0x1503 LF_ARRAY
|
||||
Element type = 0x1028
|
||||
Index type = T_SHORT(0011)
|
||||
length = 12
|
||||
Name =
|
||||
Name =
|
||||
|
||||
0x11f0 : Length = 30, Leaf = 0x1504 LF_CLASS
|
||||
# members = 0, field list type 0x0000, FORWARD REF,
|
||||
# members = 0, field list type 0x0000, FORWARD REF,
|
||||
Derivation list type 0x0000, VT shape type 0x0000
|
||||
Size = 0, class name = MxRect32, UDT(0x00001214)
|
||||
|
||||
@@ -98,22 +113,22 @@ TEST_LINES = """
|
||||
member name = 'm_bottom'
|
||||
|
||||
0x1214 : Length = 30, Leaf = 0x1504 LF_CLASS
|
||||
# members = 34, field list type 0x1213, CONSTRUCTOR, OVERLOAD,
|
||||
# members = 34, field list type 0x1213, CONSTRUCTOR, OVERLOAD,
|
||||
Derivation list type 0x0000, VT shape type 0x0000
|
||||
Size = 16, class name = MxRect32, UDT(0x00001214)
|
||||
|
||||
0x1220 : Length = 30, Leaf = 0x1504 LF_CLASS
|
||||
# members = 0, field list type 0x0000, FORWARD REF,
|
||||
# members = 0, field list type 0x0000, FORWARD REF,
|
||||
Derivation list type 0x0000, VT shape type 0x0000
|
||||
Size = 0, class name = MxCore, UDT(0x00004060)
|
||||
|
||||
0x14db : Length = 30, Leaf = 0x1504 LF_CLASS
|
||||
# members = 0, field list type 0x0000, FORWARD REF,
|
||||
# members = 0, field list type 0x0000, FORWARD REF,
|
||||
Derivation list type 0x0000, VT shape type 0x0000
|
||||
Size = 0, class name = MxString, UDT(0x00004db6)
|
||||
|
||||
0x19b0 : Length = 34, Leaf = 0x1505 LF_STRUCTURE
|
||||
# members = 0, field list type 0x0000, FORWARD REF,
|
||||
# members = 0, field list type 0x0000, FORWARD REF,
|
||||
Derivation list type 0x0000, VT shape type 0x0000
|
||||
Size = 0, class name = ROIColorAlias, UDT(0x00002a76)
|
||||
|
||||
@@ -123,6 +138,12 @@ TEST_LINES = """
|
||||
length = 440
|
||||
Name =
|
||||
|
||||
0x2339 : Length = 26, Leaf = 0x1506 LF_UNION
|
||||
# members = 0, field list type 0x0000, FORWARD REF, Size = 0 ,class name = FlagBitfield, UDT(0x00002e85)
|
||||
|
||||
0x2e85 : Length = 26, Leaf = 0x1506 LF_UNION
|
||||
# members = 8, field list type 0x2e84, Size = 1 ,class name = FlagBitfield, UDT(0x00002e85)
|
||||
|
||||
0x2a75 : Length = 98, Leaf = 0x1203 LF_FIELDLIST
|
||||
list[0] = LF_MEMBER, public, type = T_32PRCHAR(0470), offset = 0
|
||||
member name = 'm_name'
|
||||
@@ -136,18 +157,18 @@ TEST_LINES = """
|
||||
member name = 'm_unk0x10'
|
||||
|
||||
0x2a76 : Length = 34, Leaf = 0x1505 LF_STRUCTURE
|
||||
# members = 5, field list type 0x2a75,
|
||||
# members = 5, field list type 0x2a75,
|
||||
Derivation list type 0x0000, VT shape type 0x0000
|
||||
Size = 20, class name = ROIColorAlias, UDT(0x00002a76)
|
||||
|
||||
0x22d4 : Length = 154, Leaf = 0x1203 LF_FIELDLIST
|
||||
list[0] = LF_VFUNCTAB, type = 0x20FC
|
||||
list[1] = LF_METHOD, count = 3, list = 0x22D0, name = 'MxVariable'
|
||||
list[2] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F0F,
|
||||
list[2] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F0F,
|
||||
vfptr offset = 0, name = 'GetValue'
|
||||
list[3] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F10,
|
||||
list[3] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F10,
|
||||
vfptr offset = 4, name = 'SetValue'
|
||||
list[4] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F11,
|
||||
list[4] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F11,
|
||||
vfptr offset = 8, name = '~MxVariable'
|
||||
list[5] = LF_ONEMETHOD, public, VANILLA, index = 0x22D3, name = 'GetKey'
|
||||
list[6] = LF_MEMBER, protected, type = 0x14DB, offset = 4
|
||||
@@ -156,10 +177,15 @@ TEST_LINES = """
|
||||
member name = 'm_value'
|
||||
|
||||
0x22d5 : Length = 34, Leaf = 0x1504 LF_CLASS
|
||||
# members = 10, field list type 0x22d4, CONSTRUCTOR,
|
||||
# members = 10, field list type 0x22d4, CONSTRUCTOR,
|
||||
Derivation list type 0x0000, VT shape type 0x20fb
|
||||
Size = 36, class name = MxVariable, UDT(0x00004041)
|
||||
|
||||
0x3c45 : Length = 50, Leaf = 0x1203 LF_FIELDLIST
|
||||
list[0] = LF_ENUMERATE, public, value = 1, name = 'c_read'
|
||||
list[1] = LF_ENUMERATE, public, value = 2, name = 'c_write'
|
||||
list[2] = LF_ENUMERATE, public, value = 4, name = 'c_text'
|
||||
|
||||
0x3cc2 : Length = 38, Leaf = 0x1507 LF_ENUM
|
||||
# members = 64, type = T_INT4(0074) field list type 0x3cc1
|
||||
NESTED, enum name = JukeBox::JukeBoxScript, UDT(0x00003cc2)
|
||||
@@ -171,22 +197,22 @@ NESTED, enum name = JukeBox::JukeBoxScript, UDT(0x00003cc2)
|
||||
0x405f : Length = 158, Leaf = 0x1203 LF_FIELDLIST
|
||||
list[0] = LF_VFUNCTAB, type = 0x2090
|
||||
list[1] = LF_ONEMETHOD, public, VANILLA, index = 0x176A, name = 'MxCore'
|
||||
list[2] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x176A,
|
||||
list[2] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x176A,
|
||||
vfptr offset = 0, name = '~MxCore'
|
||||
list[3] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x176B,
|
||||
list[3] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x176B,
|
||||
vfptr offset = 4, name = 'Notify'
|
||||
list[4] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x2087,
|
||||
list[4] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x2087,
|
||||
vfptr offset = 8, name = 'Tickle'
|
||||
list[5] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x202F,
|
||||
list[5] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x202F,
|
||||
vfptr offset = 12, name = 'ClassName'
|
||||
list[6] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x2030,
|
||||
list[6] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x2030,
|
||||
vfptr offset = 16, name = 'IsA'
|
||||
list[7] = LF_ONEMETHOD, public, VANILLA, index = 0x2091, name = 'GetId'
|
||||
list[8] = LF_MEMBER, private, type = T_UINT4(0075), offset = 4
|
||||
member name = 'm_id'
|
||||
|
||||
0x4060 : Length = 30, Leaf = 0x1504 LF_CLASS
|
||||
# members = 9, field list type 0x405f, CONSTRUCTOR,
|
||||
# members = 9, field list type 0x405f, CONSTRUCTOR,
|
||||
Derivation list type 0x0000, VT shape type 0x1266
|
||||
Size = 8, class name = MxCore, UDT(0x00004060)
|
||||
|
||||
@@ -194,7 +220,7 @@ NESTED, enum name = JukeBox::JukeBoxScript, UDT(0x00003cc2)
|
||||
Element type = 0x3CC2
|
||||
Index type = T_SHORT(0011)
|
||||
length = 24
|
||||
Name =
|
||||
Name =
|
||||
|
||||
0x432f : Length = 14, Leaf = 0x1503 LF_ARRAY
|
||||
Element type = T_INT4(0074)
|
||||
@@ -220,7 +246,7 @@ NESTED, enum name = JukeBox::JukeBoxScript, UDT(0x00003cc2)
|
||||
member name = 'm_length'
|
||||
|
||||
0x4db6 : Length = 30, Leaf = 0x1504 LF_CLASS
|
||||
# members = 16, field list type 0x4db5, CONSTRUCTOR, OVERLOAD,
|
||||
# members = 16, field list type 0x4db5, CONSTRUCTOR, OVERLOAD,
|
||||
Derivation list type 0x0000, VT shape type 0x1266
|
||||
Size = 16, class name = MxString, UDT(0x00004db6)
|
||||
"""
|
||||
@@ -235,7 +261,7 @@ def types_parser_fixture():
|
||||
return parser
|
||||
|
||||
|
||||
def test_basic_parsing(parser):
|
||||
def test_basic_parsing(parser: CvdumpTypesParser):
|
||||
obj = parser.keys["0x4db6"]
|
||||
assert obj["type"] == "LF_CLASS"
|
||||
assert obj["name"] == "MxString"
|
||||
@@ -244,7 +270,7 @@ def test_basic_parsing(parser):
|
||||
assert len(parser.keys["0x4db5"]["members"]) == 2
|
||||
|
||||
|
||||
def test_scalar_types(parser):
|
||||
def test_scalar_types(parser: CvdumpTypesParser):
|
||||
"""Full tests on the scalar_* methods are in another file.
|
||||
Here we are just testing the passthrough of the "T_" types."""
|
||||
assert parser.get("T_CHAR").name is None
|
||||
@@ -254,7 +280,7 @@ def test_scalar_types(parser):
|
||||
assert parser.get("T_32PVOID").size == 4
|
||||
|
||||
|
||||
def test_resolve_forward_ref(parser):
|
||||
def test_resolve_forward_ref(parser: CvdumpTypesParser):
|
||||
# Non-forward ref
|
||||
assert parser.get("0x22d5").name == "MxVariable"
|
||||
# Forward ref
|
||||
@@ -262,7 +288,7 @@ def test_resolve_forward_ref(parser):
|
||||
assert parser.get("0x14db").size == 16
|
||||
|
||||
|
||||
def test_members(parser):
|
||||
def test_members(parser: CvdumpTypesParser):
|
||||
"""Return the list of items to compare for a given complex type.
|
||||
If the class has a superclass, add those members too."""
|
||||
# MxCore field list
|
||||
@@ -284,7 +310,7 @@ def test_members(parser):
|
||||
]
|
||||
|
||||
|
||||
def test_members_recursive(parser):
|
||||
def test_members_recursive(parser: CvdumpTypesParser):
|
||||
"""Make sure that we unwrap the dependency tree correctly."""
|
||||
# MxVariable field list
|
||||
assert parser.get_scalars("0x22d4") == [
|
||||
@@ -300,7 +326,7 @@ def test_members_recursive(parser):
|
||||
]
|
||||
|
||||
|
||||
def test_struct(parser):
|
||||
def test_struct(parser: CvdumpTypesParser):
|
||||
"""Basic test for converting type into struct.unpack format string."""
|
||||
# MxCore: vftable and uint32. The vftable pointer is read as uint32.
|
||||
assert parser.get_format_string("0x4060") == "<LL"
|
||||
@@ -312,7 +338,7 @@ def test_struct(parser):
|
||||
assert parser.get_format_string("0x1214") == "<llll"
|
||||
|
||||
|
||||
def test_struct_padding(parser):
|
||||
def test_struct_padding(parser: CvdumpTypesParser):
|
||||
"""For data comparison purposes, make sure we have no gaps in the
|
||||
list of scalar types. Any gap is filled by an unsigned char."""
|
||||
|
||||
@@ -326,7 +352,7 @@ def test_struct_padding(parser):
|
||||
assert len(parser.get_scalars_gapless("0x22d5")) == 13
|
||||
|
||||
|
||||
def test_struct_format_string(parser):
|
||||
def test_struct_format_string(parser: CvdumpTypesParser):
|
||||
"""Generate the struct.unpack format string using the
|
||||
list of scalars with padding filled in."""
|
||||
# MxString, padded to 16 bytes.
|
||||
@@ -336,7 +362,7 @@ def test_struct_format_string(parser):
|
||||
assert parser.get_format_string("0x22d5") == "<LLLLHBBLLLHBB"
|
||||
|
||||
|
||||
def test_array(parser):
|
||||
def test_array(parser: CvdumpTypesParser):
|
||||
"""LF_ARRAY members are created dynamically based on the
|
||||
total array size and the size of one element."""
|
||||
# unsigned char[8]
|
||||
@@ -360,7 +386,7 @@ def test_array(parser):
|
||||
]
|
||||
|
||||
|
||||
def test_2d_array(parser):
|
||||
def test_2d_array(parser: CvdumpTypesParser):
|
||||
"""Make sure 2d array elements are named as we expect."""
|
||||
# float[4][4]
|
||||
float_array = parser.get_scalars("0x103c")
|
||||
@@ -371,7 +397,7 @@ def test_2d_array(parser):
|
||||
assert float_array[-1] == (60, "[3][3]", "T_REAL32")
|
||||
|
||||
|
||||
def test_enum(parser):
|
||||
def test_enum(parser: CvdumpTypesParser):
|
||||
"""LF_ENUM should equal 4-byte int"""
|
||||
assert parser.get("0x3cc2").size == 4
|
||||
assert parser.get_scalars("0x3cc2") == [(0, None, "T_INT4")]
|
||||
@@ -382,7 +408,7 @@ def test_enum(parser):
|
||||
assert enum_array[0].size == 4
|
||||
|
||||
|
||||
def test_lf_pointer(parser):
|
||||
def test_lf_pointer(parser: CvdumpTypesParser):
|
||||
"""LF_POINTER is just a wrapper for scalar pointer type"""
|
||||
assert parser.get("0x3fab").size == 4
|
||||
# assert parser.get("0x3fab").is_pointer is True # TODO: ?
|
||||
@@ -390,7 +416,7 @@ def test_lf_pointer(parser):
|
||||
assert parser.get_scalars("0x3fab") == [(0, None, "T_32PVOID")]
|
||||
|
||||
|
||||
def test_key_not_exist(parser):
|
||||
def test_key_not_exist(parser: CvdumpTypesParser):
|
||||
"""Accessing a non-existent type id should raise our exception"""
|
||||
with pytest.raises(CvdumpKeyError):
|
||||
parser.get("0xbeef")
|
||||
@@ -399,7 +425,7 @@ def test_key_not_exist(parser):
|
||||
parser.get_scalars("0xbeef")
|
||||
|
||||
|
||||
def test_broken_forward_ref(parser):
|
||||
def test_broken_forward_ref(parser: CvdumpTypesParser):
|
||||
"""Raise an exception if we cannot follow a forward reference"""
|
||||
# Verify forward reference on MxCore
|
||||
parser.get("0x1220")
|
||||
@@ -412,7 +438,7 @@ def test_broken_forward_ref(parser):
|
||||
parser.get("0x1220")
|
||||
|
||||
|
||||
def test_null_forward_ref(parser):
|
||||
def test_null_forward_ref(parser: CvdumpTypesParser):
|
||||
"""If the forward ref object is invalid and has no forward ref id,
|
||||
raise an exception."""
|
||||
# Test MxString forward reference
|
||||
@@ -426,7 +452,7 @@ def test_null_forward_ref(parser):
|
||||
parser.get("0x14db")
|
||||
|
||||
|
||||
def test_broken_array_element_ref(parser):
|
||||
def test_broken_array_element_ref(parser: CvdumpTypesParser):
|
||||
# Test LF_ARRAY of ROIColorAlias
|
||||
parser.get("0x19b1")
|
||||
|
||||
@@ -438,7 +464,7 @@ def test_broken_array_element_ref(parser):
|
||||
parser.get("0x19b1")
|
||||
|
||||
|
||||
def test_lf_modifier(parser):
|
||||
def test_lf_modifier(parser: CvdumpTypesParser):
|
||||
"""Is this an alias for another type?"""
|
||||
# Modifies float
|
||||
assert parser.get("0x1028").size == 4
|
||||
@@ -449,7 +475,7 @@ def test_lf_modifier(parser):
|
||||
assert mxrect == parser.get_scalars("0x11f2")
|
||||
|
||||
|
||||
def test_union_members(parser):
|
||||
def test_union_members(parser: CvdumpTypesParser):
|
||||
"""If there is a union somewhere in our dependency list, we can
|
||||
expect to see duplicated member offsets and names. This is ok for
|
||||
the TypeInfo tuple, but the list of ScalarType items should have
|
||||
@@ -457,9 +483,71 @@ def test_union_members(parser):
|
||||
|
||||
# D3DVector type with duplicated offsets
|
||||
d3dvector = parser.get("0x10e1")
|
||||
assert d3dvector.members is not None
|
||||
assert len(d3dvector.members) == 6
|
||||
assert len([m for m in d3dvector.members if m.offset == 0]) == 2
|
||||
|
||||
# Deduplicated comparison list
|
||||
vector_items = parser.get_scalars("0x10e1")
|
||||
assert len(vector_items) == 3
|
||||
|
||||
|
||||
def test_arglist(parser: CvdumpTypesParser):
    """The LF_ARGLIST record 0x1018 exposes its argument count and the
    list of argument type keys."""
    entry = parser.keys["0x1018"]
    # Three arguments are declared in the record header ...
    assert entry["argcount"] == 3
    # ... and the same three type keys are collected, in order.
    assert entry["args"] == ["0x100D", "0x1016", "0x1017"]
|
||||
|
||||
|
||||
def test_procedure(parser: CvdumpTypesParser):
    """The LF_PROCEDURE record 0x1019 is stored with exactly these keys;
    every value is kept as a string."""
    expected = {
        "type": "LF_PROCEDURE",
        "return_type": "T_LONG(0012)",
        "call_type": "C Near",
        "func_attr": "none",
        "num_params": "3",
        "arg_list_type": "0x1018",
    }
    # Whole-dict comparison: no extra and no missing keys are tolerated.
    assert parser.keys["0x1019"] == expected
|
||||
|
||||
|
||||
def test_mfunction(parser: CvdumpTypesParser):
    """The LF_MFUNCTION record 0x101e is stored with exactly these keys,
    including the class type, this type and this adjust fields."""
    expected = {
        "type": "LF_MFUNCTION",
        "return_type": "T_CHAR(0010)",
        "class_type": "0x101A",
        "this_type": "0x101B",
        "call_type": "ThisCall",
        "func_attr": "none",
        "num_params": "2",
        "arg_list_type": "0x101d",
        "this_adjust": "0",
    }
    # Whole-dict comparison: no extra and no missing keys are tolerated.
    assert parser.keys["0x101e"] == expected
|
||||
|
||||
|
||||
def test_union_forward_ref(parser: CvdumpTypesParser):
    """The LF_UNION record 0x2339 is flagged as a forward reference and
    carries the key of the UDT it refers to."""
    forward_ref = parser.keys["0x2339"]
    assert forward_ref["is_forward_ref"] is True
    # The UDT key points at the full union definition (0x2e85).
    assert forward_ref["udt"] == "0x2e85"
|
||||
|
||||
|
||||
def test_union(parser: CvdumpTypesParser):
    """The LF_UNION record 0x2e85 is stored with its name, size and
    UDT key and nothing else."""
    expected = {
        "type": "LF_UNION",
        "name": "FlagBitfield",
        "size": 1,
        "udt": "0x2e85",
    }
    # Whole-dict comparison: no extra and no missing keys are tolerated.
    assert parser.keys["0x2e85"] == expected
|
||||
|
||||
|
||||
def test_fieldlist_enumerate(parser: CvdumpTypesParser):
    """LF_ENUMERATE entries of the LF_FIELDLIST record 0x3c45 are
    collected under "variants" as name/value pairs."""
    variants = [
        {"name": "c_read", "value": 1},
        {"name": "c_write", "value": 2},
        {"name": "c_text", "value": 4},
    ]
    expected = {"type": "LF_FIELDLIST", "variants": variants}
    # Whole-dict comparison: no extra and no missing keys are tolerated.
    assert parser.keys["0x3c45"] == expected
|
||||
|
Reference in New Issue
Block a user