mirror of
https://github.com/isledecomp/isle.git
synced 2025-10-24 00:44:21 +00:00
297 lines
10 KiB
Python
297 lines
10 KiB
Python
from typing import Optional
|
|
import pytest
|
|
from isledecomp.compare.asm.parse import DisasmLiteInst, ParseAsm
|
|
|
|
|
|
def mock_inst(mnemonic: str, op_str: str) -> DisasmLiteInst:
|
|
"""Mock up the named tuple DisasmLite from just a mnemonic and op_str.
|
|
To be used for tests on sanitize that do not require the instruction address
|
|
or size. i.e. any non-jump instruction."""
|
|
return DisasmLiteInst(0, 0, mnemonic, op_str)
|
|
|
|
|
|
identity_cases = [
|
|
("", ""),
|
|
("sti", ""),
|
|
("push", "ebx"),
|
|
("ret", ""),
|
|
("ret", "4"),
|
|
("mov", "eax, 0x1234"),
|
|
]
|
|
|
|
|
|
@pytest.mark.parametrize("mnemonic, op_str", identity_cases)
|
|
def test_identity(mnemonic, op_str):
|
|
"""Confirm that nothing is substituted."""
|
|
p = ParseAsm()
|
|
inst = mock_inst(mnemonic, op_str)
|
|
result = p.sanitize(inst)
|
|
assert result == (mnemonic, op_str)
|
|
|
|
|
|
ptr_replace_cases = [
|
|
("byte ptr [0x5555]", "byte ptr [<OFFSET1>]"),
|
|
("word ptr [0x5555]", "word ptr [<OFFSET1>]"),
|
|
("dword ptr [0x5555]", "dword ptr [<OFFSET1>]"),
|
|
("qword ptr [0x5555]", "qword ptr [<OFFSET1>]"),
|
|
("eax, dword ptr [0x5555]", "eax, dword ptr [<OFFSET1>]"),
|
|
("dword ptr [0x5555], eax", "dword ptr [<OFFSET1>], eax"),
|
|
("dword ptr [0x5555], 0", "dword ptr [<OFFSET1>], 0"),
|
|
("dword ptr [0x5555], 8", "dword ptr [<OFFSET1>], 8"),
|
|
# Same value, assumed to be an addr in the first appearance
|
|
# because it is designated as 'ptr', but we have not provided the
|
|
# relocation table lookup method so we do not replace the second appearance.
|
|
("dword ptr [0x5555], 0x5555", "dword ptr [<OFFSET1>], 0x5555"),
|
|
]
|
|
|
|
|
|
@pytest.mark.parametrize("start, end", ptr_replace_cases)
|
|
def test_ptr_replace(start, end):
|
|
"""Anything in square brackets (with the 'ptr' prefix) will always be replaced."""
|
|
p = ParseAsm()
|
|
inst = mock_inst("", start)
|
|
(_, op_str) = p.sanitize(inst)
|
|
assert op_str == end
|
|
|
|
|
|
call_replace_cases = [
|
|
("ebx", "ebx"),
|
|
("0x1234", "<OFFSET1>"),
|
|
("dword ptr [0x1234]", "dword ptr [<OFFSET1>]"),
|
|
("dword ptr [ecx + 0x10]", "dword ptr [ecx + 0x10]"),
|
|
]
|
|
|
|
|
|
@pytest.mark.parametrize("start, end", call_replace_cases)
|
|
def test_call_replace(start, end):
|
|
"""Call with hex operand is always replaced.
|
|
Otherwise, ptr replacement rules apply, but skip `this` calls."""
|
|
p = ParseAsm()
|
|
inst = mock_inst("call", start)
|
|
(_, op_str) = p.sanitize(inst)
|
|
assert op_str == end
|
|
|
|
|
|
def test_jump_displacement():
|
|
"""Display jump displacement (offset from end of jump instruction)
|
|
instead of destination address."""
|
|
p = ParseAsm()
|
|
inst = DisasmLiteInst(0x1000, 2, "je", "0x1000")
|
|
(_, op_str) = p.sanitize(inst)
|
|
assert op_str == "-0x2"
|
|
|
|
|
|
def test_jmp_table():
|
|
"""To ignore cases where it would be inappropriate to replace pointer
|
|
displacement (i.e. the vast majority of them) we require the address
|
|
to be relocated. This excludes any address less than the imagebase."""
|
|
p = ParseAsm()
|
|
inst = mock_inst("jmp", "dword ptr [eax*4 + 0x5555]")
|
|
(_, op_str) = p.sanitize(inst)
|
|
# i.e. no change
|
|
assert op_str == "dword ptr [eax*4 + 0x5555]"
|
|
|
|
def relocate_lookup(addr: int) -> bool:
|
|
return addr == 0x5555
|
|
|
|
# Now add the relocation lookup
|
|
p = ParseAsm(relocate_lookup=relocate_lookup)
|
|
(_, op_str) = p.sanitize(inst)
|
|
# Should replace it now
|
|
assert op_str == "dword ptr [eax*4 + <OFFSET1>]"
|
|
|
|
|
|
name_replace_cases = [
|
|
("byte ptr [0x5555]", "byte ptr [_substitute_]"),
|
|
("word ptr [0x5555]", "word ptr [_substitute_]"),
|
|
("dword ptr [0x5555]", "dword ptr [_substitute_]"),
|
|
("qword ptr [0x5555]", "qword ptr [_substitute_]"),
|
|
]
|
|
|
|
|
|
@pytest.mark.parametrize("start, end", name_replace_cases)
|
|
def test_name_replace(start, end):
|
|
"""Make sure the name lookup function is called if present"""
|
|
|
|
def substitute(_: int, __: bool) -> str:
|
|
return "_substitute_"
|
|
|
|
p = ParseAsm(name_lookup=substitute)
|
|
inst = mock_inst("mov", start)
|
|
(_, op_str) = p.sanitize(inst)
|
|
assert op_str == end
|
|
|
|
|
|
def test_replacement_cache():
|
|
p = ParseAsm()
|
|
inst = mock_inst("inc", "dword ptr [0x1234]")
|
|
|
|
(_, op_str) = p.sanitize(inst)
|
|
assert op_str == "dword ptr [<OFFSET1>]"
|
|
|
|
(_, op_str) = p.sanitize(inst)
|
|
assert op_str == "dword ptr [<OFFSET1>]"
|
|
|
|
|
|
def test_replacement_numbering():
|
|
"""If we can use the name lookup for the first address but not the second,
|
|
the second replacement should be <OFFSET2> not <OFFSET1>."""
|
|
|
|
def substitute_1234(addr: int, _: bool) -> Optional[str]:
|
|
return "_substitute_" if addr == 0x1234 else None
|
|
|
|
p = ParseAsm(name_lookup=substitute_1234)
|
|
|
|
(_, op_str) = p.sanitize(mock_inst("inc", "dword ptr [0x1234]"))
|
|
assert op_str == "dword ptr [_substitute_]"
|
|
|
|
(_, op_str) = p.sanitize(mock_inst("inc", "dword ptr [0x5555]"))
|
|
assert op_str == "dword ptr [<OFFSET2>]"
|
|
|
|
|
|
def test_relocate_lookup():
|
|
"""Immediate values would be relocated if they are actually addresses.
|
|
So we can use the relocation table to check whether a given value is an
|
|
address or just some number."""
|
|
|
|
def relocate_lookup(addr: int) -> bool:
|
|
return addr == 0x1234
|
|
|
|
p = ParseAsm(relocate_lookup=relocate_lookup)
|
|
(_, op_str) = p.sanitize(mock_inst("mov", "eax, 0x1234"))
|
|
assert op_str == "eax, <OFFSET1>"
|
|
|
|
(_, op_str) = p.sanitize(mock_inst("mov", "eax, 0x5555"))
|
|
assert op_str == "eax, 0x5555"
|
|
|
|
|
|
def test_jump_to_function():
|
|
"""A jmp instruction can lead us directly to a function. This can be found
|
|
in the unwind section at the end of a function. However: we do not want to
|
|
assume this is the case for all jumps. Only replace the jump with a name
|
|
if we can find it using our lookup."""
|
|
|
|
def substitute_1234(addr: int, _: bool) -> Optional[str]:
|
|
return "_substitute_" if addr == 0x1234 else None
|
|
|
|
p = ParseAsm(name_lookup=substitute_1234)
|
|
inst = DisasmLiteInst(0x1000, 2, "jmp", "0x1234")
|
|
(_, op_str) = p.sanitize(inst)
|
|
assert op_str == "_substitute_"
|
|
|
|
# Should not replace this jump.
|
|
# 0x1000 (start addr)
|
|
# + 2 (size of jump instruction)
|
|
# + 0x5555 (displacement, the value we want)
|
|
# = 0x6557
|
|
inst = DisasmLiteInst(0x1000, 2, "jmp", "0x6557")
|
|
(_, op_str) = p.sanitize(inst)
|
|
assert op_str == "0x5555"
|
|
|
|
|
|
@pytest.mark.skip(reason="changed implementation")
|
|
def test_float_replacement():
|
|
"""Floating point constants often appear as pointers to data.
|
|
A good example is ViewROI::IntrinsicImportance and the subclass override
|
|
LegoROI::IntrinsicImportance. Both return 0.5, but this is done via the
|
|
FLD instruction and a dword value at 0x100dbdec. In this case it is more
|
|
valuable to just read the constant value rather than use a placeholder.
|
|
The float constants don't appear to be deduplicated (like strings are)
|
|
because there is another 0.5 at 0x100d40b0."""
|
|
|
|
def bin_lookup(addr: int, _: int) -> Optional[bytes]:
|
|
return b"\xdb\x0f\x49\x40" if addr == 0x1234 else None
|
|
|
|
p = ParseAsm(bin_lookup=bin_lookup)
|
|
inst = DisasmLiteInst(0x1000, 6, "fld", "dword ptr [0x1234]")
|
|
(_, op_str) = p.sanitize(inst)
|
|
# Single-precision float. struct.unpack("<f", struct.pack("<f", math.pi))
|
|
assert op_str == "dword ptr [3.1415927410125732 (FLOAT)]"
|
|
|
|
|
|
@pytest.mark.skip(reason="changed implementation")
|
|
def test_float_variable():
|
|
"""If there is a variable at the address referenced by a float instruction,
|
|
use the name instead of calling into the float replacement handler."""
|
|
|
|
def name_lookup(addr: int, _: bool) -> Optional[str]:
|
|
return "g_myFloatVariable" if addr == 0x1234 else None
|
|
|
|
p = ParseAsm(name_lookup=name_lookup)
|
|
inst = DisasmLiteInst(0x1000, 6, "fld", "dword ptr [0x1234]")
|
|
(_, op_str) = p.sanitize(inst)
|
|
assert op_str == "dword ptr [g_myFloatVariable]"
|
|
|
|
|
|
def test_pointer_compare():
|
|
"""A loop on an array could get optimized into comparing on the address
|
|
that immediately follows the array. This may or may not be a valid address
|
|
and it may or may not be annotated. To avoid a situation where an
|
|
erroneous address value would get replaced with a placeholder and silently
|
|
pass the comparison check, we will only replace an immediate value on the
|
|
CMP instruction if it is a known address."""
|
|
|
|
# 0x1234 and 0x5555 are relocated and so are considered to be addresses.
|
|
def relocate_lookup(addr: int) -> bool:
|
|
return addr in (0x1234, 0x5555)
|
|
|
|
# Only 0x5555 is a "known" address
|
|
def name_lookup(addr: int, _: bool) -> Optional[str]:
|
|
return "hello" if addr == 0x5555 else None
|
|
|
|
p = ParseAsm(relocate_lookup=relocate_lookup, name_lookup=name_lookup)
|
|
|
|
# Will always replace on MOV instruction
|
|
(_, op_str) = p.sanitize(mock_inst("mov", "eax, 0x1234"))
|
|
assert op_str == "eax, <OFFSET1>"
|
|
(_, op_str) = p.sanitize(mock_inst("mov", "eax, 0x5555"))
|
|
assert op_str == "eax, hello"
|
|
|
|
# n.b. We have already cached the replacement for 0x1234, but the
|
|
# special handling for CMP should skip the cache and not use it.
|
|
|
|
# Do not replace here
|
|
(_, op_str) = p.sanitize(mock_inst("cmp", "eax, 0x1234"))
|
|
assert op_str == "eax, 0x1234"
|
|
# Should replace here
|
|
(_, op_str) = p.sanitize(mock_inst("cmp", "eax, 0x5555"))
|
|
assert op_str == "eax, hello"
|
|
|
|
|
|
def test_absolute_indirect():
|
|
"""The instruction `call dword ptr [0x1234]` means we call the function
|
|
whose address is at 0x1234. (i.e. absolute indirect addressing mode)
|
|
It is probably more useful to show the name of the function itself if
|
|
we have it, but there are some circumstances where we want to replace
|
|
with the pointer's name (i.e. an import function)."""
|
|
|
|
def name_lookup(addr: int, _: bool) -> Optional[str]:
|
|
return {
|
|
0x1234: "Hello",
|
|
0x4321: "xyz",
|
|
0x5555: "Test",
|
|
}.get(addr)
|
|
|
|
def bin_lookup(addr: int, _: int) -> Optional[bytes]:
|
|
return (
|
|
{
|
|
0x1234: b"\x55\x55\x00\x00",
|
|
0x4321: b"\x99\x99\x00\x00",
|
|
}
|
|
).get(addr)
|
|
|
|
p = ParseAsm(name_lookup=name_lookup, bin_lookup=bin_lookup)
|
|
|
|
# If we know the indirect address (0x5555)
|
|
# Arrow to indicate this is an indirect replacement
|
|
(_, op_str) = p.sanitize(mock_inst("call", "dword ptr [0x1234]"))
|
|
assert op_str == "dword ptr [->Test]"
|
|
|
|
# If we do not know the indirect address (0x9999)
|
|
(_, op_str) = p.sanitize(mock_inst("call", "dword ptr [0x4321]"))
|
|
assert op_str == "dword ptr [xyz]"
|
|
|
|
# If we can't read the indirect address
|
|
(_, op_str) = p.sanitize(mock_inst("call", "dword ptr [0x5555]"))
|
|
assert op_str == "dword ptr [Test]"
|