mirror of
				https://github.com/isledecomp/isle.git
				synced 2025-10-25 17:34:05 +00:00 
			
		
		
		
	Identify and handle jump tables (#732)
This commit is contained in:
		
							
								
								
									
										235
									
								
								tools/isledecomp/isledecomp/compare/asm/instgen.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										235
									
								
								tools/isledecomp/isledecomp/compare/asm/instgen.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,235 @@ | |||||||
|  | """Pre-parser for x86 instructions. Will identify data/jump tables used with | ||||||
|  | switch statements and local jump/call destinations.""" | ||||||
|  | import re | ||||||
|  | import bisect | ||||||
|  | import struct | ||||||
|  | from enum import Enum, auto | ||||||
|  | from collections import namedtuple | ||||||
|  | from typing import List, NamedTuple, Optional, Tuple, Union | ||||||
|  | from capstone import Cs, CS_ARCH_X86, CS_MODE_32 | ||||||
|  | from .const import JUMP_MNEMONICS | ||||||
|  | 
 | ||||||
# Module-wide capstone handle, configured for 32-bit x86.
disassembler = Cs(CS_ARCH_X86, CS_MODE_32)

# Lightweight record built from the 4-tuples yielded by `disasm_lite`.
DisasmLiteInst = namedtuple("DisasmLiteInst", "address, size, mnemonic, op_str")

# Captures the hex constant of an operand that ends in `+ 0x...]`,
# i.e. the displacement of a pointer expression.
displacement_regex = re.compile(r".*\+ (0x[0-9a-f]+)\]")
|  | 
 | ||||||
|  | 
 | ||||||
class SectionType(Enum):
    """Kinds of section that InstructGen can produce for a function."""

    # Disassembled instructions.
    CODE = auto()
    # Table read back one byte at a time.
    DATA_TAB = auto()
    # Table read back as little-endian dwords (jump destinations).
    ADDR_TAB = auto()
|  | 
 | ||||||
|  | 
 | ||||||
class FuncSection(NamedTuple):
    """One contiguous section of a function, as produced by InstructGen.

    type: what the section holds (code, jump table or data table).
    contents: for CODE, the disassembled instructions in address order.
        For ADDR_TAB and DATA_TAB, a list of (address, value) pairs where
        both members are integers: the address of the table entry and the
        dword or byte stored there.
    """

    type: SectionType
    contents: List[Union[DisasmLiteInst, Tuple[int, int]]]
|  | 
 | ||||||
|  | 
 | ||||||
class InstructGen:
    """Pre-parse the raw bytes of one function into typed sections.

    The blob is walked from `start`. Disassembled code is scanned for jump
    destinations and for operands of the form `[... + 0x...]`. Addresses
    found this way that lie inside the blob are recorded in
    `confirmed_addrs` and decide where a code section stops and where a
    jump table (ADDR_TAB) or data table (DATA_TAB) begins.

    The analysis runs from the constructor; the result is `sections`, a list
    of FuncSection in the order the sections were read.
    """

    # pylint: disable=too-many-instance-attributes
    def __init__(self, blob: bytes, start: int) -> None:
        """Store the function bytes and analyze them right away.

        blob: raw bytes of the function.
        start: address that corresponds to the first byte of `blob`.
        """
        self.blob = blob
        self.start = start
        # First address past the blob.
        self.end = len(blob) + start
        # Address at which the section currently being read has to stop.
        self.section_end: int = self.end
        # Every disassembly run made so far, kept so that a later code
        # section can reuse instructions that were already decoded.
        self.code_tracks: List[List[DisasmLiteInst]] = []

        # Todo: Could be refactored later
        self.cur_addr: int = 0
        self.cur_section_type: SectionType = SectionType.CODE
        self.section_start = start

        self.sections: List[FuncSection] = []

        # Maps address -> SectionType for every in-bounds address that an
        # instruction or a jump table has referred to.
        self.confirmed_addrs = {}
        self.analysis()

    def _finish_section(self, type_: SectionType, stuff) -> None:
        """Append a finished section of the given type to `sections`."""
        self.sections.append(FuncSection(type_, stuff))

    def _insert_confirmed_addr(self, addr: int, type_: SectionType) -> None:
        """Record that `addr` starts content of kind `type_`.

        Addresses outside [start, end) are ignored. May pull `section_end`
        closer so that the section being read stops at `addr`.
        """
        # Ignore address outside the bounds of the function
        if not self.start <= addr < self.end:
            return

        self.confirmed_addrs[addr] = type_

        # This newly inserted address might signal the end of this section.
        # For example, a jump table at the end of the function means we should
        # stop reading instructions once we hit that address.
        # However, if there is a jump table in between code sections, we might
        # read a jump to an address back to the beginning of the function
        # (e.g. a loop that spans the entire function)
        # so ignore this address because we have already passed it.
        if type_ != self.cur_section_type and addr > self.cur_addr:
            self.section_end = min(self.section_end, addr)

    def _next_section(self, addr: int) -> Optional[SectionType]:
        """Tell what kind of section starts at `addr` and where it ends.

        Updates `cur_section_type` and `section_end` as a side effect.
        Returns None if `addr` is neither the function start nor a confirmed
        address, which ends the analysis.
        """
        # Assume the start of every function is code.
        if addr == self.start:
            self.section_end = self.end
            return SectionType.CODE

        # The start of a new section must be an address that we've seen.
        new_type = self.confirmed_addrs.get(addr)
        if new_type is None:
            return None

        self.cur_section_type = new_type

        # The confirmed addrs dict is in insertion order, i.e. the order in
        # which we read the addresses, so sort before looking for the next
        # boundary.
        #
        # If we are in a CODE section, ignore other CODE addresses: they are
        # not the start of a new section. If we are not in CODE, any upcoming
        # address is a new section. This lets us detect contiguous non-CODE
        # sections.
        in_code = self.cur_section_type == SectionType.CODE
        boundaries = [
            conf_addr
            for (conf_addr, conf_type) in sorted(self.confirmed_addrs.items())
            if not in_code or conf_type != self.cur_section_type
        ]

        index = bisect.bisect_right(boundaries, addr)
        if index < len(boundaries):
            self.section_end = boundaries[index]
        else:
            self.section_end = self.end

        return new_type

    def _get_code_for(self, addr: int) -> List[DisasmLiteInst]:
        """Return the instructions that start at `addr`.

        Reuses an earlier disassembly run if one of its instructions begins
        exactly at `addr`; otherwise disassembles from `addr` to the end of
        the blob and remembers the new run. The list is empty if nothing
        could be decoded at `addr`.
        """
        # For a CODE/ADDR/CODE function, we might get lucky and produce the
        # correct instruction after the jump table's junk instructions.
        for track in self.code_tracks:
            for i, inst in enumerate(track):
                if inst.address == addr:
                    return track[i:]

        # If we are here, we don't have the instructions.
        # Todo: Could try to be clever here and disassemble only
        # as much as we probably need (i.e. if a jump table is between CODE
        # blocks, there are probably only a few bad instructions after the
        # jump table is finished. We could disassemble up to the next verified
        # code address and stitch it together)
        blob_cropped = self.blob[addr - self.start :]
        instructions = [
            DisasmLiteInst(*inst)
            for inst in disassembler.disasm_lite(blob_cropped, addr)
        ]
        self.code_tracks.append(instructions)
        return instructions

    def _handle_jump(self, inst: DisasmLiteInst) -> None:
        """Record the destination of a jump instruction, if it has a usable one.

        A plain numeric operand is recorded as a CODE address. An operand
        with a `+ 0x...]` displacement is recorded as the start of a jump
        table. Anything else is ignored.
        """
        # Parse instead of peeking at the first character: an empty operand
        # or one that is not a single number must not raise, and a target
        # printed without the "0x" prefix should still be recorded.
        # NOTE(review): assumes capstone prints small immediates (0-9) in
        # decimal, where base-16 parsing gives the same value - confirm.
        try:
            target = int(inst.op_str, 16)
        except ValueError:
            target = None

        if target is not None:
            self._insert_confirmed_addr(target, SectionType.CODE)
            return

        # If this is jumping into a table of addresses, save the destination
        match = displacement_regex.match(inst.op_str)
        if match is not None:
            table_addr = int(match.group(1), 16)
            self._insert_confirmed_addr(table_addr, SectionType.ADDR_TAB)

    def _read_code_section(self) -> bool:
        """Read instructions from `cur_addr` up to `section_end`.

        Returns False if no instruction could be decoded at `cur_addr`,
        in which case no section is recorded.
        """
        instructions = self._get_code_for(self.cur_addr)

        # If we didn't get any instructions back, something is wrong.
        # i.e. We can only read part of the full instruction that is up next.
        if not instructions:
            # Todo: Maybe we could just call it quits here
            self.cur_addr += 1
            return False

        for inst in instructions:
            # section_end is updated as we read instructions.
            # If we are into a jump/data table and would read
            # a junk instruction, stop here.
            if self.cur_addr >= self.section_end:
                break

            if inst.mnemonic in JUMP_MNEMONICS:
                self._handle_jump(inst)
                # Todo: log calls too (unwind section)
            elif inst.mnemonic == "mov":
                # Todo: maintain pairing of data/jump tables
                match = displacement_regex.match(inst.op_str)
                if match is not None:
                    table_addr = int(match.group(1), 16)
                    self._insert_confirmed_addr(table_addr, SectionType.DATA_TAB)

            # Do this instead of copying instruction address.
            # If there is only one instruction, we would get stuck here.
            self.cur_addr += inst.size

        # We are at the end of the section or the entire function.
        # Keep only the instructions that belong to this section.
        # Todo: don't need to iter on every instruction here.
        # They are already in order.
        valid = [inst for inst in instructions if inst.address < self.section_end]
        self._finish_section(SectionType.CODE, valid)
        return True

    def _read_addr_table(self) -> None:
        """Read a jump table of little-endian dwords starting at `cur_addr`."""
        # Clamp to multiple of 4 (dwords)
        read_size = ((self.section_end - self.cur_addr) // 4) * 4
        offsets = range(self.section_start, self.section_start + read_size, 4)
        blob_ofs = self.cur_addr - self.start
        dwords = self.blob[blob_ofs : blob_ofs + read_size]
        addrs = [addr for addr, in struct.iter_unpack("<L", dwords)]
        for addr in addrs:
            # Todo: the fact that these are jump table destinations
            # should factor into the label name.
            self._insert_confirmed_addr(addr, SectionType.CODE)

        self._finish_section(SectionType.ADDR_TAB, list(zip(offsets, addrs)))
        self.cur_addr = self.section_end

    def _read_data_table(self) -> None:
        """Read a table of single bytes starting at `cur_addr`."""
        # Todo: variable data size?
        read_size = self.section_end - self.cur_addr
        offsets = range(self.section_start, self.section_start + read_size)
        blob_ofs = self.cur_addr - self.start
        # Iterating bytes yields the unsigned value of each byte.
        data = list(self.blob[blob_ofs : blob_ofs + read_size])

        self._finish_section(SectionType.DATA_TAB, list(zip(offsets, data)))
        self.cur_addr = self.section_end

    def analysis(self):
        """Walk the blob from `start` and fill `sections`.

        Stops when the current address is not the start of a known section
        or when code cannot be decoded there.
        """
        self.cur_addr = self.start

        while (sect_type := self._next_section(self.cur_addr)) is not None:
            self.section_start = self.cur_addr

            if sect_type == SectionType.CODE:
                if not self._read_code_section():
                    break
            elif sect_type == SectionType.ADDR_TAB:
                self._read_addr_table()
            else:
                self._read_data_table()
| @@ -11,13 +11,13 @@ from functools import cache | |||||||
| from typing import Callable, List, Optional, Tuple | from typing import Callable, List, Optional, Tuple | ||||||
| from collections import namedtuple | from collections import namedtuple | ||||||
| from isledecomp.bin import InvalidVirtualAddressError | from isledecomp.bin import InvalidVirtualAddressError | ||||||
| from capstone import Cs, CS_ARCH_X86, CS_MODE_32 |  | ||||||
| from .const import JUMP_MNEMONICS, SINGLE_OPERAND_INSTS | from .const import JUMP_MNEMONICS, SINGLE_OPERAND_INSTS | ||||||
| 
 | from .instgen import InstructGen, SectionType | ||||||
| disassembler = Cs(CS_ARCH_X86, CS_MODE_32) |  | ||||||
| 
 | 
 | ||||||
| ptr_replace_regex = re.compile(r"\[(0x[0-9a-f]+)\]") | ptr_replace_regex = re.compile(r"\[(0x[0-9a-f]+)\]") | ||||||
| 
 | 
 | ||||||
|  | displace_replace_regex = re.compile(r"\+ (0x[0-9a-f]+)\]") | ||||||
|  | 
 | ||||||
| # For matching an immediate value on its own. | # For matching an immediate value on its own. | ||||||
| # Preceded by start-of-string (first operand) or comma-space (second operand) | # Preceded by start-of-string (first operand) or comma-space (second operand) | ||||||
| immediate_replace_regex = re.compile(r"(?:^|, )(0x[0-9a-f]+)") | immediate_replace_regex = re.compile(r"(?:^|, )(0x[0-9a-f]+)") | ||||||
| @@ -172,16 +172,25 @@ class ParseAsm: | |||||||
|         else: |         else: | ||||||
|             op_str = ptr_replace_regex.sub(self.hex_replace_always, inst.op_str) |             op_str = ptr_replace_regex.sub(self.hex_replace_always, inst.op_str) | ||||||
| 
 | 
 | ||||||
|  |             # We only want relocated addresses for pointer displacement. | ||||||
|  |             # i.e. ptr [register + something] | ||||||
|  |             # Otherwise we would use a placeholder for every stack variable, | ||||||
|  |             # vtable call, or this->member access. | ||||||
|  |             op_str = displace_replace_regex.sub(self.hex_replace_relocated, op_str) | ||||||
|  | 
 | ||||||
|         op_str = immediate_replace_regex.sub(self.hex_replace_relocated, op_str) |         op_str = immediate_replace_regex.sub(self.hex_replace_relocated, op_str) | ||||||
|         return (inst.mnemonic, op_str) |         return (inst.mnemonic, op_str) | ||||||
| 
 | 
 | ||||||
|     def parse_asm(self, data: bytes, start_addr: Optional[int] = 0) -> List[str]: |     def parse_asm(self, data: bytes, start_addr: Optional[int] = 0) -> List[str]: | ||||||
|         asm = [] |         asm = [] | ||||||
| 
 | 
 | ||||||
|         for raw_inst in disassembler.disasm_lite(data, start_addr): |         ig = InstructGen(data, start_addr) | ||||||
|  | 
 | ||||||
|  |         for sect_type, sect_contents in ig.sections: | ||||||
|  |             if sect_type == SectionType.CODE: | ||||||
|  |                 for inst in sect_contents: | ||||||
|                     # Use heuristics to disregard some differences that aren't representative |                     # Use heuristics to disregard some differences that aren't representative | ||||||
|                     # of the accuracy of a function (e.g. global offsets) |                     # of the accuracy of a function (e.g. global offsets) | ||||||
|             inst = DisasmLiteInst(*raw_inst) |  | ||||||
| 
 | 
 | ||||||
|                     # If there is no pointer or immediate value in the op_str, |                     # If there is no pointer or immediate value in the op_str, | ||||||
|                     # there is nothing to sanitize. |                     # there is nothing to sanitize. | ||||||
| @@ -201,5 +210,14 @@ class ParseAsm: | |||||||
| 
 | 
 | ||||||
|                     # mnemonic + " " + op_str |                     # mnemonic + " " + op_str | ||||||
|                     asm.append((hex(inst.address), " ".join(result))) |                     asm.append((hex(inst.address), " ".join(result))) | ||||||
|  |             elif sect_type == SectionType.ADDR_TAB: | ||||||
|  |                 asm.append(("", "Jump table:")) | ||||||
|  |                 for i, (ofs, _) in enumerate(sect_contents): | ||||||
|  |                     asm.append((hex(ofs), f"Jump_dest_{i}")) | ||||||
|  | 
 | ||||||
|  |             elif sect_type == SectionType.DATA_TAB: | ||||||
|  |                 asm.append(("", "Data table:")) | ||||||
|  |                 for ofs, b in sect_contents: | ||||||
|  |                     asm.append((hex(ofs), hex(b))) | ||||||
| 
 | 
 | ||||||
|         return asm |         return asm | ||||||
|   | |||||||
							
								
								
									
										212
									
								
								tools/isledecomp/tests/test_instgen.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										212
									
								
								tools/isledecomp/tests/test_instgen.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,212 @@ | |||||||
|  | from isledecomp.compare.asm.instgen import InstructGen, SectionType | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
def test_ret():
    """A function that is a single `ret` byte must yield exactly one section."""
    sections = InstructGen(b"\xc3", 0).sections
    assert len(sections) == 1
|  | 
 | ||||||
|  | 
 | ||||||
|  | SCORE_NOTIFY = ( | ||||||
|  |     b"\x53\x56\x57\x8b\xd9\x33\xff\x8b\x74\x24\x10\x56\xe8\xbf\xe1\x01" | ||||||
|  |     b"\x00\x80\xbb\xf6\x00\x00\x00\x00\x0f\x84\x9c\x00\x00\x00\x8b\x4e" | ||||||
|  |     b"\x04\x49\x83\xf9\x17\x0f\x87\x8f\x00\x00\x00\x33\xc0\x8a\x81\xec" | ||||||
|  |     b"\x14\x00\x10\xff\x24\x85\xd4\x14\x00\x10\x8b\xcb\xbf\x01\x00\x00" | ||||||
|  |     b"\x00\xe8\x7a\x05\x00\x00\x8b\xc7\x5f\x5e\x5b\xc2\x04\x00\x56\x8b" | ||||||
|  |     b"\xcb\xe8\xaa\x00\x00\x00\x8b\xf8\x8b\xc7\x5f\x5e\x5b\xc2\x04\x00" | ||||||
|  |     b"\x80\x7e\x18\x20\x75\x07\x8b\xcb\xe8\xc3\xfe\xff\xff\xbf\x01\x00" | ||||||
|  |     b"\x00\x00\x8b\xc7\x5f\x5e\x5b\xc2\x04\x00\x56\x8b\xcb\xe8\x3e\x02" | ||||||
|  |     b"\x00\x00\x8b\xf8\x8b\xc7\x5f\x5e\x5b\xc2\x04\x00\x6a\x09\xa1\x4c" | ||||||
|  |     b"\x45\x0f\x10\x6a\x07\x50\xe8\x35\x45\x01\x00\x83\xc4\x0c\x8b\x83" | ||||||
|  |     b"\xf8\x00\x00\x00\x85\xc0\x74\x0d\x50\xe8\xa2\x42\x01\x00\x8b\xc8" | ||||||
|  |     b"\xe8\x9b\x9b\x03\x00\xbf\x01\x00\x00\x00\x8b\xc7\x5f\x5e\x5b\xc2" | ||||||
|  |     b"\x04\x00\x8b\xff\x4a\x14\x00\x10\x5e\x14\x00\x10\x70\x14\x00\x10" | ||||||
|  |     b"\x8a\x14\x00\x10\x9c\x14\x00\x10\xca\x14\x00\x10\x00\x01\x05\x05" | ||||||
|  |     b"\x05\x05\x02\x05\x05\x05\x05\x05\x05\x05\x05\x05\x03\x05\x05\x05" | ||||||
|  |     b"\x05\x05\x05\x04\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
def test_score_notify():
    """Score::Notify function from 0x10001410 in LEGO1.
    Representative case with a jump table (at 0x100014d4) followed by
    switch data (at 0x100014ec)."""
    sections = InstructGen(SCORE_NOTIFY, 0x10001410).sections

    # Expect exactly code, then the jump table, then the data table.
    assert len(sections) == 3
    found_types = tuple(section.type for section in sections)
    expected_types = (SectionType.CODE, SectionType.ADDR_TAB, SectionType.DATA_TAB)
    assert found_types == expected_types

    # The code section has to stop right before the jump table.
    # n.b. 0x100014d2 is the dummy instruction `mov edi, edi`
    # Ghidra does more thorough analysis and ignores this.
    # The last real instruction should be at 0x100014cf. Not a big deal
    # to include this because it is not junk data.
    last_inst = sections[0].contents[-1]
    assert last_inst.address == 0x100014D2

    # 6 switch addresses
    jump_table = sections[1].contents
    assert len(jump_table) == 6

    # TODO: The data table at the end includes all of the 0xCC padding bytes.
|  | 
 | ||||||
|  | 
 | ||||||
|  | SMACK_CASE = ( | ||||||
|  |     # LEGO1: 0x100cdc43 (modified so jump table points at +0x1016) | ||||||
|  |     b"\x2e\xff\x24\x8d\x16\x10\x00\x00" | ||||||
|  |     # LEGO1: 0x100cdb62 (instructions before and after jump table) | ||||||
|  |     b"\x8b\xf8\xeb\x1a\x87\xdb\x87\xc9\x87\xdb\x87\xc9\x87\xdb\x50\xdc" | ||||||
|  |     b"\x0c\x10\xd0\xe2\x0c\x10\xb0\xe8\x0c\x10\x50\xe9\x0c\x10\xa0\x10" | ||||||
|  |     b"\x27\x10\x10\x3c\x11\x77\x17\x8a\xc8" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
def test_smack_case():
    """Code / jump table / code.
    The code sections must be separated properly, junk instructions decoded
    from the table must be dropped, and disassembly must resume at the
    correct address after the table."""
    sections = InstructGen(SMACK_CASE, 0x1000).sections
    assert len(sections) == 3

    before_table, after_table = sections[0], sections[2]
    assert before_table.type == SectionType.CODE
    assert after_table.type == SectionType.CODE

    # Make sure we captured the instruction immediately after
    first_after = after_table.contents[0]
    assert first_after.mnemonic == "mov"
|  | 
 | ||||||
|  | 
 | ||||||
|  | # BETA10 0x1004c9cc | ||||||
|  | BETA_FUNC = ( | ||||||
|  |     b"\x55\x8b\xec\x83\xec\x08\x53\x56\x57\x89\x4d\xfc\x8b\x45\xfc\x33" | ||||||
|  |     b"\xc9\x8a\x88\x19\x02\x00\x00\x89\x4d\xf8\xe9\x1e\x00\x00\x00\xe9" | ||||||
|  |     b"\x41\x00\x00\x00\xe9\x3c\x00\x00\x00\xe9\x37\x00\x00\x00\xe9\x32" | ||||||
|  |     b"\x00\x00\x00\xe9\x2d\x00\x00\x00\xe9\x28\x00\x00\x00\x83\x7d\xf8" | ||||||
|  |     b"\x04\x0f\x87\x1e\x00\x00\x00\x8b\x45\xf8\xff\x24\x85\x1d\xca\x04" | ||||||
|  |     b"\x10\xeb\xc9\x04\x10\xf0\xc9\x04\x10\xf5\xc9\x04\x10\xfa\xc9\x04" | ||||||
|  |     b"\x10\xff\xc9\x04\x10\xb0\x01\xe9\x00\x00\x00\x00\x5f\x5e\x5b\xc9" | ||||||
|  |     b"\xc2\x04\x00" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
def test_beta_case():
    """Complete (and short) function laid out as CODE / ADDR / CODE."""
    sections = InstructGen(BETA_FUNC, 0x1004C9CC).sections

    # The JMP into the jump table immediately precedes the jump table.
    # Unless that is detected and the section switched, only 1 section
    # would come back.
    assert len(sections) == 3
    outer_types = [sections[0].type, sections[2].type]
    assert outer_types == [SectionType.CODE, SectionType.CODE]

    # Make sure we captured the instruction immediately after
    resumed = sections[2].contents
    assert resumed[0].mnemonic == "mov"
|  | 
 | ||||||
|  | 
 | ||||||
|  | # LEGO1 0x1000fb50 | ||||||
|  | # TODO: The test data here is longer than it needs to be. | ||||||
|  | THUNK_TEST = ( | ||||||
|  |     b"\x2b\x49\xfc\xe9\x08\x00\x00\x00\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc" | ||||||
|  |     b"\x56\x8b\xf1\xe8\xd8\xc5\x00\x00\x8b\xce\xe8\xb1\xdc\x01\x00\xf6" | ||||||
|  |     b"\x44\x24\x08\x01\x74\x0c\x8d\x46\xe0\x50\xe8\xe1\x66\x07\x00\x83" | ||||||
|  |     b"\xc4\x04\x8d\x46\xe0\x5e\xc2\x04\x00\xcc\xcc\xcc\xcc\xcc\xcc\xcc" | ||||||
|  |     b"\x2b\x49\xfc\xe9\x08\x00\x00\x00\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc" | ||||||
|  |     b"\xb8\x7c\x05\x0f\x10\xc3\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc" | ||||||
|  |     b"\x2b\x49\xfc\xe9\x08\x00\x00\x00\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc" | ||||||
|  |     b"\x8b\x54" | ||||||
|  |     # The problem is here: the last two bytes are the start of the next | ||||||
|  |     # function 0x1000fbc0. This is not enough data to read an instruction. | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
def test_thunk_case():
    """Adjuster thunk incorrectly annotated.
    Far more bytes are passed in than belong to this function."""
    generator = InstructGen(THUNK_TEST, 0x1000FB50)

    # No switch cases here, so the only section is code.
    # This caused an infinite loop during testing so the goal is just to finish.
    section_count = len(generator.sections)
    assert section_count == 1

    # TODO: We might detect the 0xCC padding bytes and cut off the function.
    # If we did that, we would correctly read only 2 instructions.
    # assert len(ig.sections[0].contents) == 2
|  | 
 | ||||||
|  | 
 | ||||||
|  | # LEGO1 0x1006f080, Infocenter::HandleEndAction | ||||||
|  | HANDLE_END_ACTION = ( | ||||||
|  |     b"\x53\x56\x57\x8b\xf1\x8b\x5c\x24\x10\x8b\x0d\x84\x45\x0f\x10\x8b" | ||||||
|  |     b"\x7b\x0c\x8b\x47\x20\x39\x01\x75\x29\x81\x7f\x1c\xf3\x01\x00\x00" | ||||||
|  |     b"\x75\x20\xe8\x59\x66\xfa\xff\x6a\x00\x8b\x40\x18\x6a\x00\x6a\x10" | ||||||
|  |     b"\x50\xff\x15\x38\xb5\x10\x10\xb8\x01\x00\x00\x00\x5f\x5e\x5b\xc2" | ||||||
|  |     b"\x04\x00\x39\x46\x0c\x0f\x85\xa2\x00\x00\x00\x8b\x47\x1c\x83\xf8" | ||||||
|  |     b"\x28\x74\x18\x83\xf8\x29\x74\x13\x83\xf8\x2a\x74\x0e\x83\xf8\x2b" | ||||||
|  |     b"\x74\x09\x83\xf8\x2c\x0f\x85\x82\x00\x00\x00\x66\x8b\x86\xd4\x01" | ||||||
|  |     b"\x00\x00\x66\x85\xc0\x74\x09\x66\x48\x66\x89\x86\xd4\x01\x00\x00" | ||||||
|  |     b"\x66\x83\xbe\xd4\x01\x00\x00\x00\x75\x63\x6a\x0b\xe8\xff\x67\xfa" | ||||||
|  |     b"\xff\x66\x8b\x86\xfc\x00\x00\x00\x83\xc4\x04\x50\xe8\x3f\x66\xfa" | ||||||
|  |     b"\xff\x8b\xc8\xe8\x58\xa6\xfc\xff\x0f\xbf\x86\xfc\x00\x00\x00\x48" | ||||||
|  |     b"\x83\xf8\x04\x77\x2f\xff\x24\x85\x78\xf4\x06\x10\x68\x1d\x02\x00" | ||||||
|  |     b"\x00\xeb\x1a\x68\x1e\x02\x00\x00\xeb\x13\x68\x1f\x02\x00\x00\xeb" | ||||||
|  |     b"\x0c\x68\x20\x02\x00\x00\xeb\x05\x68\x21\x02\x00\x00\x8b\xce\xe8" | ||||||
|  |     b"\x9c\x21\x00\x00\x6a\x01\x8b\xce\xe8\x53\x1c\x00\x00\x8d\x8e\x0c" | ||||||
|  |     b"\x01\x00\x00\x53\x8b\x01\xff\x50\x04\x85\xc0\x0f\x85\xef\x02\x00" | ||||||
|  |     b"\x00\x8b\x56\x0c\x8b\x4f\x20\x3b\xd1\x74\x0e\x8b\x1d\x74\x45\x0f" | ||||||
|  |     b"\x10\x39\x0b\x0f\x85\xd7\x02\x00\x00\x81\x7f\x1c\x02\x02\x00\x00" | ||||||
|  |     b"\x75\x1a\x6a\x00\x52\x6a\x10\xe8\xa4\x65\xfa\xff\x8b\xc8\xe8\x0d" | ||||||
|  |     b"\xa2\xfb\xff\x66\xc7\x86\xd6\x01\x00\x00\x00\x00\x8b\x96\x00\x01" | ||||||
|  |     b"\x00\x00\x8d\x42\x74\x8b\x18\x83\xfb\x0c\x0f\x87\x9b\x02\x00\x00" | ||||||
|  |     b"\x33\xc9\x8a\x8b\xac\xf4\x06\x10\xff\x24\x8d\x8c\xf4\x06\x10\x8b" | ||||||
|  |     b"\x86\x08\x01\x00\x00\x83\xf8\x05\x77\x07\xff\x24\x85\xbc\xf4\x06" | ||||||
|  |     b"\x10\x8b\xce\xe8\xb8\x1a\x00\x00\x8b\x86\x00\x01\x00\x00\x68\xf4" | ||||||
|  |     b"\x01\x00\x00\x8b\xce\xc7\x40\x74\x0b\x00\x00\x00\xe8\xef\x20\x00" | ||||||
|  |     b"\x00\x8b\x86\x00\x01\x00\x00\xc7\x86\x08\x01\x00\x00\xff\xff\xff" | ||||||
|  |     b"\xff\x83\x78\x78\x00\x0f\x85\x40\x02\x00\x00\xb8\x01\x00\x00\x00" | ||||||
|  |     b"\x5f\x66\xc7\x86\xd2\x01\x00\x00\x01\x00\x5e\x5b\xc2\x04\x00\x6a" | ||||||
|  |     b"\x00\x8b\xce\x6a\x01\xe8\xd6\x19\x00\x00\xb8\x01\x00\x00\x00\x5f" | ||||||
|  |     b"\x5e\x5b\xc2\x04\x00\x6a\x01\x8b\xce\x6a\x02\xe8\xc0\x19\x00\x00" | ||||||
|  |     b"\xb8\x01\x00\x00\x00\x5f\x5e\x5b\xc2\x04\x00\x8b\xce\xe8\x3e\x1a" | ||||||
|  |     b"\x00\x00\x8b\x86\x00\x01\x00\x00\x68\x1c\x02\x00\x00\x8b\xce\xc7" | ||||||
|  |     b"\x40\x74\x0b\x00\x00\x00\xe8\x75\x20\x00\x00\xb8\x01\x00\x00\x00" | ||||||
|  |     b"\x5f\xc7\x86\x08\x01\x00\x00\xff\xff\xff\xff\x5e\x5b\xc2\x04\x00" | ||||||
|  |     b"\x8b\xce\xe8\x09\x1a\x00\x00\x8b\x86\x00\x01\x00\x00\x68\x1b\x02" | ||||||
|  |     b"\x00\x00\x8b\xce\xc7\x40\x74\x0b\x00\x00\x00\xe8\x40\x20\x00\x00" | ||||||
|  |     b"\xb8\x01\x00\x00\x00\x5f\xc7\x86\x08\x01\x00\x00\xff\xff\xff\xff" | ||||||
|  |     b"\x5e\x5b\xc2\x04\x00\xc7\x00\x0b\x00\x00\x00\x8b\x86\x08\x01\x00" | ||||||
|  |     b"\x00\x83\xf8\x04\x74\x0c\x83\xf8\x05\x74\x0e\x68\xf4\x01\x00\x00" | ||||||
|  |     b"\xeb\x0c\x68\x1c\x02\x00\x00\xeb\x05\x68\x1b\x02\x00\x00\x8b\xce" | ||||||
|  |     b"\xe8\xfb\x1f\x00\x00\xb8\x01\x00\x00\x00\x5f\xc7\x86\x08\x01\x00" | ||||||
|  |     b"\x00\xff\xff\xff\xff\x5e\x5b\xc2\x04\x00\x6a\x00\xa1\xa0\x76\x0f" | ||||||
|  |     b"\x10\x50\xe8\x39\x65\xfa\xff\x83\xc4\x08\xa1\xa4\x76\x0f\x10\x6a" | ||||||
|  |     b"\x00\x50\xe8\x29\x65\xfa\xff\x83\xc4\x08\xe8\xf1\x63\xfa\xff\x8b" | ||||||
|  |     b"\xc8\xe8\x6a\x02\x01\x00\xb8\x01\x00\x00\x00\x5f\x5e\x5b\xc2\x04" | ||||||
|  |     b"\x00\x8b\x47\x1c\x83\xf8\x46\x74\x09\x83\xf8\x47\x0f\x85\x09\x01" | ||||||
|  |     b"\x00\x00\x6a\x00\x6a\x00\x6a\x32\x6a\x03\xe8\x91\x65\xfa\xff\x8b" | ||||||
|  |     b"\xc8\xe8\xfa\xc7\xfd\xff\x8b\x86\x00\x01\x00\x00\x5f\x5e\x5b\xc7" | ||||||
|  |     b"\x40\x74\x0e\x00\x00\x00\xb8\x01\x00\x00\x00\xc2\x04\x00\x8b\x47" | ||||||
|  |     b"\x1c\x39\x86\xf8\x00\x00\x00\x0f\x85\xce\x00\x00\x00\xe8\xbe\x63" | ||||||
|  |     b"\xfa\xff\x83\x78\x10\x02\x74\x19\x66\x8b\x86\xfc\x00\x00\x00\x66" | ||||||
|  |     b"\x85\xc0\x74\x0d\x50\xe8\xa6\x63\xfa\xff\x8b\xc8\xe8\xbf\xa3\xfc" | ||||||
|  |     b"\xff\x6a\x00\x6a\x00\x6a\x32\x6a\x03\xe8\x32\x65\xfa\xff\x8b\xc8" | ||||||
|  |     b"\xe8\x9b\xc7\xfd\xff\x8b\x86\x00\x01\x00\x00\x5f\x5e\x5b\xc7\x40" | ||||||
|  |     b"\x74\x0e\x00\x00\x00\xb8\x01\x00\x00\x00\xc2\x04\x00\x83\x7a\x78" | ||||||
|  |     b"\x00\x75\x32\x8b\x86\xf8\x00\x00\x00\x83\xf8\x28\x74\x27\x83\xf8" | ||||||
|  |     b"\x29\x74\x22\x83\xf8\x2a\x74\x1d\x83\xf8\x2b\x74\x18\x83\xf8\x2c" | ||||||
|  |     b"\x74\x13\x66\xc7\x86\xd0\x01\x00\x00\x01\x00\x6a\x0b\xe8\xee\x64" | ||||||
|  |     b"\xfa\xff\x83\xc4\x04\x8b\x86\x00\x01\x00\x00\x6a\x01\x68\xdc\x44" | ||||||
|  |     b"\x0f\x10\xc7\x40\x74\x02\x00\x00\x00\xe8\x22\x64\xfa\xff\x83\xc4" | ||||||
|  |     b"\x08\xb8\x01\x00\x00\x00\x5f\x5e\x5b\xc2\x04\x00\x8b\x47\x1c\x39" | ||||||
|  |     b"\x86\xf8\x00\x00\x00\x75\x14\x6a\x00\x6a\x00\x6a\x32\x6a\x03\xe8" | ||||||
|  |     b"\x9c\x64\xfa\xff\x8b\xc8\xe8\x05\xc7\xfd\xff\xb8\x01\x00\x00\x00" | ||||||
|  |     b"\x5f\x5e\x5b\xc2\x04\x00\x8b\xff\x3c\xf1\x06\x10\x43\xf1\x06\x10" | ||||||
|  |     b"\x4a\xf1\x06\x10\x51\xf1\x06\x10\x58\xf1\x06\x10\xdf\xf1\x06\x10" | ||||||
|  |     b"\xd5\xf2\x06\x10\x1a\xf3\x06\x10\x51\xf3\x06\x10\x8e\xf3\x06\x10" | ||||||
|  |     b"\xed\xf3\x06\x10\x4c\xf4\x06\x10\x6b\xf4\x06\x10\x00\x01\x02\x07" | ||||||
|  |     b"\x03\x04\x07\x07\x07\x07\x07\x05\x06\x8d\x49\x00\x3f\xf2\x06\x10" | ||||||
|  |     b"\x55\xf2\x06\x10\xf1\xf1\x06\x10\xf1\xf1\x06\x10\x6b\xf2\x06\x10" | ||||||
|  |     b"\xa0\xf2\x06\x10\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc\xcc" | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
def test_action_case():
    """3 switches: 3 jump tables, 1 data table"""
    sections = InstructGen(HANDLE_END_ACTION, 0x1006F080).sections

    # Two of the jump tables (0x1006f478 with 5, 0x1006f48c with 8)
    # are contiguous.
    assert len(sections) == 5
| @@ -81,13 +81,23 @@ def test_jump_displacement(): | |||||||
|     assert op_str == "-0x2" |     assert op_str == "-0x2" | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @pytest.mark.xfail(reason="Not implemented yet") |  | ||||||
| def test_jmp_table(): | def test_jmp_table(): | ||||||
|     """Should detect the characteristic jump table instruction |     """To ignore cases where it would be inappropriate to replace pointer | ||||||
|     (for a switch statement) and use placeholder.""" |     displacement (i.e. the vast majority of them) we require the address | ||||||
|  |     to be relocated. This excludes any address less than the imagebase.""" | ||||||
|     p = ParseAsm() |     p = ParseAsm() | ||||||
|     inst = mock_inst("jmp", "dword ptr [eax*4 + 0x5555]") |     inst = mock_inst("jmp", "dword ptr [eax*4 + 0x5555]") | ||||||
|     (_, op_str) = p.sanitize(inst) |     (_, op_str) = p.sanitize(inst) | ||||||
|  |     # i.e. no change | ||||||
|  |     assert op_str == "dword ptr [eax*4 + 0x5555]" | ||||||
|  | 
 | ||||||
|  |     def relocate_lookup(addr: int) -> bool: | ||||||
|  |         return addr == 0x5555 | ||||||
|  | 
 | ||||||
|  |     # Now add the relocation lookup | ||||||
|  |     p = ParseAsm(relocate_lookup=relocate_lookup) | ||||||
|  |     (_, op_str) = p.sanitize(inst) | ||||||
|  |     # Should replace it now | ||||||
|     assert op_str == "dword ptr [eax*4 + <OFFSET1>]" |     assert op_str == "dword ptr [eax*4 + <OFFSET1>]" | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 MS
					MS