mirror of
				https://github.com/isledecomp/isle.git
				synced 2025-10-26 01:44:19 +00:00 
			
		
		
		
	Python Linting and Code Formatting (#298)
* Create common print_diff function * Add pylint and black * Fix linting, move classes to utils * Add black/pylint to github actions * Fix linting * Move Bin and SymInfo into their own files * Split out format * Tidy up workflows and pip, add readme * Lint tests, add tests to readme
This commit is contained in:
		| @@ -0,0 +1,5 @@ | ||||
| from .bin import * | ||||
| from .dir import * | ||||
| from .parser import * | ||||
| from .syminfo import * | ||||
| from .utils import * | ||||
|   | ||||
							
								
								
									
										47
									
								
								tools/isledecomp/isledecomp/bin.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								tools/isledecomp/isledecomp/bin.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,47 @@ | ||||
| import struct | ||||
| 
 | ||||
| 
 | ||||
| # Declare a class that can automatically convert virtual executable addresses | ||||
| # to file addresses | ||||
class Bin:
    """Read an executable by virtual address instead of file offset.

    Use as a context manager: entering opens ``filename`` in binary mode and
    reads three 32-bit little-endian header fields from fixed file offsets
    (image base, ``.text`` virtual address, ``.text`` raw pointer); leaving
    closes the file.

    Args:
        filename: Path of the executable to open.
        logger: Logger-like object; only ``debug`` is called on it.
    """

    def __init__(self, filename, logger):
        self.logger = logger
        self.logger.debug('Parsing headers of "%s"... ', filename)
        self.filename = filename
        # All of these are populated by __enter__.
        self.file = None
        self.imagebase = None
        self.textvirt = None
        self.textraw = None

    def __enter__(self):
        """Open the file and read the header fields.

        Returns:
            This instance, with ``imagebase``, ``textvirt`` and ``textraw`` set.

        Raises:
            OSError: If the file cannot be opened.
            struct.error: If the file is too short to hold a field.
        """
        self.logger.debug("Bin %s Enter", self.filename)
        self.file = open(self.filename, "rb")

        # __exit__ is not invoked when __enter__ itself raises, so the handle
        # has to be released here if reading the header fails.
        try:
            # HACK: Strictly, we should be parsing the header, but we know where
            #      everything is in these two files so we just jump straight there

            # Read ImageBase
            self.imagebase = self._read_u32(0xB4)

            # Read .text VirtualAddress
            self.textvirt = self._read_u32(0x184)

            # Read .text PointerToRawData
            self.textraw = self._read_u32(0x18C)
        except BaseException:
            self.file.close()
            self.file = None
            raise

        self.logger.debug("... Parsing finished")
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Close the file if it was opened; exceptions are not suppressed."""
        self.logger.debug("Bin %s Exit", self.filename)
        if self.file:
            self.file.close()

    def _read_u32(self, offset):
        """Return the unsigned 32-bit little-endian value at file ``offset``."""
        self.file.seek(offset)
        # "<I" (unsigned): these fields are addresses/offsets, and a signed
        # read would turn any value >= 0x80000000 negative.
        (value,) = struct.unpack("<I", self.file.read(4))
        return value

    def get_addr(self, virt):
        """Convert virtual address ``virt`` to an offset within the file."""
        return virt - self.imagebase - self.textvirt + self.textraw

    def read(self, offset, size):
        """Return up to ``size`` bytes starting at virtual address ``offset``."""
        self.file.seek(self.get_addr(offset))
        return self.file.read(size)
| @@ -1,21 +1,63 @@ | ||||
| import os | ||||
| import subprocess | ||||
| import sys | ||||
| from typing import Iterator | ||||
| 
 | ||||
| 
 | ||||
class WinePathConverter:
    """Translate paths between Unix form and Wine's Windows form.

    Paths that are relative (``./``) or lie at or below the working directory
    given at construction are rewritten by string manipulation; every other
    path is handed to the ``winepath`` command.

    Args:
        unix_cwd: Unix path of the working directory. Its Windows form is
            obtained from ``winepath`` once, at construction.
    """

    def __init__(self, unix_cwd):
        self.unix_cwd = unix_cwd
        self.win_cwd = self._call_winepath_unix2win(self.unix_cwd)

    def get_wine_path(self, unix_fn: str) -> str:
        """Return the Windows form of the Unix path ``unix_fn``."""
        if unix_fn.startswith("./"):
            return self.win_cwd + "\\" + unix_fn[2:].replace("/", "\\")

        # Only treat unix_fn as inside the cwd when the match ends on a path
        # separator; a bare prefix test would also match sibling directories
        # such as "<cwd>2/file".
        base = self.unix_cwd.rstrip("/")
        if unix_fn == base or unix_fn.startswith(base + "/"):
            tail = unix_fn[len(base) :].replace("/", "\\").lstrip("\\")
            return self.win_cwd + "\\" + tail

        return self._call_winepath_unix2win(unix_fn)

    def get_unix_path(self, win_fn: str) -> str:
        """Return the Unix form of the Windows path ``win_fn``."""
        if win_fn.startswith(".\\") or win_fn.startswith("./"):
            return self.unix_cwd + "/" + win_fn[2:].replace("\\", "/")

        # Same separator-boundary rule as get_wine_path.
        base = self.win_cwd.rstrip("\\")
        if win_fn == base or win_fn.startswith(base + "\\"):
            # Drop the leading separator so the join does not produce "//".
            tail = win_fn[len(base) :].replace("\\", "/").lstrip("/")
            return self.unix_cwd + "/" + tail

        return self._call_winepath_win2unix(win_fn)

    @staticmethod
    def _call_winepath_unix2win(fn: str) -> str:
        """Run ``winepath -w`` on ``fn`` and return its stripped output."""
        return subprocess.check_output(["winepath", "-w", fn], text=True).strip()

    @staticmethod
    def _call_winepath_win2unix(fn: str) -> str:
        """Run ``winepath`` on ``fn`` and return its stripped output."""
        return subprocess.check_output(["winepath", fn], text=True).strip()
| 
 | ||||
| 
 | ||||
| def is_file_cpp(filename: str) -> bool: | ||||
|     (basefile, ext) = os.path.splitext(filename) | ||||
|     return ext.lower() in ('.h', '.cpp') | ||||
|     (_, ext) = os.path.splitext(filename) | ||||
|     return ext.lower() in (".h", ".cpp") | ||||
| 
 | ||||
| 
 | ||||
| def walk_source_dir(source: str, recursive: bool = True) -> Iterator[str]: | ||||
|     """Generator to walk the given directory recursively and return | ||||
|        any C++ files found.""" | ||||
|     any C++ files found.""" | ||||
| 
 | ||||
|     source = os.path.abspath(source) | ||||
|     for subdir, dirs, files in os.walk(source): | ||||
|     for subdir, _, files in os.walk(source): | ||||
|         for file in files: | ||||
|             if is_file_cpp(file): | ||||
|                 yield os.path.join(subdir, file) | ||||
| 
 | ||||
|         if not recursive: | ||||
|             break | ||||
| 
 | ||||
| 
 | ||||
def get_file_in_script_dir(fn):
    """Return ``fn`` joined onto the directory of the running script.

    The script location is taken from ``sys.argv[0]``, made absolute first.
    """
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    return os.path.join(script_dir, fn)
|   | ||||
| @@ -7,7 +7,6 @@ from .util import ( | ||||
|     OffsetMatch, | ||||
|     is_blank_or_comment, | ||||
|     match_offset_comment, | ||||
|     is_exact_offset_comment, | ||||
|     get_template_function_name, | ||||
|     remove_trailing_comment, | ||||
|     distinct_by_module, | ||||
| @@ -25,10 +24,10 @@ class ReaderState(Enum): | ||||
| 
 | ||||
| def find_code_blocks(stream: TextIO) -> List[CodeBlock]: | ||||
|     """Read the IO stream (file) line-by-line and give the following report: | ||||
|        Foreach code block (function) in the file, what are its starting and | ||||
|        ending line numbers, and what is the given offset in the original | ||||
|        binary. We expect the result to be ordered by line number because we | ||||
|        are reading the file from start to finish.""" | ||||
|     Foreach code block (function) in the file, what are its starting and | ||||
|     ending line numbers, and what is the given offset in the original | ||||
|     binary. We expect the result to be ordered by line number because we | ||||
|     are reading the file from start to finish.""" | ||||
| 
 | ||||
|     blocks: List[CodeBlock] = [] | ||||
| 
 | ||||
| @@ -51,14 +50,16 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]: | ||||
|             # Our list of offset marks could have duplicates on | ||||
|             # module name, so we'll eliminate those now. | ||||
|             for offset_match in distinct_by_module(offset_matches): | ||||
|                 block = CodeBlock(offset=offset_match.address, | ||||
|                                   signature=function_sig, | ||||
|                                   start_line=start_line, | ||||
|                                   end_line=end_line, | ||||
|                                   offset_comment=offset_match.comment, | ||||
|                                   module=offset_match.module, | ||||
|                                   is_template=offset_match.is_template, | ||||
|                                   is_stub=offset_match.is_stub) | ||||
|                 block = CodeBlock( | ||||
|                     offset=offset_match.address, | ||||
|                     signature=function_sig, | ||||
|                     start_line=start_line, | ||||
|                     end_line=end_line, | ||||
|                     offset_comment=offset_match.comment, | ||||
|                     module=offset_match.module, | ||||
|                     is_template=offset_match.is_template, | ||||
|                     is_stub=offset_match.is_stub, | ||||
|                 ) | ||||
|                 blocks.append(block) | ||||
|             offset_matches = [] | ||||
|             state = ReaderState.WANT_OFFSET | ||||
| @@ -66,15 +67,18 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]: | ||||
|         if can_seek: | ||||
|             line_no += 1 | ||||
|             line = stream.readline() | ||||
|             if line == '': | ||||
|             if line == "": | ||||
|                 break | ||||
| 
 | ||||
|         new_match = match_offset_comment(line) | ||||
|         if new_match is not None: | ||||
|             # We will allow multiple offsets if we have just begun | ||||
|             # the code block, but not after we hit the curly brace. | ||||
|             if state in (ReaderState.WANT_OFFSET, ReaderState.IN_TEMPLATE, | ||||
|                          ReaderState.WANT_SIG): | ||||
|             if state in ( | ||||
|                 ReaderState.WANT_OFFSET, | ||||
|                 ReaderState.IN_TEMPLATE, | ||||
|                 ReaderState.WANT_SIG, | ||||
|             ): | ||||
|                 # If we detected an offset marker unexpectedly, | ||||
|                 # we are handling it here so we can continue seeking. | ||||
|                 can_seek = True | ||||
| @@ -116,11 +120,10 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]: | ||||
|                 # same line. clang-format should prevent this (BraceWrapping) | ||||
|                 # but it is easy to detect. | ||||
|                 # If the entire function is on one line, handle that too. | ||||
|                 if function_sig.endswith('{'): | ||||
|                 if function_sig.endswith("{"): | ||||
|                     start_line = line_no | ||||
|                     state = ReaderState.IN_FUNC | ||||
|                 elif (function_sig.endswith('}') or | ||||
|                         function_sig.endswith('};')): | ||||
|                 elif function_sig.endswith("}") or function_sig.endswith("};"): | ||||
|                     start_line = line_no | ||||
|                     end_line = line_no | ||||
|                     state = ReaderState.FUNCTION_DONE | ||||
| @@ -128,14 +131,14 @@ def find_code_blocks(stream: TextIO) -> List[CodeBlock]: | ||||
|                     state = ReaderState.WANT_CURLY | ||||
| 
 | ||||
|         elif state == ReaderState.WANT_CURLY: | ||||
|             if line.strip() == '{': | ||||
|             if line.strip() == "{": | ||||
|                 start_line = line_no | ||||
|                 state = ReaderState.IN_FUNC | ||||
| 
 | ||||
|         elif state == ReaderState.IN_FUNC: | ||||
|             # Naive but reasonable assumption that functions will end with | ||||
|             # a curly brace on its own line with no prepended spaces. | ||||
|             if line.startswith('}'): | ||||
|             if line.startswith("}"): | ||||
|                 end_line = line_no | ||||
|                 state = ReaderState.FUNCTION_DONE | ||||
| 
 | ||||
|   | ||||
| @@ -5,34 +5,49 @@ from typing import List | ||||
| from collections import namedtuple | ||||
| 
 | ||||
| 
 | ||||
| CodeBlock = namedtuple('CodeBlock', | ||||
|                        ['offset', 'signature', 'start_line', 'end_line', | ||||
|                         'offset_comment', 'module', 'is_template', 'is_stub']) | ||||
| CodeBlock = namedtuple( | ||||
|     "CodeBlock", | ||||
|     [ | ||||
|         "offset", | ||||
|         "signature", | ||||
|         "start_line", | ||||
|         "end_line", | ||||
|         "offset_comment", | ||||
|         "module", | ||||
|         "is_template", | ||||
|         "is_stub", | ||||
|     ], | ||||
| ) | ||||
| 
 | ||||
| OffsetMatch = namedtuple('OffsetMatch', ['module', 'address', 'is_template', | ||||
|                                          'is_stub', 'comment']) | ||||
| OffsetMatch = namedtuple( | ||||
|     "OffsetMatch", ["module", "address", "is_template", "is_stub", "comment"] | ||||
| ) | ||||
| 
 | ||||
| # This has not been formally established, but considering that "STUB" | ||||
| # is a temporary state for a function, we assume it will appear last, | ||||
| # after any other modifiers (i.e. TEMPLATE) | ||||
| 
 | ||||
| # To match a reasonable variance of formatting for the offset comment | ||||
| offsetCommentRegex = re.compile(r'\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?',  # nopep8 | ||||
|                                 flags=re.I) | ||||
| offsetCommentRegex = re.compile( | ||||
|     r"\s*//\s*OFFSET:\s*(\w+)\s+(?:0x)?([a-f0-9]+)(\s+TEMPLATE)?(\s+STUB)?",  # nopep8 | ||||
|     flags=re.I, | ||||
| ) | ||||
| 
 | ||||
| # To match the exact syntax (text upper case, hex lower case, with spaces) | ||||
| # that is used in most places | ||||
| offsetCommentExactRegex = re.compile(r'^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$')  # nopep8 | ||||
| offsetCommentExactRegex = re.compile( | ||||
|     r"^// OFFSET: [A-Z0-9]+ (0x[a-f0-9]+)( TEMPLATE)?( STUB)?$" | ||||
| )  # nopep8 | ||||
| 
 | ||||
| 
 | ||||
| # The goal here is to just read whatever is on the next line, so some | ||||
| # flexibility in the formatting seems OK | ||||
| templateCommentRegex = re.compile(r'\s*//\s+(.*)') | ||||
| templateCommentRegex = re.compile(r"\s*//\s+(.*)") | ||||
| 
 | ||||
| 
 | ||||
| # To remove any comment (//) or block comment (/*) and its leading spaces | ||||
| # from the end of a code line | ||||
| trailingCommentRegex = re.compile(r'(\s*(?://|/\*).*)$') | ||||
| trailingCommentRegex = re.compile(r"(\s*(?://|/\*).*)$") | ||||
| 
 | ||||
| 
 | ||||
| def get_template_function_name(line: str) -> str: | ||||
| @@ -47,23 +62,25 @@ def get_template_function_name(line: str) -> str: | ||||
| 
 | ||||
| 
 | ||||
| def remove_trailing_comment(line: str) -> str: | ||||
|     return trailingCommentRegex.sub('', line) | ||||
|     return trailingCommentRegex.sub("", line) | ||||
| 
 | ||||
| 
 | ||||
| def is_blank_or_comment(line: str) -> bool: | ||||
|     """Helper to read ahead after the offset comment is matched. | ||||
|        There could be blank lines or other comments before the | ||||
|        function signature, and we want to skip those.""" | ||||
|     There could be blank lines or other comments before the | ||||
|     function signature, and we want to skip those.""" | ||||
|     line_strip = line.strip() | ||||
|     return (len(line_strip) == 0 | ||||
|             or line_strip.startswith('//') | ||||
|             or line_strip.startswith('/*') | ||||
|             or line_strip.endswith('*/')) | ||||
|     return ( | ||||
|         len(line_strip) == 0 | ||||
|         or line_strip.startswith("//") | ||||
|         or line_strip.startswith("/*") | ||||
|         or line_strip.endswith("*/") | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| def is_exact_offset_comment(line: str) -> bool: | ||||
|     """If the offset comment does not match our (unofficial) syntax | ||||
|        we may want to alert the user to fix it for style points.""" | ||||
|     we may want to alert the user to fix it for style points.""" | ||||
|     return offsetCommentExactRegex.match(line) is not None | ||||
| 
 | ||||
| 
 | ||||
| @@ -72,17 +89,19 @@ def match_offset_comment(line: str) -> OffsetMatch | None: | ||||
|     if match is None: | ||||
|         return None | ||||
| 
 | ||||
|     return OffsetMatch(module=match.group(1), | ||||
|                        address=int(match.group(2), 16), | ||||
|                        is_template=match.group(3) is not None, | ||||
|                        is_stub=match.group(4) is not None, | ||||
|                        comment=line.strip()) | ||||
|     return OffsetMatch( | ||||
|         module=match.group(1), | ||||
|         address=int(match.group(2), 16), | ||||
|         is_template=match.group(3) is not None, | ||||
|         is_stub=match.group(4) is not None, | ||||
|         comment=line.strip(), | ||||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| def distinct_by_module(offsets: List) -> List: | ||||
|     """Given a list of offset markers, return a list with distinct | ||||
|        module names. If module names (case-insensitive) are repeated, | ||||
|        choose the offset that appears first.""" | ||||
|     module names. If module names (case-insensitive) are repeated, | ||||
|     choose the offset that appears first.""" | ||||
| 
 | ||||
|     if len(offsets) < 2: | ||||
|         return offsets | ||||
|   | ||||
							
								
								
									
										138
									
								
								tools/isledecomp/isledecomp/syminfo.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										138
									
								
								tools/isledecomp/isledecomp/syminfo.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,138 @@ | ||||
| import os | ||||
| import subprocess | ||||
| from .utils import get_file_in_script_dir | ||||
| 
 | ||||
| 
 | ||||
class RecompiledInfo:
    """Plain record describing one function symbol of the recompiled binary.

    Every field defaults to None at class level; SymInfo creates an instance
    per S_GPROC32 symbol and assigns the fields on that instance.
    """

    # Symbol address as printed by cvdump plus the binary's image base and
    # .text virtual address.
    addr = None
    # Size parsed (as hex) from the symbol's cvdump line.
    size = None
    # Symbol name: the remainder of the cvdump line.
    name = None
    # Start offset; SymInfo currently always stores 0 here.
    start = None
| 
 | ||||
| 
 | ||||
| # Declare a class that parses the output of cvdump for fast access later | ||||
class SymInfo:
    """Index of function symbols and line numbers parsed from cvdump output.

    Attributes:
        funcs: RecompiledInfo keyed by the symbol address printed by cvdump.
        lines: For each source path, a dict of line number -> address.
        names: RecompiledInfo keyed by symbol name.
    """

    def __init__(self, pdb, sym_recompfile, sym_logger, sym_wine_path_converter=None):
        """Run cvdump on ``pdb`` and index the result.

        Args:
            pdb: Path of the PDB file to dump.
            sym_recompfile: Object providing ``imagebase`` and ``textvirt``,
                which are added to each symbol address.
            sym_logger: Logger used for progress and error messages.
            sym_wine_path_converter: Optional converter. When given, cvdump
                is run through ``wine`` and source paths found in the dump
                are converted with ``get_unix_path``.

        Raises:
            subprocess.CalledProcessError: If cvdump exits with an error.
        """
        self.logger = sym_logger

        # Per-instance tables. As class attributes these dicts would be
        # shared, so symbols from one PDB would leak into every other SymInfo.
        self.funcs = {}
        self.lines = {}
        self.names = {}

        call = [get_file_in_script_dir("cvdump.exe"), "-l", "-s"]

        if sym_wine_path_converter:
            # Run cvdump through wine and convert path to Windows-friendly wine path
            call.insert(0, "wine")
            call.append(sym_wine_path_converter.get_wine_path(pdb))
        else:
            call.append(pdb)

        self.logger.info("Parsing %s ...", pdb)
        self.logger.debug("Command = %s", call)
        line_dump = subprocess.check_output(call).decode("utf-8").split("\r\n")

        self.logger.debug("Parsing output of cvdump.exe ...")
        self._parse_dump(line_dump, sym_recompfile, sym_wine_path_converter)
        self.logger.debug("... Parsing output of cvdump.exe finished")

    def _parse_dump(self, line_dump, sym_recompfile, sym_wine_path_converter):
        """Scan the dump line by line and fill funcs, names and lines."""
        current_section = None

        for i, line in enumerate(line_dump):
            # A "*** NAME" line switches the section being read.
            if line.startswith("***"):
                current_section = line[4:]

            if current_section == "SYMBOLS" and "S_GPROC32" in line:
                self._parse_symbol(line_dump, i, sym_recompfile)
            elif (
                current_section == "LINES"
                and line.startswith("  ")
                and not line.startswith("   ")
            ):
                # Exactly two leading spaces marks a per-file header; the
                # line/address rows that follow are indented further.
                self._parse_line_table(line_dump, i, sym_wine_path_converter)

    def _parse_symbol(self, line_dump, index, sym_recompfile):
        """Record the S_GPROC32 symbol on ``line_dump[index]``."""
        line = line_dump[index]
        # Fields are read from fixed columns of the cvdump line.
        sym_addr = int(line[26:34], 16)

        info = RecompiledInfo()
        info.addr = sym_addr + sym_recompfile.imagebase + sym_recompfile.textvirt

        # Disabled alternative that takes start/size from the debug offsets
        # printed two lines below the symbol.
        use_dbg_offs = False
        if use_dbg_offs:
            debug_offs = line_dump[index + 2]
            debug_start = int(debug_offs[22:30], 16)
            debug_end = int(debug_offs[43:], 16)

            info.start = debug_start
            info.size = debug_end - debug_start
        else:
            info.start = 0
            info.size = int(line[41:49], 16)

        info.name = line[77:]

        self.names[info.name] = info
        self.funcs[sym_addr] = info

    def _parse_line_table(self, line_dump, index, sym_wine_path_converter):
        """Read the line/address rows that follow the header at ``index``."""
        sourcepath = line_dump[index].split()[0]

        if sym_wine_path_converter:
            # Convert filename to Unix path for file compare
            sourcepath = sym_wine_path_converter.get_unix_path(sourcepath)

        file_lines = self.lines.setdefault(sourcepath, {})

        # Rows start two lines below the header and run until a blank line.
        # The bounds check covers a dump that ends without that blank line.
        row = index + 2
        while row < len(line_dump):
            tokens = line_dump[row].split()
            if not tokens:
                break

            # Each row is a sequence of "<line number> <hex address>" pairs.
            for k in range(0, len(tokens), 2):
                linenum = int(tokens[k])
                address = int(tokens[k + 1], 16)
                # The first address seen for a line number wins.
                file_lines.setdefault(linenum, address)

            row += 1

    def get_recompiled_address(self, filename, line):
        """Return the RecompiledInfo for the function at ``filename:line``.

        Returns:
            The matching RecompiledInfo, or None (after logging an error)
            when the file/line is unknown or no symbol has that address.
        """
        self.logger.debug("Looking for %s:%s", filename, line)
        filename_basename = os.path.basename(filename).lower()

        for fn in self.lines:
            # Sometimes a PDB is compiled with a relative path while we always have
            # an absolute path. Therefore we must compare basenames first and
            # then let the filesystem confirm both paths name the same file.
            try:
                if os.path.basename(
                    fn
                ).lower() == filename_basename and os.path.samefile(fn, filename):
                    filename = fn
                    break
            except FileNotFoundError:
                continue

        if filename in self.lines and line in self.lines[filename]:
            recompiled_addr = self.lines[filename][line]

            if recompiled_addr in self.funcs:
                return self.funcs[recompiled_addr]
            self.logger.error(
                "Failed to find function symbol with address: %x", recompiled_addr
            )
            return None
        self.logger.error(
            "Failed to find function symbol with filename and line: %s:%s",
            filename,
            line,
        )
        return None

    def get_recompiled_address_from_name(self, name):
        """Return the RecompiledInfo named ``name``, or None after logging an error."""
        self.logger.debug("Looking for %s", name)

        if name in self.names:
            return self.names[name]
        self.logger.error("Failed to find function symbol with name: %s", name)
        return None
							
								
								
									
										42
									
								
								tools/isledecomp/isledecomp/utils.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								tools/isledecomp/isledecomp/utils.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | ||||
| import os | ||||
| import sys | ||||
| import colorama | ||||
| 
 | ||||
| 
 | ||||
def print_diff(udiff, plain):
    """Print a brief view of a unified diff and report whether it had content.

    Lines starting with ``++``, ``--`` or ``@@`` are left out of the output.
    Unless ``plain`` is true, added lines are printed green and removed lines
    red, with a colour reset after each printed line.

    Args:
        udiff: Iterable of diff lines.
        plain: When true, print without colour codes.

    Returns:
        True if ``udiff`` yielded at least one line (printed or not).
    """
    saw_any_line = False
    for entry in udiff:
        saw_any_line = True

        # Skip unneeded parts of the diff for the brief view
        if entry.startswith(("++", "@@", "--")):
            continue

        if plain:
            print(entry)
            continue

        if entry.startswith("+"):
            tint = colorama.Fore.GREEN
        elif entry.startswith("-"):
            tint = colorama.Fore.RED
        else:
            tint = ""
        print(tint + entry)
        print(colorama.Style.RESET_ALL, end="")
    return saw_any_line
| 
 | ||||
| 
 | ||||
def get_file_in_script_dir(fn):
    """Return the path of ``fn`` inside the directory of the running script.

    The script is identified by ``sys.argv[0]``, resolved to an absolute path.
    """
    script_path = os.path.abspath(sys.argv[0])
    script_dir = os.path.dirname(script_path)
    return os.path.join(script_dir, fn)
| 
 | ||||
| 
 | ||||
class OffsetPlaceholderGenerator:
    """Hand out stable ``<OFFSETn>`` placeholders, one per distinct address.

    Attributes:
        counter: Number of placeholders issued so far.
        replacements: Mapping of address -> placeholder already issued.
    """

    def __init__(self):
        self.counter = 0
        self.replacements = {}

    def get(self, replace_addr):
        """Return the placeholder for ``replace_addr``, creating it on first use."""
        try:
            return self.replacements[replace_addr]
        except KeyError:
            pass

        # First time this address is seen: number it with the next counter value.
        self.counter += 1
        placeholder = f"<OFFSET{self.counter}>"
        self.replacements[replace_addr] = placeholder
        return placeholder
| @@ -1,9 +1,9 @@ | ||||
| from setuptools import setup, find_packages | ||||
| 
 | ||||
| setup( | ||||
|     name='isledecomp', | ||||
|     version='0.1.0', | ||||
|     description='Python tools for the isledecomp project', | ||||
|     name="isledecomp", | ||||
|     version="0.1.0", | ||||
|     description="Python tools for the isledecomp project", | ||||
|     packages=find_packages(), | ||||
|     tests_require=['pytest'], | ||||
|     tests_require=["pytest"], | ||||
| ) | ||||
|   | ||||
| @@ -1,17 +1,16 @@ | ||||
| import os | ||||
| import pytest | ||||
| from typing import List, TextIO | ||||
| from isledecomp.parser import find_code_blocks | ||||
| from isledecomp.parser.util import CodeBlock | ||||
| 
 | ||||
| SAMPLE_DIR = os.path.join(os.path.dirname(__file__), 'samples') | ||||
| SAMPLE_DIR = os.path.join(os.path.dirname(__file__), "samples") | ||||
| 
 | ||||
| 
 | ||||
| def sample_file(filename: str) -> TextIO: | ||||
|     """Wrapper for opening the samples from the directory that does not | ||||
|        depend on the cwd where we run the test""" | ||||
|     depend on the cwd where we run the test""" | ||||
|     full_path = os.path.join(SAMPLE_DIR, filename) | ||||
|     return open(full_path, 'r') | ||||
|     return open(full_path, "r", encoding="utf-8") | ||||
| 
 | ||||
| 
 | ||||
| def code_blocks_are_sorted(blocks: List[CodeBlock]) -> bool: | ||||
| @@ -25,7 +24,7 @@ def code_blocks_are_sorted(blocks: List[CodeBlock]) -> bool: | ||||
| 
 | ||||
| def test_sanity(): | ||||
|     """Read a very basic file""" | ||||
|     with sample_file('basic_file.cpp') as f: | ||||
|     with sample_file("basic_file.cpp") as f: | ||||
|         blocks = find_code_blocks(f) | ||||
| 
 | ||||
|     assert len(blocks) == 3 | ||||
| @@ -39,7 +38,7 @@ def test_sanity(): | ||||
| def test_oneline(): | ||||
|     """(Assuming clang-format permits this) This sample has a function | ||||
|     on a single line. This will test the end-of-function detection""" | ||||
|     with sample_file('oneline_function.cpp') as f: | ||||
|     with sample_file("oneline_function.cpp") as f: | ||||
|         blocks = find_code_blocks(f) | ||||
| 
 | ||||
|     assert len(blocks) == 2 | ||||
| @@ -49,7 +48,7 @@ def test_oneline(): | ||||
| 
 | ||||
| def test_missing_offset(): | ||||
|     """What if the function doesn't have an offset comment?""" | ||||
|     with sample_file('missing_offset.cpp') as f: | ||||
|     with sample_file("missing_offset.cpp") as f: | ||||
|         blocks = find_code_blocks(f) | ||||
| 
 | ||||
|     # TODO: For now, the function without the offset will just be ignored. | ||||
| @@ -60,9 +59,9 @@ def test_missing_offset(): | ||||
| 
 | ||||
| def test_jumbled_case(): | ||||
|     """The parser just reports what it sees. It is the responsibility of | ||||
|        the downstream tools to do something about a jumbled file. | ||||
|        Just verify that we are reading it correctly.""" | ||||
|     with sample_file('out_of_order.cpp') as f: | ||||
|     the downstream tools to do something about a jumbled file. | ||||
|     Just verify that we are reading it correctly.""" | ||||
|     with sample_file("out_of_order.cpp") as f: | ||||
|         blocks = find_code_blocks(f) | ||||
| 
 | ||||
|     assert len(blocks) == 3 | ||||
| @@ -70,7 +69,7 @@ def test_jumbled_case(): | ||||
| 
 | ||||
| 
 | ||||
| def test_bad_file(): | ||||
|     with sample_file('poorly_formatted.cpp') as f: | ||||
|     with sample_file("poorly_formatted.cpp") as f: | ||||
|         blocks = find_code_blocks(f) | ||||
| 
 | ||||
|     assert len(blocks) == 3 | ||||
| @@ -78,7 +77,7 @@ def test_bad_file(): | ||||
| 
 | ||||
| def test_indented(): | ||||
|     """Offsets for functions inside of a class will probably be indented.""" | ||||
|     with sample_file('basic_class.cpp') as f: | ||||
|     with sample_file("basic_class.cpp") as f: | ||||
|         blocks = find_code_blocks(f) | ||||
| 
 | ||||
|     # TODO: We don't properly detect the end of these functions | ||||
| @@ -87,17 +86,17 @@ def test_indented(): | ||||
|     # all the functions that are there. | ||||
| 
 | ||||
|     assert len(blocks) == 2 | ||||
|     assert blocks[0].offset == int('0x12345678', 16) | ||||
|     assert blocks[0].offset == int("0x12345678", 16) | ||||
|     assert blocks[0].start_line == 15 | ||||
|     # assert blocks[0].end_line == 18 | ||||
| 
 | ||||
|     assert blocks[1].offset == int('0xdeadbeef', 16) | ||||
|     assert blocks[1].offset == int("0xdeadbeef", 16) | ||||
|     assert blocks[1].start_line == 22 | ||||
|     # assert blocks[1].end_line == 24 | ||||
| 
 | ||||
| 
 | ||||
| def test_inline(): | ||||
|     with sample_file('inline.cpp') as f: | ||||
|     with sample_file("inline.cpp") as f: | ||||
|         blocks = find_code_blocks(f) | ||||
| 
 | ||||
|     assert len(blocks) == 2 | ||||
| @@ -108,21 +107,21 @@ def test_inline(): | ||||
| 
 | ||||
| def test_multiple_offsets(): | ||||
|     """If multiple offset marks appear before for a code block, take them | ||||
|        all but ensure module name (case-insensitive) is distinct. | ||||
|        Use first module occurrence in case of duplicates.""" | ||||
|     with sample_file('multiple_offsets.cpp') as f: | ||||
|     all but ensure module name (case-insensitive) is distinct. | ||||
|     Use first module occurrence in case of duplicates.""" | ||||
|     with sample_file("multiple_offsets.cpp") as f: | ||||
|         blocks = find_code_blocks(f) | ||||
| 
 | ||||
|     assert len(blocks) == 4 | ||||
|     assert blocks[0].module == 'TEST' | ||||
|     assert blocks[0].module == "TEST" | ||||
|     assert blocks[0].start_line == 9 | ||||
| 
 | ||||
|     assert blocks[1].module == 'HELLO' | ||||
|     assert blocks[1].module == "HELLO" | ||||
|     assert blocks[1].start_line == 9 | ||||
| 
 | ||||
|     # Duplicate modules are ignored | ||||
|     assert blocks[2].start_line == 16 | ||||
|     assert blocks[2].offset == 0x2345 | ||||
| 
 | ||||
|     assert blocks[3].module == 'TEST' | ||||
|     assert blocks[3].module == "TEST" | ||||
|     assert blocks[3].offset == 0x2002 | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| import pytest | ||||
| from collections import namedtuple | ||||
| from typing import List | ||||
| import pytest | ||||
| from isledecomp.parser.util import ( | ||||
|     is_blank_or_comment, | ||||
|     match_offset_comment, | ||||
| @@ -10,21 +10,20 @@ from isledecomp.parser.util import ( | ||||
| 
 | ||||
| 
 | ||||
| blank_or_comment_param = [ | ||||
|     (True,  ''), | ||||
|     (True,  '\t'), | ||||
|     (True,  '    '), | ||||
|     (False, '\tint abc=123;'), | ||||
|     (True,  '// OFFSET: LEGO1 0xdeadbeef'), | ||||
|     (True,  '   /* Block comment beginning'), | ||||
|     (True,  'Block comment ending */   '), | ||||
| 
 | ||||
|     (True, ""), | ||||
|     (True, "\t"), | ||||
|     (True, "    "), | ||||
|     (False, "\tint abc=123;"), | ||||
|     (True, "// OFFSET: LEGO1 0xdeadbeef"), | ||||
|     (True, "   /* Block comment beginning"), | ||||
|     (True, "Block comment ending */   "), | ||||
|     # TODO: does clang-format have anything to say about these cases? | ||||
|     (False, 'x++; // Comment folows'), | ||||
|     (False, 'x++; /* Block comment begins'), | ||||
|     (False, "x++; // Comment folows"), | ||||
|     (False, "x++; /* Block comment begins"), | ||||
| ] | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.parametrize('expected, line', blank_or_comment_param) | ||||
| @pytest.mark.parametrize("expected, line", blank_or_comment_param) | ||||
| def test_is_blank_or_comment(line: str, expected: bool): | ||||
|     assert is_blank_or_comment(line) is expected | ||||
| 
 | ||||
| @@ -32,82 +31,73 @@ def test_is_blank_or_comment(line: str, expected: bool): | ||||
| offset_comment_samples = [ | ||||
|     # (can_parse: bool, exact_match: bool, line: str) | ||||
|     # Should match both expected modules with optional STUB marker | ||||
|     (True,  True,  '// OFFSET: LEGO1 0xdeadbeef'), | ||||
|     (True,  True,  '// OFFSET: LEGO1 0xdeadbeef STUB'), | ||||
|     (True,  True,  '// OFFSET: ISLE 0x12345678'), | ||||
|     (True,  True,  '// OFFSET: ISLE 0x12345678 STUB'), | ||||
| 
 | ||||
|     (True, True, "// OFFSET: LEGO1 0xdeadbeef"), | ||||
|     (True, True, "// OFFSET: LEGO1 0xdeadbeef STUB"), | ||||
|     (True, True, "// OFFSET: ISLE 0x12345678"), | ||||
|     (True, True, "// OFFSET: ISLE 0x12345678 STUB"), | ||||
|     # No trailing spaces allowed | ||||
|     (True,  False, '// OFFSET: LEGO1 0xdeadbeef  '), | ||||
|     (True,  False, '// OFFSET: LEGO1 0xdeadbeef STUB '), | ||||
| 
 | ||||
|     (True, False, "// OFFSET: LEGO1 0xdeadbeef  "), | ||||
|     (True, False, "// OFFSET: LEGO1 0xdeadbeef STUB "), | ||||
|     # Must have exactly one space between elements | ||||
|     (True,  False, '//OFFSET: ISLE 0xdeadbeef'), | ||||
|     (True,  False, '// OFFSET:ISLE 0xdeadbeef'), | ||||
|     (True,  False, '//  OFFSET: ISLE 0xdeadbeef'), | ||||
|     (True,  False, '// OFFSET:  ISLE 0xdeadbeef'), | ||||
|     (True,  False, '// OFFSET: ISLE  0xdeadbeef'), | ||||
|     (True,  False, '// OFFSET: ISLE 0xdeadbeef  STUB'), | ||||
| 
 | ||||
|     (True, False, "//OFFSET: ISLE 0xdeadbeef"), | ||||
|     (True, False, "// OFFSET:ISLE 0xdeadbeef"), | ||||
|     (True, False, "//  OFFSET: ISLE 0xdeadbeef"), | ||||
|     (True, False, "// OFFSET:  ISLE 0xdeadbeef"), | ||||
|     (True, False, "// OFFSET: ISLE  0xdeadbeef"), | ||||
|     (True, False, "// OFFSET: ISLE 0xdeadbeef  STUB"), | ||||
|     # Must have 0x prefix for hex number | ||||
|     (True,  False, '// OFFSET: ISLE deadbeef'), | ||||
| 
 | ||||
|     (True, False, "// OFFSET: ISLE deadbeef"), | ||||
|     # Offset, module name, and STUB must be uppercase | ||||
|     (True,  False, '// offset: ISLE 0xdeadbeef'), | ||||
|     (True,  False, '// offset: isle 0xdeadbeef'), | ||||
|     (True,  False, '// OFFSET: LEGO1 0xdeadbeef stub'), | ||||
| 
 | ||||
|     (True, False, "// offset: ISLE 0xdeadbeef"), | ||||
|     (True, False, "// offset: isle 0xdeadbeef"), | ||||
|     (True, False, "// OFFSET: LEGO1 0xdeadbeef stub"), | ||||
|     # Hex string must be lowercase | ||||
|     (True,  False, '// OFFSET: ISLE 0xDEADBEEF'), | ||||
| 
 | ||||
|     (True, False, "// OFFSET: ISLE 0xDEADBEEF"), | ||||
|     # TODO: How flexible should we be with matching the module name? | ||||
|     (True,  True,  '// OFFSET: OMNI 0x12345678'), | ||||
|     (True,  True,  '// OFFSET: LEG01 0x12345678'), | ||||
|     (True,  False,  '// OFFSET: hello 0x12345678'), | ||||
| 
 | ||||
|     (True, True, "// OFFSET: OMNI 0x12345678"), | ||||
|     (True, True, "// OFFSET: LEG01 0x12345678"), | ||||
|     (True, False, "// OFFSET: hello 0x12345678"), | ||||
|     # Not close enough to match | ||||
|     (False, False, '// OFFSET: ISLE0x12345678'), | ||||
|     (False, False, '// OFFSET: 0x12345678'), | ||||
|     (False, False, '// LEGO1: 0x12345678'), | ||||
| 
 | ||||
|     (False, False, "// OFFSET: ISLE0x12345678"), | ||||
|     (False, False, "// OFFSET: 0x12345678"), | ||||
|     (False, False, "// LEGO1: 0x12345678"), | ||||
|     # Hex string shorter than 8 characters | ||||
|     (True,  True,  '// OFFSET: LEGO1 0x1234'), | ||||
| 
 | ||||
|     (True, True, "// OFFSET: LEGO1 0x1234"), | ||||
|     # TODO: These match but shouldn't. | ||||
|     # (False, False, '// OFFSET: LEGO1 0'), | ||||
|     # (False, False, '// OFFSET: LEGO1 0x'), | ||||
| ] | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.parametrize('match, exact, line', offset_comment_samples) | ||||
| def test_offset_match(line: str, match: bool, exact): | ||||
| @pytest.mark.parametrize("match, _, line", offset_comment_samples) | ||||
| def test_offset_match(line: str, match: bool, _): | ||||
|     did_match = match_offset_comment(line) is not None | ||||
|     assert did_match is match | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.parametrize('match, exact, line', offset_comment_samples) | ||||
| def test_exact_offset_comment(line: str, exact: bool, match): | ||||
| @pytest.mark.parametrize("_, exact, line", offset_comment_samples) | ||||
| def test_exact_offset_comment(line: str, exact: bool, _): | ||||
|     assert is_exact_offset_comment(line) is exact | ||||
| 
 | ||||
| 
 | ||||
| # Helper for the next test: cut down version of OffsetMatch | ||||
| MiniOfs = namedtuple('MiniOfs', ['module', 'value']) | ||||
| MiniOfs = namedtuple("MiniOfs", ["module", "value"]) | ||||
| 
 | ||||
| distinct_by_module_samples = [ | ||||
|     # empty set | ||||
|     ([], []), | ||||
|     # same module name | ||||
|     ([MiniOfs('TEST', 123), MiniOfs('TEST', 555)], | ||||
|      [MiniOfs('TEST', 123)]), | ||||
|     ([MiniOfs("TEST", 123), MiniOfs("TEST", 555)], [MiniOfs("TEST", 123)]), | ||||
|     # same module name, case-insensitive | ||||
|     ([MiniOfs('test', 123), MiniOfs('TEST', 555)], | ||||
|      [MiniOfs('test', 123)]), | ||||
|     ([MiniOfs("test", 123), MiniOfs("TEST", 555)], [MiniOfs("test", 123)]), | ||||
|     # duplicates, non-consecutive | ||||
|     ([MiniOfs('test', 123), MiniOfs('abc', 111), MiniOfs('TEST', 555)], | ||||
|      [MiniOfs('test', 123), MiniOfs('abc', 111)]), | ||||
|     ( | ||||
|         [MiniOfs("test", 123), MiniOfs("abc", 111), MiniOfs("TEST", 555)], | ||||
|         [MiniOfs("test", 123), MiniOfs("abc", 111)], | ||||
|     ), | ||||
| ] | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.parametrize('sample, expected', distinct_by_module_samples) | ||||
| @pytest.mark.parametrize("sample, expected", distinct_by_module_samples) | ||||
| def test_distinct_by_module(sample: List[MiniOfs], expected: List[MiniOfs]): | ||||
|     assert distinct_by_module(sample) == expected | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Thomas Phillips
					Thomas Phillips