diff --git a/misctools/lldb/README.md b/misctools/lldb/README.md new file mode 100644 index 0000000000..7e943c766d --- /dev/null +++ b/misctools/lldb/README.md @@ -0,0 +1,121 @@ +# LLDB Pretty Printers for Bun + +This directory contains LLDB pretty printers for various Bun data structures to improve the debugging experience. + +## Files + +- `bun_pretty_printer.py` - Pretty printers for Bun-specific types (bun.String, WTFStringImpl, ZigString, BabyList, etc.) +- `lldb_pretty_printers.py` - Pretty printers for Zig language types from the Zig project +- `lldb_webkit.py` - Pretty printers for WebKit/JavaScriptCore types +- `init.lldb` - LLDB initialization commands + +## Supported Types + +### bun.String Types +- `bun.String` (or just `String`) - The main Bun string type +- `WTFStringImpl` - WebKit string implementation (Latin1/UTF16) +- `ZigString` - Zig string type (UTF8/Latin1/UTF16 with pointer tagging) + +### Display Format + +The pretty printers show the encoding (plus any flags) in square brackets, followed by the quoted string content: + +``` +# bun.String examples: +[latin1] "Hello, World!" # Regular ZigString +[utf8] "UTF-8 String 🎉" # UTF-8 encoded +[latin1 static] "Static content" # Static string +"" # Empty string + # Dead/invalid string + +# WTFStringImpl examples: +[latin1] "WebKit String" # Encoding tag, then the actual string content + +# ZigString examples: +[utf16 global] "Some text" # UTF16 globally allocated +[latin1] "ASCII text" # Latin1 encoded +``` + +## Usage + +### Option 1: Manual Loading +In your LLDB session: +```lldb +command script import /path/to/bun/misctools/lldb/bun_pretty_printer.py +``` + +### Option 2: Add to ~/.lldbinit +Add the following line to your `~/.lldbinit` file to load automatically: +```lldb +command script import /path/to/bun/misctools/lldb/bun_pretty_printer.py +``` + +### Option 3: Use init.lldb +```lldb +command source /path/to/bun/misctools/lldb/init.lldb +``` + +## Testing + +To test the pretty printers: + +1. Build a debug version of Bun: +```bash +bun bd +``` + +2. 
Create a test file that uses bun.String types + +3. Debug with LLDB: +```bash +lldb ./build/debug/bun-debug +(lldb) command script import misctools/lldb/bun_pretty_printer.py +(lldb) breakpoint set --file your_test.zig --line <line_number> +(lldb) run your_test.zig +(lldb) frame variable +``` + +## Implementation Details + +### ZigString Pointer Tagging +ZigString uses pointer tagging in the upper bits: +- Bit 63: 1 = UTF16, 0 = UTF8/Latin1 +- Bit 62: 1 = Globally allocated (mimalloc) +- Bit 61: 1 = UTF8 encoding + +The pretty printer automatically detects and handles these tags. + +### WTFStringImpl Encoding +WTFStringImpl uses flags in `m_hashAndFlags`: +- Bit 2 (s_hashFlag8BitBuffer): 1 = Latin1, 0 = UTF16 + +### bun.String Tag Union +bun.String is a tagged union with these variants: +- Dead (0): Invalid/freed string +- WTFStringImpl (1): WebKit string +- ZigString (2): Regular Zig string +- StaticZigString (3): Static/immortal string +- Empty (4): Empty string "" + +## Troubleshooting + +If the pretty printers don't work: + +1. Verify the Python script loaded: +```lldb +(lldb) script print("Python works") +``` + +2. Check if the category is enabled: +```lldb +(lldb) type category list +``` + +3. Enable the Bun category manually: +```lldb +(lldb) type category enable bun +``` + +4. 
For debugging the pretty printer itself, check for exceptions: +- The pretty printers are written to catch all exceptions and return an empty or placeholder summary instead of raising +- Modify the code to print exceptions for debugging \ No newline at end of file diff --git a/misctools/lldb/bun_pretty_printer.py b/misctools/lldb/bun_pretty_printer.py index 389b908d64..4f501056b5 100644 --- a/misctools/lldb/bun_pretty_printer.py +++ b/misctools/lldb/bun_pretty_printer.py @@ -10,8 +10,8 @@ class bun_BabyList_SynthProvider: try: self.ptr = self.value.GetChildMemberWithName('ptr') - self.len = self.value.GetChildMemberWithName('len').unsigned - self.cap = self.value.GetChildMemberWithName('cap').unsigned + self.len = self.value.GetChildMemberWithName('len').GetValueAsUnsigned() + self.cap = self.value.GetChildMemberWithName('cap').GetValueAsUnsigned() self.elem_type = self.ptr.type.GetPointeeType() self.elem_size = self.elem_type.size except: @@ -46,7 +46,7 @@ def bun_BabyList_SummaryProvider(value, _=None): value = value.GetNonSyntheticValue() len_val = value.GetChildMemberWithName('len') cap_val = value.GetChildMemberWithName('cap') - return 'len=%d cap=%d' % (len_val.unsigned, cap_val.unsigned) + return 'len=%d cap=%d' % (len_val.GetValueAsUnsigned(), cap_val.GetValueAsUnsigned()) except: return 'len=? cap=?' 
# NOTE(review): this section relies on the `lldb` module being imported and on
# the `add(...)` registration helper; both appear to be defined earlier in
# bun_pretty_printer.py, outside the lines shown here -- confirm.

# Upper bound on the number of bytes fetched from the debuggee per summary.
_MAX_READ_BYTES = 1024 * 1024
# Maximum number of decoded characters shown in a summary.
_MAX_DISPLAY_CHARS = 200

# Single-pass equivalent of escaping backslash first, then quote/newline/CR/tab.
_DISPLAY_ESCAPES = str.maketrans({
    '\\': '\\\\',
    '"': '\\"',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
})


def _plan_read(length, bytes_per_char):
    """Return ``(total_bytes, bytes_to_read, truncated)`` for a string read.

    ``bytes_to_read`` is capped at ``_MAX_READ_BYTES`` and rounded down to a
    whole number of code units so a 16-bit string is never cut mid-unit.
    """
    total_bytes = length * bytes_per_char
    if total_bytes > _MAX_READ_BYTES:
        whole_units = _MAX_READ_BYTES // bytes_per_char
        return total_bytes, whole_units * bytes_per_char, True
    return total_bytes, total_bytes, False


def _render_string_summary(label, text, length, total_bytes, read_truncated):
    """Format decoded ``text`` as ``[label] "text"`` for display.

    When the read was capped or the text exceeds ``_MAX_DISPLAY_CHARS``, the
    bracketed label also carries the full length (and a KB/MB size) and the
    quoted text ends in ``...``.
    """
    # Cut BEFORE escaping so an escape sequence such as ``\\n`` is never split
    # in half by the display limit.
    escaped = text[:_MAX_DISPLAY_CHARS].translate(_DISPLAY_ESCAPES)

    if not read_truncated and len(text) <= _MAX_DISPLAY_CHARS:
        return '[%s] "%s"' % (label, escaped)

    size_info = ' %d chars' % length
    if total_bytes >= 1024 * 1024:
        size_info += ' (%.1fMB)' % (total_bytes / (1024.0 * 1024.0))
    elif total_bytes >= 1024:
        size_info += ' (%.1fKB)' % (total_bytes / 1024.0)
    return '[%s%s] "%s..." ' % (label, size_info, escaped)


def WTFStringImpl_SummaryProvider(value, _=None):
    """LLDB summary for a ``WTFStringImpl`` (struct or pointer to it).

    Reads ``m_length``, ``m_hashAndFlags`` and ``m_ptr``; bit 2 of the flags
    selects the 8-bit (latin1) or 16-bit (utf16) buffer. Returns
    ``[encoding] "content"``; returns an empty string if anything unexpected
    is raised while inspecting the value.
    """
    try:
        value = value.GetNonSyntheticValue()

        # The summary may be attached to the pointer type or the struct itself.
        struct = value.deref if value.type.IsPointerType() else value

        m_length = struct.GetChildMemberWithName('m_length').GetValueAsUnsigned()
        m_hashAndFlags = struct.GetChildMemberWithName('m_hashAndFlags').GetValueAsUnsigned()
        m_ptr = struct.GetChildMemberWithName('m_ptr')

        s_hashFlag8BitBuffer = 1 << 2
        is_8bit = (m_hashAndFlags & s_hashFlag8BitBuffer) != 0
        # The union member of m_ptr is named after the encoding label.
        encoding = 'latin1' if is_8bit else 'utf16'

        if m_length == 0:
            return '[%s] ""' % encoding

        total_bytes, bytes_to_read, truncated = _plan_read(m_length, 1 if is_8bit else 2)

        ptr_addr = m_ptr.GetChildMemberWithName(encoding).GetValueAsUnsigned()
        if not ptr_addr:
            return '[%s] ' % encoding

        error = lldb.SBError()
        byte_data = value.process.ReadMemory(ptr_addr, bytes_to_read, error)
        if not error.Success():
            # Previously formatted with a placeholder-less string, which raised.
            return '[%s] <read error: %s>' % (encoding, error)

        text = byte_data.decode('latin1' if is_8bit else 'utf-16le', errors='replace')
        return _render_string_summary(encoding, text, m_length, total_bytes, truncated)
    except Exception:
        return ''


def ZigString_SummaryProvider(value, _=None):
    """LLDB summary for a ``ZigString``.

    Decodes the tag bits stored in ``_unsafe_ptr_do_not_use`` (bit 63 = utf16,
    bit 62 = global, bit 61 = utf8), reads up to ``_MAX_READ_BYTES`` from the
    untagged address and returns ``[encoding flags] "content"``. Returns an
    empty string if anything unexpected is raised.
    """
    try:
        value = value.GetNonSyntheticValue()

        ptr = value.GetChildMemberWithName('_unsafe_ptr_do_not_use').GetValueAsUnsigned()
        length = value.GetChildMemberWithName('len').GetValueAsUnsigned()

        is_16bit = (ptr & (1 << 63)) != 0
        is_global = (ptr & (1 << 62)) != 0
        is_utf8 = (ptr & (1 << 61)) != 0

        if is_16bit:
            encoding, codec = 'utf16', 'utf-16le'
        elif is_utf8:
            encoding, codec = 'utf8', 'utf-8'
        else:
            encoding, codec = 'latin1', 'latin1'
        label = encoding + (' global' if is_global else '')

        if length == 0:
            return '[%s] ""' % label

        # Strip the tag bits: keep only the lower 53 bits of the pointer.
        untagged_ptr = ptr & ((1 << 53) - 1)

        total_bytes, bytes_to_read, truncated = _plan_read(length, 2 if is_16bit else 1)

        error = lldb.SBError()
        byte_data = value.process.ReadMemory(untagged_ptr, bytes_to_read, error)
        if not error.Success():
            return '[%s] ' % label

        text = byte_data.decode(codec, errors='replace')
        return _render_string_summary(label, text, length, total_bytes, truncated)
    except Exception:
        return ''


def bun_String_SummaryProvider(value, _=None):
    """LLDB summary for the ``bun.String`` tagged union.

    Dispatches on the ``tag`` field (0 Dead, 1 WTFStringImpl, 2 ZigString,
    3 StaticZigString, 4 Empty) and delegates to the matching provider for the
    payload found under ``value``. Problems are reported as ``<...>``
    placeholder summaries instead of raising into LLDB.
    """
    try:
        value = value.GetNonSyntheticValue()

        # Kept for diagnostics: shows which type LLDB matched when no tag exists.
        type_name = value.GetTypeName()

        tag = value.GetChildMemberWithName('tag')
        if not tag or not tag.IsValid():
            tag = value.GetChildMemberWithName('Tag')
            if not tag or not tag.IsValid():
                return '<no tag field: %s>' % (type_name,)

        tag_value = tag.GetValueAsUnsigned()
        tag_names = {
            0: 'Dead',
            1: 'WTFStringImpl',
            2: 'ZigString',
            3: 'StaticZigString',
            4: 'Empty',
        }
        tag_name = tag_names.get(tag_value, 'Unknown')

        if tag_name == 'Empty':
            return '""'
        if tag_name == 'Dead':
            return ''
        if tag_name == 'Unknown':
            return '<unknown tag: %d>' % tag_value

        value_union = value.GetChildMemberWithName('value')
        if not value_union or not value_union.IsValid():
            return ''

        if tag_name == 'WTFStringImpl':
            impl_value = value_union.GetChildMemberWithName('WTFStringImpl')
            if not impl_value or not impl_value.IsValid():
                return ''
            return WTFStringImpl_SummaryProvider(impl_value, _)

        # ZigString or StaticZigString: the union member shares the tag's name.
        zig_string_value = value_union.GetChildMemberWithName(tag_name)
        if not zig_string_value or not zig_string_value.IsValid():
            return '<missing value.%s>' % tag_name
        result = ZigString_SummaryProvider(zig_string_value, _)
        if tag_name == 'StaticZigString':
            # Only the first ']' closes the encoding label; later ones belong
            # to the string content and must be left untouched.
            result = result.replace(']', ' static]', 1)
        return result
    except Exception as e:
        return '<error: %s>' % (e,)


def __lldb_init_module(debugger, _=None):
    """Entry point LLDB calls on ``command script import``.

    Defines the ``bun`` category, registers the BabyList synthetic provider
    and the string summary providers, then enables the category.
    """
    # Initialize Bun Category
    debugger.HandleCommand('type category define --language c99 bun')

    # Initialize Bun Data Structures
    add(debugger, category='bun', regex=True, type='^baby_list\\.BabyList\\(.*\\)$', identifier='bun_BabyList', synth=True, expand=True, summary=True)

    # Exact type names, tried under several spellings. The registration order
    # of the original code is preserved.
    exact_names = (
        ('WTFStringImpl', (
            'WTFStringImpl',
            '*WTFStringImplStruct',
            'string.WTFStringImpl',
            'string.WTFStringImplStruct',
            '*string.WTFStringImplStruct',
        )),
        ('ZigString', (
            'ZigString',
            'bun.js.bindings.ZigString',
            'bindings.ZigString',
        )),
        ('bun_String', (
            'String',
            'bun.String',
            'string.String',
            'BunString',
            'bun::String',
            'bun::string::String',
        )),
    )
    for identifier, type_names in exact_names:
        for type_name in type_names:
            add(debugger, category='bun', type=type_name, identifier=identifier, summary=True)

    # Regex patterns for more flexible matching.
    # NOTE(review): '.*String$' also matches any other type whose name ends in
    # "String"; consider narrowing it once the real type names are confirmed.
    regex_patterns = (
        ('.*String$', 'bun_String'),
        ('.*WTFStringImpl.*', 'WTFStringImpl'),
        ('.*ZigString.*', 'ZigString'),
    )
    for pattern, identifier in regex_patterns:
        add(debugger, category='bun', regex=True, type=pattern, identifier=identifier, summary=True)

    # Enable the category
    debugger.HandleCommand('type category enable bun')