Compare commits

...

5 Commits

Author SHA1 Message Date
Claude Bot
bd90052694 Use low alignment bits instead of truncating high pointer bits
Changed PackedNext implementation to preserve ARM TBI/PAC/MTE pointer metadata:
- Store full usize pointer with auto_delete flag in LOW alignment bit (not high bit)
- ConcurrentTask is 8-byte aligned (contains u64 Task), so low bit is guaranteed zero
- Use enum(usize) { zero = 0, _ } for type safety and clean initialization syntax
- All atomic operations use usize (instead of u64) with @intFromEnum/@enumFromInt
- Masks: AUTO_DELETE_MASK=0x1, POINTER_MASK=~0x1

Benefits:
- Preserves all high pointer metadata (ARM pointer authentication, tagging, etc.)
- Uses guaranteed-zero low alignment bits instead of truncating address space
- Type-safe enum wrapper prevents accidental misuse
- Clean .zero initialization syntax
2025-11-03 03:18:20 +00:00
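A minimal standalone sketch of the low-alignment-bit scheme this commit describes (hypothetical Node/TaggedPtr names, not Bun's actual ConcurrentTask/PackedNext): the full pointer value is stored untouched and the flag lives in bit 0, which the 8-byte alignment guarantees is free.

```zig
const std = @import("std");

// Hypothetical stand-ins; Bun's real types are ConcurrentTask and PackedNext.
const Node = struct {
    value: u64, // an 8-byte field, so *Node is at least 8-byte aligned
};

const TaggedPtr = enum(usize) {
    zero = 0,
    _,

    const FLAG_MASK: usize = 0x1;
    const PTR_MASK: usize = ~FLAG_MASK;

    fn init(ptr: ?*Node, flag_value: bool) TaggedPtr {
        const addr = if (ptr) |p| @intFromPtr(p) else 0;
        std.debug.assert((addr & FLAG_MASK) == 0); // alignment keeps bit 0 free
        return @enumFromInt(addr | @intFromBool(flag_value));
    }

    fn get(self: TaggedPtr) ?*Node {
        const addr = @intFromEnum(self) & PTR_MASK;
        if (addr == 0) return null;
        return @ptrFromInt(addr);
    }

    fn flag(self: TaggedPtr) bool {
        return (@intFromEnum(self) & FLAG_MASK) != 0;
    }
};

test "flag and full pointer value round-trip" {
    var node = Node{ .value = 42 };
    const tagged = TaggedPtr.init(&node, true);
    try std.testing.expect(tagged.flag());
    try std.testing.expect(tagged.get() == &node);
}
```

Because only the guaranteed-zero low bit is reused, any metadata the hardware keeps in the high half of the address survives the round trip.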
Claude Bot
063a5a9775 Add safety improvements and encapsulation
1. Explicitly zero out bit 63 when unpacking the pointer to avoid any UB
   - Added mask: ptr_bits & 0x7FFFFFFFFFFFFFFF before ptrFromInt
2. Add ConcurrentTask.auto_delete() getter to hide implementation detail
   - The fact that auto_delete is packed into the next field is now internal
   - Updated event_loop.zig to use the public getter
   - Makes the API cleaner and easier to maintain
2025-11-02 12:40:50 +00:00
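A tiny illustration of the masking step in point 1, assuming (as the commit describes) that the flag occupies bit 63 of a u64-packed value; the helper name is hypothetical:

```zig
const std = @import("std");

const POINTER_BITS: u64 = 0x7FFF_FFFF_FFFF_FFFF;

// Clear bit 63 before converting back to a pointer, so the flag bit can never
// leak into the address handed to @ptrFromInt.
fn unpackPtr(packed_bits: u64) ?*anyopaque {
    const addr: usize = @intCast(packed_bits & POINTER_BITS);
    if (addr == 0) return null;
    return @ptrFromInt(addr);
}

test "a set bit 63 does not leak into the address" {
    try std.testing.expect(unpackPtr(1 << 63) == null);
}
```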
autofix-ci[bot]
27a6cb4fbd [autofix.ci] apply automated fixes 2025-11-02 12:33:42 +00:00
Claude Bot
ade70ec09e Fix atomic operations to preserve auto_delete flag
The previous implementation had a critical bug where atomic stores in
pushBatch would overwrite the auto_delete flag. This fix ensures:

1. When atomically updating the next pointer, we preserve the existing
   auto_delete flag by doing a read-modify-write
2. All atomic operations use consistent types:
   - u64 for packed structs (via bitcast)
   - ?*T for plain pointers (front/back queue pointers)
3. Added initPreserveAutoDelete() method to create new PackedNext
   values while preserving the auto_delete bit

The key insight is that front/back hold plain pointers to tasks, while
each task's 'next' field holds a PackedNext struct. When updating a
task's next pointer via pushBatch, we must preserve that task's
auto_delete flag.
2025-11-02 12:05:59 +00:00
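The shape of that read-modify-write, reduced to a bare usize word with the flag in the low bit (illustrative names, not Bun's PackedNext); a plain load/store pair suffices here because, per the commits, the flag is only written before the task is shared with other threads:

```zig
const std = @import("std");

const AUTO_DELETE_MASK: usize = 0x1;

// Atomically repoint `slot` at `new_addr` while keeping whatever flag bit the
// word already carries; this mirrors the load/combine/store pattern above.
fn storePreservingFlag(slot: *usize, new_addr: usize) void {
    const current = @atomicLoad(usize, slot, .monotonic);
    const updated = new_addr | (current & AUTO_DELETE_MASK);
    @atomicStore(usize, slot, updated, .release);
}

test "the flag bit survives a pointer update" {
    var slot: usize = 0x1; // "null" pointer with the flag set
    storePreservingFlag(&slot, 0x1000); // fake aligned address, never dereferenced
    try std.testing.expectEqual(@as(usize, 0x1001), slot);
}
```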
Claude Bot
0c52688060 Reduce ConcurrentTask size from 24 to 16 bytes by packing auto_delete
Pack the auto_delete boolean into a spare bit of the next pointer in
ConcurrentTask, reducing memory usage by 33% (from 24 to 16 bytes).

Implementation details:
- Created PackedNext struct that packs u63 pointer + bool into u64
- Extended UnboundedQueue to support packed pointer types via
  UnboundedQueuePacked function
- Added getter/setter helpers to abstract packed vs unpacked access
- All atomic operations properly handle packed pointer unpacking
- Added compile-time assertion to ensure ConcurrentTask is 16 bytes

The next pointer keeps only 63 address bits, since pointers on 64-bit systems
don't use the full address space; the remaining top bit of the packed u64
holds the auto_delete flag.
2025-11-02 11:38:24 +00:00
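A back-of-the-envelope check of the 24 → 16 byte claim, using illustrative stand-in structs rather than Bun's real definitions:

```zig
const std = @import("std");

// Stand-ins for the before/after layouts: an 8-byte task handle, a pointer-sized
// `next`, and (before) a separate bool that drags in a full word of padding.
const Before = struct { task: u64, next: ?*Before, auto_delete: bool };
const After = struct { task: u64, next: usize }; // flag folded into `next`

test "packing the flag removes a padded word on 64-bit targets" {
    if (@sizeOf(usize) != 8) return error.SkipZigTest;
    try std.testing.expectEqual(@as(usize, 24), @sizeOf(Before));
    try std.testing.expectEqual(@as(usize, 16), @sizeOf(After));
}
```

The separate bool costs a full 8-byte word once padding is applied, which is exactly what folding it into the pointer wins back.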
7 changed files with 161 additions and 29 deletions

View File

@@ -132,10 +132,10 @@ pub fn watcherReleaseAndSubmitEvent(self: *Self, ev: *HotReloadEvent) void {
self.dbg_server_event = ev;
}
ev.concurrent_task = .{
.auto_delete = false,
.next = null,
.next = .zero,
.task = jsc.Task.init(ev),
};
ev.concurrent_task.next.setAutoDelete(false);
ev.owner.vm.event_loop.enqueueTaskConcurrent(&ev.concurrent_task);
},

View File

@@ -321,7 +321,7 @@ pub fn tickConcurrentWithCount(this: *EventLoop) usize {
dest.deinit();
}
if (task.auto_delete) {
if (task.auto_delete()) {
to_destroy = task;
}

View File

@@ -11,23 +11,96 @@
const ConcurrentTask = @This();
task: Task = undefined,
next: ?*ConcurrentTask = null,
auto_delete: bool = false,
next: PackedNext = .zero,
pub const Queue = UnboundedQueue(ConcurrentTask, .next);
pub const PackedNext = enum(usize) {
// Store the full usize pointer with auto_delete flag in the low alignment bit
// ConcurrentTask contains a u64 Task field, so it's at least 8-byte aligned
// This preserves all pointer metadata (ARM TBI/PAC/MTE tags, etc.)
zero = 0,
_,
const AUTO_DELETE_MASK: usize = 0x1;
const POINTER_MASK: usize = ~AUTO_DELETE_MASK;
pub fn init(ptr: ?*ConcurrentTask) PackedNext {
if (ptr) |p| {
const addr = @intFromPtr(p);
// Pointer should be aligned, verify low bit is zero
if (bun.Environment.allow_assert) {
bun.assertf((addr & AUTO_DELETE_MASK) == 0, "ConcurrentTask pointer must be aligned", .{});
}
// Store pointer with auto_delete = false (low bit = 0)
return @enumFromInt(addr);
}
return @enumFromInt(0);
}
pub fn initPreserveAutoDelete(self: PackedNext, ptr: ?*ConcurrentTask) PackedNext {
const self_val = @intFromEnum(self);
if (ptr) |p| {
const addr = @intFromPtr(p);
// Pointer should be aligned, verify low bit is zero
if (bun.Environment.allow_assert) {
bun.assertf((addr & AUTO_DELETE_MASK) == 0, "ConcurrentTask pointer must be aligned", .{});
}
// Combine new pointer with existing auto_delete flag
return @enumFromInt(addr | (self_val & AUTO_DELETE_MASK));
}
// Null pointer but preserve auto_delete flag
return @enumFromInt(self_val & AUTO_DELETE_MASK);
}
pub fn get(self: PackedNext) ?*ConcurrentTask {
// Mask out the auto_delete bit to get the original pointer
const addr = @intFromEnum(self) & POINTER_MASK;
if (addr == 0) return null;
return @ptrFromInt(addr);
}
pub fn autoDelete(self: PackedNext) bool {
return (@intFromEnum(self) & AUTO_DELETE_MASK) != 0;
}
pub fn setAutoDelete(self: *PackedNext, value: bool) void {
// Non-atomic write is safe because this is only called during initialization
// before the task is shared with other threads
const self_val = @intFromEnum(self.*);
if (value) {
self.* = @enumFromInt(self_val | AUTO_DELETE_MASK);
} else {
self.* = @enumFromInt(self_val & POINTER_MASK);
}
}
comptime {
if (@sizeOf(PackedNext) != @sizeOf(usize)) {
@compileError("PackedNext must be the same size as a usize");
}
}
};
pub const Queue = bun.threading.UnboundedQueuePacked(ConcurrentTask, .next, .@"packed");
pub const new = bun.TrivialNew(@This());
pub const deinit = bun.TrivialDeinit(@This());
/// Returns whether this task should be automatically deleted after completion.
/// The auto_delete flag being stored in the next field is an implementation detail.
pub inline fn auto_delete(this: *const ConcurrentTask) bool {
return this.next.autoDelete();
}
pub const AutoDeinit = enum {
manual_deinit,
auto_deinit,
};
pub fn create(task: Task) *ConcurrentTask {
return ConcurrentTask.new(.{
const concurrent_task = ConcurrentTask.new(.{
.task = task,
.next = null,
.auto_delete = true,
.next = .zero,
});
concurrent_task.next.setAutoDelete(true);
return concurrent_task;
}
pub fn createFrom(task: anytype) *ConcurrentTask {
@@ -46,16 +119,22 @@ pub fn from(this: *ConcurrentTask, of: anytype, auto_deinit: AutoDeinit) *Concur
this.* = .{
.task = Task.init(of),
.next = null,
.auto_delete = auto_deinit == .auto_deinit,
.next = .zero,
};
this.next.setAutoDelete(auto_deinit == .auto_deinit);
return this;
}
const std = @import("std");
comptime {
// Verify that ConcurrentTask is 16 bytes (not 24)
// Task is 8 bytes (u64), PackedNext is 8 bytes (usize) = 16 bytes total
if (@sizeOf(ConcurrentTask) != 16) {
@compileError(bun.fmt.comptimePrint("ConcurrentTask should be 16 bytes, but it's {d} bytes", .{@sizeOf(ConcurrentTask)}));
}
}
const bun = @import("bun");
const UnboundedQueue = bun.threading.UnboundedQueue;
const std = @import("std");
const jsc = bun.jsc;
const ManagedTask = jsc.ManagedTask;

View File

@@ -25,11 +25,12 @@ export fn Bun__eventLoop__incrementRefConcurrently(jsc_vm: *VirtualMachine, delt
export fn Bun__queueJSCDeferredWorkTaskConcurrently(jsc_vm: *VirtualMachine, task: *JSCScheduler.JSCDeferredWorkTask) void {
jsc.markBinding(@src());
var loop = jsc_vm.eventLoop();
loop.enqueueTaskConcurrent(ConcurrentTask.new(.{
const concurrent_task = ConcurrentTask.new(.{
.task = Task.init(task),
.next = null,
.auto_delete = true,
}));
.next = .zero,
});
concurrent_task.next.setAutoDelete(true);
loop.enqueueTaskConcurrent(concurrent_task);
}
export fn Bun__tickWhilePaused(paused: *bool) void {

View File

@@ -225,7 +225,8 @@ pub fn NewHotReloader(comptime Ctx: type, comptime EventLoopType: type, comptime
.hashes = this.hashes,
.concurrent_task = undefined,
});
that.concurrent_task = .{ .task = jsc.Task.init(that), .auto_delete = false };
that.concurrent_task = .{ .task = jsc.Task.init(that), .next = .zero };
that.concurrent_task.next.setAutoDelete(false);
that.reloader.enqueueTaskConcurrent(&that.concurrent_task);
this.count = 0;
}

View File

@@ -9,3 +9,4 @@ pub const WaitGroup = @import("./threading/WaitGroup.zig");
pub const ThreadPool = @import("./threading/ThreadPool.zig");
pub const Channel = @import("./threading/channel.zig").Channel;
pub const UnboundedQueue = @import("./threading/unbounded_queue.zig").UnboundedQueue;
pub const UnboundedQueuePacked = @import("./threading/unbounded_queue.zig").UnboundedQueuePacked;

View File

@@ -6,10 +6,30 @@ pub const cache_line_length = switch (@import("builtin").target.cpu.arch) {
};
pub fn UnboundedQueue(comptime T: type, comptime next_field: meta.FieldEnum(T)) type {
return UnboundedQueuePacked(T, next_field, .unpacked);
}
pub fn UnboundedQueuePacked(comptime T: type, comptime next_field: meta.FieldEnum(T), comptime packed_or_unpacked: enum { @"packed", unpacked }) type {
const next_name = meta.fieldInfo(T, next_field).name;
return struct {
const Self = @This();
fn getter(instance: *T) ?*T {
if (packed_or_unpacked == .@"packed") {
return @field(instance, next_name).get();
} else {
return @field(instance, next_name);
}
}
fn setter(instance: *T, value: ?*T) void {
if (packed_or_unpacked == .@"packed") {
@field(instance, next_name) = @field(instance, next_name).initPreserveAutoDelete(value);
} else {
@field(instance, next_name) = value;
}
}
pub const Batch = struct {
pub const Iterator = struct {
batch: Self.Batch,
@@ -17,7 +37,7 @@ pub fn UnboundedQueue(comptime T: type, comptime next_field: meta.FieldEnum(T))
pub fn next(self: *Self.Batch.Iterator) ?*T {
if (self.batch.count == 0) return null;
const front = self.batch.front orelse unreachable;
self.batch.front = @field(front, next_name);
self.batch.front = getter(front);
self.batch.count -= 1;
return front;
}
@@ -43,25 +63,43 @@ pub fn UnboundedQueue(comptime T: type, comptime next_field: meta.FieldEnum(T))
}
pub fn pushBatch(self: *Self, first: *T, last: *T) void {
@field(last, next) = null;
setter(last, null);
if (comptime bun.Environment.allow_assert) {
var item = first;
while (@field(item, next)) |next_item| {
while (getter(item)) |next_item| {
item = next_item;
}
assertf(item == last, "`last` should be reachable from `first`", .{});
}
const prev_next_ptr = if (self.back.swap(last, .acq_rel)) |old_back|
&@field(old_back, next)
else
&self.front.raw;
@atomicStore(?*T, prev_next_ptr, first, .release);
if (self.back.swap(last, .acq_rel)) |old_back| {
// There was a previous back item - set its `next` field to point to `first`
if (packed_or_unpacked == .@"packed") {
// We need to preserve old_back's auto_delete flag while updating the pointer
const prev_next_ptr = @as(*usize, @ptrCast(&@field(old_back, next)));
const current_packed = @atomicLoad(usize, prev_next_ptr, .monotonic);
const current_typed: @TypeOf(@field(first, next_name)) = @enumFromInt(current_packed);
const new_packed = current_typed.initPreserveAutoDelete(first);
@atomicStore(usize, prev_next_ptr, @intFromEnum(new_packed), .release);
} else {
@atomicStore(?*T, &@field(old_back, next), first, .release);
}
} else {
// Queue was empty - set front to point to `first`
// front/back atomics hold plain pointers, not PackedNext
@atomicStore(?*T, &self.front.raw, first, .release);
}
}
pub fn pop(self: *Self) ?*T {
var first = self.front.load(.acquire) orelse return null;
const next_item = while (true) {
const next_item = @atomicLoad(?*T, &@field(first, next), .acquire);
const next_item = if (packed_or_unpacked == .@"packed") blk: {
const packed_val = @atomicLoad(usize, @as(*usize, @ptrCast(&@field(first, next))), .acquire);
const packed_typed: @TypeOf(@field(first, next_name)) = @enumFromInt(packed_val);
break :blk packed_typed.get();
} else @atomicLoad(?*T, &@field(first, next), .acquire);
const maybe_first = self.front.cmpxchgWeak(
first,
next_item,
@@ -85,7 +123,13 @@ pub fn UnboundedQueue(comptime T: type, comptime next_field: meta.FieldEnum(T))
// Another item was added to the queue before we could finish removing this one.
const new_first = while (true) : (atomic.spinLoopHint()) {
// Wait for push/pushBatch to set `next`.
break @atomicLoad(?*T, &@field(first, next), .acquire) orelse continue;
if (packed_or_unpacked == .@"packed") {
const packed_val = @atomicLoad(usize, @as(*usize, @ptrCast(&@field(first, next))), .acquire);
const packed_typed: @TypeOf(@field(first, next_name)) = @enumFromInt(packed_val);
break packed_typed.get() orelse continue;
} else {
break @atomicLoad(?*T, &@field(first, next), .acquire) orelse continue;
}
};
self.front.store(new_first, .release);
@@ -108,7 +152,13 @@ pub fn UnboundedQueue(comptime T: type, comptime next_field: meta.FieldEnum(T))
while (next_item != last) : (batch.count += 1) {
next_item = while (true) : (atomic.spinLoopHint()) {
// Wait for push/pushBatch to set `next`.
break @atomicLoad(?*T, &@field(next_item, next), .acquire) orelse continue;
if (packed_or_unpacked == .@"packed") {
const packed_val = @atomicLoad(usize, @as(*usize, @ptrCast(&@field(next_item, next))), .acquire);
const packed_typed: @TypeOf(@field(next_item, next_name)) = @enumFromInt(packed_val);
break packed_typed.get() orelse continue;
} else {
break @atomicLoad(?*T, &@field(next_item, next), .acquire) orelse continue;
}
};
}