Update bundle_v2.zig

2026-02-16 13:51:47 +00:00 · 2025-02-23 22:21:40 -08:00 · 2025-02-23 20:26:30 -08:00 · 2025-02-23 20:09:43 -08:00 · 2025-02-23 20:08:53 -08:00 · 2025-02-23 20:04:22 -08:00
1 changed files with 122 additions and 0 deletions
--- a/src/bundler/bundle_v2.zig
+++ b/src/bundler/bundle_v2.zig
@@ -307,6 +307,128 @@ pub const ThreadPool = struct {

            this.data.transpiler.resolver.caches = CacheSet.Set.init(this.allocator);
            debug("Worker.create()", .{});
+
+            if (Environment.isLinux) {
+                if (!bun.getRuntimeFeatureFlag("BUN_FEATURE_FLAG_NO_CPU_AFFINITY") and bun.getThreadCount() > 2) {
+                    const Affinity = struct {
+                        var cpu_index: std.atomic.Value(usize) = std.atomic.Value(usize).init(0);
+                        var affinity_lock: bun.Mutex = .{};
+                        var shared_cpu_mask: std.os.linux.cpu_set_t = undefined;
+                        var is_initialized: bool = false;
+                        var core_to_ccx: [MAX_CPUS]u32 = undefined;
+                        var unique_ccx_count: u32 = 0;
+                        threadlocal var has_set_affinity_on_thread: bool = false;
+
+                        const MAX_CPUS = std.os.linux.CPU_SETSIZE;
+                        const CORE_GROUP_SIZE = 2;
+                        const DEFAULT_CORES_PER_CCX = 6;
+
+                        pub fn CPU_SET(cpu: usize, set: *std.os.linux.cpu_set_t) void {
+                            const word_index = cpu / @bitSizeOf(usize);
+                            const bit_index = cpu % @bitSizeOf(usize);
+                            set[word_index] |= @as(usize, 1) << @as(u6, @intCast(bit_index));
+                        }
+
+                        pub fn CPU_ISSET(cpu: usize, set: *const std.os.linux.cpu_set_t) bool {
+                            const word_index = cpu / @bitSizeOf(usize);
+                            const bit_index = cpu % @bitSizeOf(usize);
+                            return (set[word_index] & (@as(usize, 1) << @as(u6, @intCast(bit_index)))) != 0;
+                        }
+
+                        pub fn pin() void {
+                            if (!has_set_affinity_on_thread) {
+                                has_set_affinity_on_thread = true;
+
+                                affinity_lock.lock();
+                                defer affinity_lock.unlock();
+
+                                if (!is_initialized) {
+                                    shared_cpu_mask = std.mem.zeroes(std.os.linux.cpu_set_t);
+                                    if (std.os.linux.sched_getaffinity(0, @sizeOf(std.os.linux.cpu_set_t), &shared_cpu_mask) != 0) {
+                                        Output.debugWarn("Failed to get CPU affinity mask", .{});
+                                        return;
+                                    }
+
+                                    // Count available CPUs first
+                                    var available_cpus: u32 = 0;
+                                    for (0..MAX_CPUS) |cpu| {
+                                        if (CPU_ISSET(cpu, &shared_cpu_mask)) {
+                                            available_cpus += 1;
+                                        }
+                                    }
+
+                                    // Simple division of CPUs into CCX groups
+                                    unique_ccx_count = (available_cpus + DEFAULT_CORES_PER_CCX - 1) / DEFAULT_CORES_PER_CCX;
+                                    if (unique_ccx_count == 0) unique_ccx_count = 1;
+
+                                    // Map CPUs to CCX groups
+                                    var ccx_index: u32 = 0;
+                                    var cpus_in_current_ccx: u32 = 0;
+                                    for (0..MAX_CPUS) |cpu| {
+                                        if (CPU_ISSET(cpu, &shared_cpu_mask)) {
+                                            core_to_ccx[cpu] = ccx_index;
+                                            cpus_in_current_ccx += 1;
+                                            if (cpus_in_current_ccx >= DEFAULT_CORES_PER_CCX) {
+                                                ccx_index = (ccx_index + 1) % unique_ccx_count;
+                                                cpus_in_current_ccx = 0;
+                                            }
+                                        } else {
+                                            core_to_ccx[cpu] = std.math.maxInt(u32);
+                                        }
+                                    }
+
+                                    is_initialized = true;
+                                }
+
+                                var thread_mask = std.mem.zeroes(std.os.linux.cpu_set_t);
+                                const current_index = cpu_index.fetchAdd(1, .seq_cst);
+                                const target_ccx = current_index % unique_ccx_count;
+
+                                // Find all available CPUs in this CCX
+                                var ccx_cpus = std.ArrayList(usize).init(bun.default_allocator);
+                                defer ccx_cpus.deinit();
+
+                                for (0..MAX_CPUS) |cpu| {
+                                    if (CPU_ISSET(cpu, &shared_cpu_mask) and core_to_ccx[cpu] == target_ccx) {
+                                        ccx_cpus.append(cpu) catch break;
+                                    }
+                                }
+
+                                if (ccx_cpus.items.len > 0) {
+                                    const target_cpu = ccx_cpus.items[current_index % ccx_cpus.items.len];
+
+                                    // Add the target CPU and its closest neighbors within the CCX
+                                    for (ccx_cpus.items) |cpu| {
+                                        const distance = if (cpu >= target_cpu)
+                                            cpu - target_cpu
+                                        else
+                                            target_cpu - cpu;
+
+                                        if (distance <= CORE_GROUP_SIZE) {
+                                            CPU_SET(cpu, &thread_mask);
+                                        }
+                                    }
+
+                                    std.os.linux.sched_setaffinity(0, &thread_mask) catch |err| {
+                                        Output.debugWarn("Failed to set affinity for worker thread: {s}", .{@errorName(err)});
+                                        return;
+                                    };
+
+                                    debug("Pinned thread {d} to CCX {d} ({d} CPUs)", .{ current_index, target_ccx, ccx_cpus.items.len });
+                                } else {
+                                    std.os.linux.sched_setaffinity(0, &shared_cpu_mask) catch |err| {
+                                        Output.debugWarn("Failed to set affinity for worker thread: {s}", .{@errorName(err)});
+                                        return;
+                                    };
+                                    debug("Thread {d} using full CPU mask (no CCX CPUs available)", .{current_index});
+                                }
+                            }
+                        }
+                    };
+
+                    Affinity.pin();
+                }
+            }
        }

        pub fn run(this: *Worker, ctx: *BundleV2) void {
Author	SHA1	Message	Date
Jarred Sumner	6db6b1a572	Update bundle_v2.zig	2025-02-23 22:21:40 -08:00
Jarred Sumner	b577050a2d	Update bundle_v2.zig	2025-02-23 20:26:30 -08:00
Jarred Sumner	6a63e3600f	Update bundle_v2.zig	2025-02-23 20:09:43 -08:00
Jarred Sumner	d406dce432	Update bundle_v2.zig	2025-02-23 20:08:53 -08:00
Jarred Sumner	66df4f7be4	Update bundle_v2.zig	2025-02-23 20:04:22 -08:00
Jarred Sumner	3ec7a2ae63	Try CPU pinning on Linux	2025-02-23 19:51:14 -08:00