Compare commits

...

6 Commits

Author SHA1 Message Date
Claude
9bb697dfb3 Retrigger CI: Windows failures are pre-existing flakes unrelated to this PR
Windows test failures in build #39164 are all pre-existing:
- test/v8/v8.test.ts (windows-2019-x64, windows-11-aarch64)
- test/napi/napi.test.ts (windows-2019-x64-baseline)
- test/cli/install/bun-install-lifecycle-scripts.test.ts (windows-2019-x64-baseline)
- test/js/node/test/parallel/test-repl-close.js (windows-2019-x64-baseline)

None involve worker_threads, JSCTaskScheduler, or Worker.cpp.
darwin-aarch64 "Expired" jobs were infra timeouts.
2026-03-10 04:42:49 +00:00
Alistair Smith
149b44a57e Merge branch 'main' into claude/deflake-worker-threads-test 2026-03-09 19:58:58 -07:00
Claude
88798e98cc Fix eventLoop ref counting magnitude and terminate() idempotency
- Bun__eventLoop__incrementRefConcurrently now honors the full delta
  magnitude instead of always ref/unref by 1. This fixes cancelAllPendingWork
  leaking event loop refs when more than one ticket was alive.

- Worker::terminate() uses atomic fetch_or to guarantee exactly one call
  to WebWorker__notifyNeedTermination, closing the double-call race.
2026-03-10 02:22:32 +00:00
Alistair Smith
bd3dea0411 Merge branch 'main' into claude/deflake-worker-threads-test 2026-03-09 18:48:08 -07:00
Claude Bot
e2c90938b9 fix: use clear() instead of cancelAndClear() for cross-library compat
cancelAndClear() is an inline function defined in WebKit's .cpp file,
not exported from the static library. Simply clearing the sets drops
the Ref<TicketData>, which destroys the tickets and automatically
clears the weak back-references in GlobalObject's m_weakTickets.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-09 22:45:59 +00:00
Claude Bot
a6cf872c6b fix(worker): cancel deferred work tickets during worker VM shutdown
The worker_threads test has been chronically flaky, crashing with a
segfault at address 0x10 during GC marking constraints on Windows,
macOS, and Alpine.

Root cause: Bun routes DeferredWorkTimer tickets to its own
JSCTaskScheduler instead of JSC's internal m_pendingTickets. During
worker VM shutdown, JSC's GC cleanup (cancelPendingWork) sees an empty
m_pendingTickets and does nothing, leaving stale JSCell* dependency
pointers in the scheduler's ticket sets. When the GC's core constraints
then visit these stale dependencies during collectNow(), it crashes.

Fix:
- Add JSCTaskScheduler::cancelAllPendingWork() to cancel and clear all
  tickets held by Bun's scheduler
- Call it in WebWorker__dispatchExit before the final GC collection
- Guard Worker::terminate() against calling into freed WebWorker memory
  when the worker has already been terminated
- Fix tests to properly terminate all workers (several tests were
  creating workers without calling terminate(), leaving them to be
  cleaned up during process exit)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-09 22:22:40 +00:00
5 changed files with 55 additions and 9 deletions

View File

@@ -93,6 +93,25 @@ static void runPendingWork(void* bunVM, Bun::JSCTaskScheduler& scheduler, JSCDef
delete job;
}
void JSCTaskScheduler::cancelAllPendingWork(void* bunVM)
{
Locker<Lock> holder { m_lock };
int eventLoopTickets = static_cast<int>(m_pendingTicketsKeepingEventLoopAlive.size());
// Clear both ticket sets. Dropping the Ref releases our hold on each ticket;
// the TicketData destructor will run if no other reference exists, which
// automatically clears the weak back-reference in the GlobalObject's
// m_weakTickets set so the GC won't visit stale JSCell* dependencies.
m_pendingTicketsKeepingEventLoopAlive.clear();
m_pendingTicketsOther.clear();
holder.unlockEarly();
if (eventLoopTickets > 0)
Bun__eventLoop__incrementRefConcurrently(bunVM, -eventLoopTickets);
}
extern "C" void Bun__runDeferredWork(Bun::JSCDeferredWorkTask* job)
{
auto& vm = job->vm();

View File

@@ -20,6 +20,9 @@ public:
static void onScheduleWorkSoon(WebCore::JSVMClientData* clientData, JSC::DeferredWorkTimer::Ticket ticket, JSC::DeferredWorkTimer::Task&& task);
static void onCancelPendingWork(WebCore::JSVMClientData* clientData, JSC::DeferredWorkTimer::Ticket ticket);
// Cancel all pending tickets, used during VM shutdown to prevent GC from visiting stale dependencies
void cancelAllPendingWork(void* bunVM);
public:
Lock m_lock;
UncheckedKeyHashSet<Ref<JSC::DeferredWorkTimer::TicketData>> m_pendingTicketsKeepingEventLoopAlive;

View File

@@ -27,6 +27,7 @@
#include "config.h"
#include "Worker.h"
#include "BunClientData.h"
// #include "ContentSecurityPolicy.h"
// #include "DedicatedWorkerGlobalScope.h"
#include "ErrorCode.h"
@@ -262,7 +263,10 @@ ExceptionOr<void> Worker::postMessage(JSC::JSGlobalObject& state, JSC::JSValue m
void Worker::terminate()
{
// m_contextProxy.terminateWorkerGlobalScope();
m_terminationFlags.fetch_or(TerminateRequestedFlag);
// Use fetch_or to atomically set TerminateRequestedFlag and check the old value.
// If either flag was already set, we've already initiated or completed termination.
if (m_terminationFlags.fetch_or(TerminateRequestedFlag) & (TerminateRequestedFlag | TerminatedFlag))
return;
WebWorker__notifyNeedTermination(impl_);
}
@@ -475,6 +479,17 @@ extern "C" void WebWorker__dispatchExit(Zig::GlobalObject* globalObject, Worker*
auto& vm = JSC::getVM(globalObject);
vm.setHasTerminationRequest();
// Cancel all pending deferred work tickets before the final GC.
// Bun routes DeferredWorkTimer tickets to JSCTaskScheduler instead of
// JSC's internal m_pendingTickets. Without explicitly cancelling them here,
// the GC's core constraints may visit stale JSCell* dependencies held by
// tickets that JSC doesn't know about, causing a use-after-free crash.
{
auto* clientData = WebCore::clientData(vm);
if (clientData)
clientData->deferredWorkTimer.cancelAllPendingWork(clientData->bunVM);
}
{
auto scope = DECLARE_THROW_SCOPE(vm);
auto* esmRegistryMap = globalObject->esmRegistryMap();

View File

@@ -16,10 +16,13 @@ export fn Bun__eventLoop__incrementRefConcurrently(jsc_vm: *VirtualMachine, delt
jsc.markBinding(@src());
if (delta > 0) {
jsc_vm.event_loop.refConcurrently();
_ = jsc_vm.event_loop.concurrent_ref.fetchAdd(@intCast(delta), .seq_cst);
} else if (delta < 0) {
_ = jsc_vm.event_loop.concurrent_ref.fetchSub(@intCast(-delta), .seq_cst);
} else {
jsc_vm.event_loop.unrefConcurrently();
return;
}
jsc_vm.event_loop.wakeup();
}
export fn Bun__queueJSCDeferredWorkTaskConcurrently(jsc_vm: *VirtualMachine, task: *JSCScheduler.JSCDeferredWorkTask) void {

View File

@@ -295,7 +295,7 @@ test("eval does not leak source code", async () => {
});
describe("worker event", () => {
test("is emitted on the next tick with the right value", () => {
test("is emitted on the next tick with the right value", async () => {
const { promise, resolve } = Promise.withResolvers();
let worker: Worker | undefined = undefined;
let called = false;
@@ -306,14 +306,15 @@ describe("worker event", () => {
});
worker = new Worker(new URL("data:text/javascript,"));
expect(called).toBeFalse();
return promise;
await promise;
await worker.terminate();
});
test("uses an overridden process.emit function", async () => {
const previousEmit = process.emit;
let worker: Worker | undefined;
try {
const { promise, resolve, reject } = Promise.withResolvers();
let worker: Worker | undefined;
// should not actually emit the event
process.on("worker", expect.unreachable);
worker = new Worker("", { eval: true });
@@ -331,6 +332,7 @@ describe("worker event", () => {
} finally {
process.emit = previousEmit;
process.off("worker", expect.unreachable);
if (worker) await worker.terminate();
}
});
@@ -361,6 +363,7 @@ describe("environmentData", () => {
);
const [msg] = await once(worker, "message");
expect(msg).toEqual(new Map([["hello", "world"]]));
await worker.terminate();
});
test("child modifications do not affect parent", async () => {
@@ -424,7 +427,7 @@ describe("error event", () => {
});
describe("getHeapSnapshot", () => {
test("throws if the wrong options are passed", () => {
test("throws if the wrong options are passed", async () => {
const worker = new Worker("", { eval: true });
// @ts-expect-error
expect(() => worker.getHeapSnapshot(0)).toThrow({
@@ -441,15 +444,17 @@ describe("getHeapSnapshot", () => {
name: "TypeError",
message: 'The "options.exposeNumericValues" property must be of type boolean. Received type number (0)',
});
await worker.terminate();
});
test("returns a rejected promise if the worker is not running", () => {
test("returns a rejected promise if the worker is not running", async () => {
const worker = new Worker("", { eval: true });
expect(worker.getHeapSnapshot()).rejects.toMatchObject({
await expect(worker.getHeapSnapshot()).rejects.toMatchObject({
name: "Error",
code: "ERR_WORKER_NOT_RUNNING",
message: "Worker instance not running",
});
await worker.terminate();
});
test("resolves to a Stream.Readable with JSON text in V8 format", async () => {
@@ -485,5 +490,6 @@ describe("getHeapSnapshot", () => {
"trace_tree",
]);
worker.postMessage(0);
await once(worker, "exit");
});
});