Files
bun.sh/src/codegen/class-definitions.ts
Jarred Sumner bd88717ddc codegen: Add WriteBarrierEarlyInit support for classes with values and valuesArray (#23624)
## Summary

Adds comprehensive support to `generate-classes.ts` for JavaScript
classes that need both named WriteBarrier members (like callbacks) and a
dynamic array of JSValues, all properly tracked by the garbage
collector. This replaces error-prone manual `protect()/unprotect()`
calls with proper GC integration.

## Motivation

The shell interpreter was using `JSValue.protect()/unprotect()` to keep
JavaScript objects alive, which caused memory leaks when cleanup paths
didn't properly unprotect values. This is a common pattern that needed a
better solution.

## What Changed

### Code Generator (`generate-classes.ts`)

When a class has both `values: ["resolve", "reject"]` and `valuesArray:
true`:

**Generated C++ class gets:**
- `WTF::FixedVector<JSC::WriteBarrier<JSC::Unknown>> jsvalueArray`
member for dynamic array
- Individual `JSC::WriteBarrier<JSC::Unknown> m_resolve, m_reject`
members for named values
- 4 `create()` overloads covering all combinations:
  1. Basic: `create(vm, globalObject, structure, ptr)`
  2. Array only: `create(..., FixedVector<WriteBarrier<Unknown>>&&)`
  3. Named values: `create(..., JSValue resolve, JSValue reject)` 
  4. Both: `create(..., FixedVector&&, JSValue resolve, JSValue reject)`

**Constructor overloads using `WriteBarrierEarlyInit`:**
```cpp
JSShellInterpreter(VM& vm, Structure* structure, void* ptr, 
                   JSValue resolve, JSValue reject)
    : Base(vm, structure)
    , m_resolve(resolve, JSC::WriteBarrierEarlyInit)  // ← Key technique
    , m_reject(reject, JSC::WriteBarrierEarlyInit)
{
    m_ctx = ptr;
}
```

The `WriteBarrierEarlyInit` tag allows initializing WriteBarriers in the
constructor initializer list before the object is fully constructed,
which is required for proper GC integration.

**Extern C bridge functions:**
- `TypeName__createWithValues(globalObject, ptr, markedArgumentBuffer*)`
- `TypeName__createWithInitialValues(globalObject, ptr, resolve,
reject)`
- `TypeName__createWithValuesAndInitialValues(globalObject, ptr,
buffer*, resolve, reject)`

**Zig convenience wrappers:**
- `toJSWithValues(this, globalObject, markedArgumentBuffer)`
- `toJSWithInitialValues(this, globalObject, resolve, reject)`
- `toJSWithValuesAndInitialValues(this, globalObject, buffer, resolve,
reject)`

### Shell Interpreter Memory Leak Fix

**Before:**
```zig
const js_value = JSShellInterpreter.toJS(interpreter, globalThis);
resolve.protect();  // Manual reference counting
reject.protect();
// ... later in cleanup ...
resolve.unprotect();  // Easy to forget/miss in error paths
reject.unprotect();
```

**After:**
```zig
const js_value = Bun__createShellInterpreter(
    globalThis, 
    interpreter,
    parsed_shell_script,
    resolve,  // Stored with WriteBarrierEarlyInit
    reject,   // GC tracks automatically
);
// No manual memory management needed!
```

### Supporting Changes

- Added `MarkedArgumentBuffer.wrap()` helper in Zig for safe
MarkedArgumentBuffer usage
- Created `ShellBindings.cpp` with `Bun__createShellInterpreter()` using
the new API
- Removed all `protect()/unprotect()` calls from shell interpreter
- Applied pattern to both `ShellInterpreter` and `ShellArgs` classes

## Benefits

1. **No memory leaks**: GC tracks all references automatically
2. **Safer**: Cannot forget to unprotect values
3. **Cleaner code**: No manual reference counting
4. **Reusable**: Pattern works for any class needing to store JSValues
5. **Performance**: Same cost as manual protect/unprotect but safer

## Testing

Existing shell tests verify the functionality. The pattern is already
used throughout JavaScriptCore for similar cases (see
`JSWrappingFunction`, `AsyncContextFrame`, `JSModuleMock`, etc.)

## When to Use This Pattern

Use `values` + `valuesArray` + `WriteBarrierEarlyInit` when:
- Your C++ class needs to keep JavaScript values alive
- You have both known named callbacks AND dynamic arrays of values
- You want the GC to track references instead of manual
protect/unprotect
- Your class extends `JSDestructibleObject`

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-10-13 19:15:38 -07:00

308 lines
8.0 KiB
TypeScript

/**
* Attributes shared by the getter, setter, accessor and function variants of
* `Field` (each of those variants is intersected with this interface).
*/
interface PropertyAttribute {
// NOTE(review): presumably the `enumerable` flag of the generated JS property — confirm in the generator.
enumerable?: boolean;
// NOTE(review): presumably the `configurable` flag of the generated JS property — confirm in the generator.
configurable?: boolean;
/**
* The name for a private symbol to use as the property name. The value should
* be a private symbol from `BunBuiltinNames.h`. This will omit the property
* from the prototype hash table, instead setting it using `putDirect()`.
*/
privateSymbol?: string;
// NOTE(review): presumably the name of a public symbol to use as the property
// key; `define()` passes "inspectCustom" here — confirm how the generator resolves it.
publicSymbol?: string;
// NOTE(review): presumably overrides the JS-visible name of the property/function;
// `define()` passes "[nodejs.util.inspect.custom]" here — confirm.
name?: string;
}
/**
 * Controls how a generated method reacts when it is invoked with a `this`
 * value that is not an instance of the class.
 */
export enum InvalidThisBehavior {
  /** Throw a `TypeError`. This is the default. */
  Throw = 0,
  /**
   * Skip the native implementation and return `undefined` instead. Some
   * Node.js methods are supposed to behave this way.
   */
  NoOp = 1,
}
/**
* Describes one entry of a class definition's `klass` (static) or `proto`
* (prototype) table. The variants are told apart by which key is present:
* `getter`, `value`, `setter`, `accessor`, `fn`, `internal` or `builtin`.
*/
export type Field =
| ({
// NOTE(review): presumably the name of the native getter function — confirm in the generator.
getter: string;
// NOTE(review): presumably enables caching of the getter's result (`true`) or
// names the cache slot (string) — confirm in the generator.
cache?: true | string;
/**
* Allow overriding the value of the property
*/
writable?: boolean;
// NOTE(review): presumably passes the JS `this` value to the native getter — confirm.
this?: boolean;
} & PropertyAttribute)
| { value: string }
| ({ setter: string; this?: boolean } & PropertyAttribute)
| ({
accessor: { getter: string; setter: string };
cache?: true | string;
this?: boolean;
} & PropertyAttribute)
| ({
// NOTE(review): presumably the name of the native function implementing the method — confirm.
fn: string;
/**
* Mark it as an async function in the TypeScript definition.
*
* Does not do anything at runtime.
*/
async?: boolean;
/**
* Number of parameters accepted by the function.
*
* Sets [`function.length`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Function/length).
*/
length?: number;
// NOTE(review): presumably passes the JS `this` value to the native function — confirm.
passThis?: boolean;
/**
* What to do when the method is called with a `this` that is not an instance
* of the class. See `InvalidThisBehavior`.
*/
invalidThisBehavior?: InvalidThisBehavior;
// Note: `define()` in this file overwrites `DOMJIT` with `undefined` on every
// field, so this description is currently discarded for classes built via `define()`.
DOMJIT?: {
returns: string;
args?: [string, string] | [string, string, string] | [string] | [];
pure?: boolean;
};
} & PropertyAttribute)
| { internal: true }
| {
/**
* The function is a builtin (its implementation is defined in
* src/js/builtins/), this value is the name of the code generator
* function: `camelCase(fileName + functionName + "CodeGenerator")`
*/
builtin: string;
/**
* Number of parameters accepted by the function.
*
* Sets [`function.length`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Function/length).
*/
length?: number;
};
/**
 * Description of one native class, consumed by Bun's class code generator.
 *
 * `name`, `klass`, `proto` and `own` are always populated after construction;
 * every other member is optional and is copied verbatim from the options
 * passed to the constructor.
 */
export class ClassDefinition {
  /**
   * Class name.
   *
   * Used to find the proper struct and as the `.name` of the JS constructor
   * function.
   */
  name: string;
  /**
   * Class constructor is newable. Called before the JSValue corresponding to
   * the object is created. Throwing an exception prevents the object from being
   * created.
   */
  construct?: boolean;
  /**
   * Class constructor needs `this` value.
   *
   * Makes the code generator call the Zig constructor function **after** the
   * JSValue is instantiated. Only use this if you must, as it probably isn't
   * good for GC since it means if the constructor throws the GC will have to
   * clean up the object that never reached JS.
   */
  constructNeedsThis?: boolean;
  /**
   * Class constructor is callable. In JS, ES6 class constructors are not
   * callable.
   */
  call?: boolean;
  /**
   * The instances of this class are intended to be inside the this of a bound function.
   */
  forBind?: boolean;
  /**
   * ## IMPORTANT
   * You _must_ free the pointer to your native class!
   *
   * Example for pointers only owned by JavaScript classes:
   * ```zig
   * pub const NativeClass = struct {
   *
   *   fn constructor(global: *JSC.JSGlobalObject, frame: *JSC.CallFrame) bun.JSError!*NativeClass {
   *     // do stuff
   *     return bun.new(NativeClass, .{
   *       // ...
   *     });
   *   }
   *
   *   fn finalize(this: *NativeClass) void {
   *     // free allocations owned by this class, then free the struct itself.
   *     bun.destroy(this);
   *   }
   * };
   * ```
   * Example with ref counting:
   * ```zig
   * pub const RefCountedNativeClass = struct {
   *   const RefCount = bun.ptr.RefCount(@This(), "ref_count", deinit, .{});
   *   pub const ref = RefCount.ref;
   *   pub const deref = RefCount.deref;
   *
   *   fn constructor(global: *JSC.JSGlobalObject, frame: *JSC.CallFrame) bun.JSError!*RefCountedNativeClass {
   *     // do stuff
   *     return bun.new(RefCountedNativeClass, .{
   *       // ...
   *     });
   *   }
   *
   *   fn deinit(this: *RefCountedNativeClass) void {
   *     // free allocations owned by this class, then free the struct itself.
   *     bun.destroy(this);
   *   }
   *
   *   pub const finalize = deref; // GC will deref, which can free if no references are left.
   * };
   * ```
   * @todo remove this and require all classes to implement `finalize`.
   */
  finalize?: boolean;
  // NOTE(review): presumably signals that the Zig type supplies its own `toJS` — confirm in the generator.
  overridesToJS?: boolean;
  /**
   * Static properties and methods.
   */
  klass: Record<string, Field>;
  /**
   * Properties and methods on the prototype.
   */
  proto: Record<string, Field>;
  /**
   * Properties and methods attached to the instance itself.
   */
  own: Record<string, string>;
  /**
   * Names of individual JS values stored on the generated C++ class as
   * `JSC::WriteBarrier<JSC::Unknown>` members (e.g. `["resolve", "reject"]`
   * yields `m_resolve` and `m_reject`).
   */
  values?: string[];
  /**
   * When true, the class will accept a MarkedArgumentBuffer* to create a
   * `WTF::FixedVector<JSC::WriteBarrier<JSC::Unknown>> jsvalueArray` member
   * that will be visited by GC.
   */
  valuesArray?: boolean;
  /**
   * Set this to `"0b11101110"`.
   */
  JSType?: string;
  noConstructor?: boolean;
  final?: boolean;
  /**
   * Class has an `estimatedSize` function that reports external allocations to GC.
   * Called from any thread.
   *
   * When `true`, classes should have a method with this signature:
   * ```zig
   * pub fn estimatedSize(this: *@This()) usize;
   * ```
   *
   * Report `@sizeOf(@This())` as well as any external allocations.
   */
  estimatedSize?: boolean;
  /**
   * Used in heap snapshots.
   *
   * If true, the class will have a `memoryCost` method that returns the size of the object in bytes.
   *
   * Unlike estimatedSize, this is always called on the main thread and not used for GC.
   *
   * If none is provided, we use the struct size.
   */
  memoryCost?: boolean;
  hasPendingActivity?: boolean;
  isEventEmitter?: boolean;
  supportsObjectCreate?: boolean;
  getInternalProperties?: boolean;
  custom?: Record<string, CustomField>;
  configurable?: boolean;
  enumerable?: boolean;
  structuredClone?: { transferable: boolean; tag: number; storable: boolean };
  /**
   * When passed to `define()`, an `inspectCustom` method (keyed by the public
   * symbol `inspectCustom`) is added to `proto`.
   */
  inspectCustom?: boolean;
  callbacks?: Record<string, string>;

  /**
   * Builds a definition from a partial set of options.
   *
   * `name` defaults to `""` and `klass` / `proto` / `own` default to `{}` when
   * they are missing, `undefined` or `null`. All remaining own enumerable
   * properties of `options` are copied onto the instance unchanged.
   *
   * @param options Any subset of the members of this class.
   */
  constructor(options: Partial<ClassDefinition>) {
    // Pull the four defaulted members out first so that an explicitly passed
    // `undefined` (e.g. `{ klass: undefined }`) cannot overwrite its default
    // when the remaining options are copied over.
    const { name, klass, proto, own, ...rest } = options;
    this.name = name ?? "";
    this.klass = klass ?? {};
    this.proto = proto ?? {};
    this.own = own ?? {};
    Object.assign(this, rest);
  }

  /**
   * Reports whether `own` contains at least one enumerable property.
   *
   * @returns `true` if `own` is non-empty, otherwise `false`.
   */
  hasOwnProperties() {
    // The first enumerable key is enough; the key itself is not needed.
    for (const _key in this.own) {
      return true;
    }
    return false;
  }
}
/**
* Value type of `ClassDefinition.custom`.
*
* NOTE(review): the meaning of each member is not visible in this file; the
* notes below are guesses from the names and should be confirmed against the
* code generator.
*/
export interface CustomField {
// presumably a header file to include for this custom member — TODO confirm
header?: string;
// presumably additional header files to include — TODO confirm
extraHeaderIncludes?: string[];
// presumably the file or symbol providing the implementation — TODO confirm
impl?: string;
// presumably the C++ type of the custom member — TODO confirm
type?: string;
}
/**
 * Returns a copy of `fields` whose keys are ordered with `localeCompare` and
 * whose values are shallow copies with `DOMJIT` forced to `undefined`. The
 * input table and its field objects are left untouched.
 */
function sortFieldsAndClearDOMJIT<T extends object>(fields: Record<string, T>): Record<string, T> {
  return Object.fromEntries(
    Object.entries(fields)
      .sort(([a], [b]) => a.localeCompare(b))
      .map(([key, field]) => [key, { ...field, DOMJIT: undefined }]),
  );
}

/**
 * Define a native class written in Zig. Bun's codegen step will create CPP wrappers
 * for interacting with JSC.
 *
 * The returned definition has:
 * - `klass` and `proto` sorted by key, with `DOMJIT` set to `undefined` on
 *   every field;
 * - an extra `inspectCustom` prototype method when `inspectCustom` is true;
 * - `false` / `[]` / `{}` defaults for the destructured options that were omitted.
 *
 * The objects passed in are never modified: `proto`, `klass` and their field
 * objects are copied before being changed.
 *
 * @param options Partial class description; may be omitted entirely.
 * @returns A new `ClassDefinition` built from the normalized options.
 */
export function define(
  {
    klass = {},
    proto = {},
    own = {},
    values = [],
    overridesToJS = false,
    estimatedSize = false,
    call = false,
    construct = false,
    structuredClone,
    inspectCustom = false,
    ...rest
  }: Partial<ClassDefinition> = {},
): ClassDefinition {
  // Add the inspect hook on a copy so the caller's `proto` object is not mutated.
  const protoFields = inspectCustom
    ? {
        ...proto,
        inspectCustom: {
          fn: "inspectCustom",
          length: 2,
          publicSymbol: "inspectCustom",
          name: "[nodejs.util.inspect.custom]",
        },
      }
    : proto;

  return new ClassDefinition({
    ...rest,
    call,
    overridesToJS,
    construct,
    estimatedSize,
    structuredClone,
    values,
    // `own` already defaults to `{}` when undefined; `||` additionally covers `null`.
    own: own || {},
    klass: sortFieldsAndClearDOMJIT(klass),
    proto: sortFieldsAndClearDOMJIT(protoFields),
  });
}