diff --git a/cmake/targets/BuildBun.cmake b/cmake/targets/BuildBun.cmake index a1edfe216e..b20820bdc9 100644 --- a/cmake/targets/BuildBun.cmake +++ b/cmake/targets/BuildBun.cmake @@ -902,6 +902,19 @@ target_include_directories(${bun} PRIVATE ${NODEJS_HEADERS_PATH}/include/node )
+# --- Python (embedded CPython; override the prefix with -DPYTHON_ROOT=<dir>) ---
+set(PYTHON_ROOT "${CWD}/vendor/cpython/install" CACHE PATH "CPython install prefix (provides include/ and lib/)")
+set(PYTHON_VERSION_MAJOR 3)
+set(PYTHON_VERSION_MINOR 13)
+set(PYTHON_VERSION "${PYTHON_VERSION_MAJOR}.${PYTHON_VERSION_MINOR}")
+target_include_directories(${bun} PRIVATE
+  ${PYTHON_ROOT}/include/python${PYTHON_VERSION}
+)
+# Pass PYTHON_ROOT to C++ so BunPython.cpp can use it for runtime paths
+target_compile_definitions(${bun} PRIVATE
+  PYTHON_ROOT="${PYTHON_ROOT}"
+)
+
 if(NOT WIN32) target_include_directories(${bun} PRIVATE ${CWD}/src/bun.js/bindings/libuv) endif() @@ -1314,6 +1327,19 @@ if(APPLE) target_compile_definitions(${bun} PRIVATE U_DISABLE_RENAMING=1) endif()
+# --- Python ---
+# Link against shared Python library so extension modules can find symbols
+if(APPLE)
+  target_link_libraries(${bun} PRIVATE
+    "${PYTHON_ROOT}/lib/libpython${PYTHON_VERSION}.dylib"
+    "-framework CoreFoundation"
+  )
+else()
+  target_link_libraries(${bun} PRIVATE
+    "${PYTHON_ROOT}/lib/libpython${PYTHON_VERSION}.so"
+  )
+endif()
+
 if(USE_STATIC_SQLITE) target_compile_definitions(${bun} PRIVATE LAZY_LOAD_SQLITE=0) else() diff --git a/src/api/schema.zig b/src/api/schema.zig index a748f5c518..7dfbeff953 100644 --- a/src/api/schema.zig +++ b/src/api/schema.zig @@ -345,6 +345,7 @@ pub const api = struct { yaml = 19, json5 = 20, md = 21,
+ py = 22,
 _, pub fn jsonStringify(self: @This(), writer: anytype) !void { diff --git a/src/bake/DevServer/DirectoryWatchStore.zig b/src/bake/DevServer/DirectoryWatchStore.zig index 67a4dcb688..dd170a4d5b 100644 --- a/src/bake/DevServer/DirectoryWatchStore.zig +++ b/src/bake/DevServer/DirectoryWatchStore.zig @@ -58,6 +58,7 @@ pub fn trackResolutionFailure(store: *DirectoryWatchStore, import_source: 
[]cons .sqlite, .sqlite_embedded, .md, + .py, => bun.debugAssert(false), } diff --git a/src/bun.js/HardcodedModule.zig b/src/bun.js/HardcodedModule.zig index 84f6ff9968..9aaf1bd56a 100644 --- a/src/bun.js/HardcodedModule.zig +++ b/src/bun.js/HardcodedModule.zig @@ -370,6 +370,78 @@ pub const HardcodedModule = enum { .{ "bun:internal-for-testing", .{ .path = "bun:internal-for-testing" } }, .{ "ffi", .{ .path = "bun:ffi" } }, + // Python builtin modules + entry("python:this"), + entry("python:builtins"), + entry("python:pathlib"), + entry("python:os"), + entry("python:json"), + entry("python:sys"), + entry("python:re"), + entry("python:math"), + entry("python:datetime"), + entry("python:collections"), + entry("python:itertools"), + entry("python:functools"), + entry("python:random"), + entry("python:hashlib"), + entry("python:base64"), + entry("python:urllib"), + entry("python:http"), + entry("python:io"), + entry("python:struct"), + entry("python:copy"), + entry("python:pickle"), + entry("python:csv"), + entry("python:sqlite3"), + entry("python:subprocess"), + entry("python:threading"), + entry("python:multiprocessing"), + entry("python:asyncio"), + entry("python:typing"), + entry("python:dataclasses"), + entry("python:enum"), + entry("python:abc"), + entry("python:contextlib"), + entry("python:logging"), + entry("python:argparse"), + entry("python:shutil"), + entry("python:glob"), + entry("python:fnmatch"), + entry("python:tempfile"), + entry("python:gzip"), + entry("python:zipfile"), + entry("python:tarfile"), + entry("python:uuid"), + entry("python:socket"), + entry("python:ssl"), + entry("python:email"), + entry("python:html"), + entry("python:xml"), + entry("python:configparser"), + entry("python:inspect"), + entry("python:traceback"), + entry("python:warnings"), + entry("python:time"), + entry("python:calendar"), + entry("python:string"), + entry("python:textwrap"), + entry("python:difflib"), + entry("python:pprint"), + entry("python:statistics"), + 
entry("python:decimal"), + entry("python:fractions"), + entry("python:operator"), + entry("python:heapq"), + entry("python:bisect"), + entry("python:array"), + entry("python:weakref"), + entry("python:types"), + entry("python:codecs"), + entry("python:unicodedata"), + entry("python:secrets"), + entry("python:hmac"), + // Thirdparty packages we override .{ "@vercel/fetch", .{ .path = "@vercel/fetch" } }, .{ "isomorphic-fetch", .{ .path = "isomorphic-fetch" } }, diff --git a/src/bun.js/ModuleLoader.zig b/src/bun.js/ModuleLoader.zig index 57e8497ad9..846b06799c 100644 --- a/src/bun.js/ModuleLoader.zig +++ b/src/bun.js/ModuleLoader.zig @@ -735,6 +735,18 @@ pub fn transpileSourceCode( }; }, + .py => { + // Return the file path with .python tag - C++ will run Python + // and create JSPyObject wrappers for exports + return ResolvedSource{ + .allocator = null, + .source_code = bun.String.cloneUTF8(path.text), + .specifier = input_specifier, + .source_url = input_specifier.createIfDifferent(path.text), + .tag = .python, + }; + }, + else => { if (flags.disableTranspiling()) { return ResolvedSource{ @@ -828,17 +840,32 @@ pub export fn Bun__resolveAndFetchBuiltinModule( var log = logger.Log.init(jsc_vm.transpiler.allocator); defer log.deinit(); - const alias = HardcodedModule.Alias.bun_aliases.getWithEql(specifier.*, bun.String.eqlComptime) orelse - return false; - const hardcoded = HardcodedModule.map.get(alias.path) orelse { - bun.debugAssert(false); - return false; - }; - ret.* = .ok( - getHardcodedModule(jsc_vm, specifier.*, hardcoded) orelse - return false, - ); - return true; + // Check hardcoded aliases first + if (HardcodedModule.Alias.bun_aliases.getWithEql(specifier.*, bun.String.eqlComptime)) |alias| { + const hardcoded = HardcodedModule.map.get(alias.path) orelse { + bun.debugAssert(false); + return false; + }; + ret.* = .ok( + getHardcodedModule(jsc_vm, specifier.*, hardcoded) orelse + return false, + ); + return true; + } + + // Handle any python: prefixed module 
(for submodule imports like python:matplotlib.pyplot) + if (specifier.hasPrefixComptime("python:")) { + ret.* = .ok(.{ + .allocator = null, + .source_code = specifier.dupeRef(), + .specifier = specifier.dupeRef(), + .source_url = specifier.dupeRef(), + .tag = .python_builtin, + }); + return true; + } + + return false; } pub export fn Bun__fetchBuiltinModule( @@ -1222,6 +1249,43 @@ pub fn fetchBuiltinModule(jsc_vm: *VirtualMachine, specifier: bun.String) !?Reso } } + // Handle python: prefix for Python builtin modules + if (specifier.hasPrefixComptime("python:")) { + // Pass the full specifier (python:pathlib) - C++ will strip the prefix + return .{ + .allocator = null, + .source_code = specifier.dupeRef(), + .specifier = specifier.dupeRef(), + .source_url = specifier.dupeRef(), + .tag = .python_builtin, + }; + } + + // Check if this is a Python package in .venv/lib/python{version}/site-packages/ + // This allows `import numpy from "numpy"` to work for installed Python packages + const specifier_utf8 = specifier.toUTF8(bun.default_allocator); + defer specifier_utf8.deinit(); + const spec_slice = specifier_utf8.slice(); + + // Only check for bare specifiers (not paths) + if (spec_slice.len > 0 and spec_slice[0] != '.' 
and spec_slice[0] != '/') { + // Check if package exists in .venv/lib/python{version}/site-packages/ + var path_buf: bun.PathBuffer = undefined; + const venv_path = std.fmt.bufPrint(&path_buf, pypi.venv_site_packages ++ "/{s}", .{spec_slice}) catch return null; + + // Check if directory exists (Python package) or .py file exists + if (bun.sys.directoryExistsAt(bun.FD.cwd(), venv_path).unwrap() catch false) { + // Return as python_builtin - the module loader will import it via Python + return .{ + .allocator = null, + .source_code = specifier.dupeRef(), + .specifier = specifier.dupeRef(), + .source_url = specifier.dupeRef(), + .tag = .python_builtin, + }; + } + } + return null; } @@ -1363,6 +1427,7 @@ const dumpSourceString = @import("./RuntimeTranspilerStore.zig").dumpSourceStrin const setBreakPointOnFirstLine = @import("./RuntimeTranspilerStore.zig").setBreakPointOnFirstLine; const bun = @import("bun"); +const pypi = bun.install.PyPI; const Environment = bun.Environment; const MutableString = bun.MutableString; const Output = bun.Output; diff --git a/src/bun.js/VirtualMachine.zig b/src/bun.js/VirtualMachine.zig index 8d70f6db23..675244352d 100644 --- a/src/bun.js/VirtualMachine.zig +++ b/src/bun.js/VirtualMachine.zig @@ -1808,6 +1808,12 @@ pub fn resolveMaybeNeedsTrailingSlash( return; } + // Handle any python: prefixed module (allows submodule imports like python:matplotlib.pyplot) + if (bun.strings.hasPrefixComptime(specifier_utf8.slice(), "python:")) { + res.* = ErrorableString.ok(specifier); + return; + } + const old_log = jsc_vm.log; // the logger can end up being called on another thread, it must not use threadlocal Heap Allocator var log = logger.Log.init(bun.default_allocator); @@ -1821,6 +1827,58 @@ pub fn resolveMaybeNeedsTrailingSlash( jsc_vm.transpiler.resolver.log = old_log; } jsc_vm._resolve(&result, specifier_utf8.slice(), normalizeSource(source_utf8.slice()), is_esm, is_a_file_path) catch |err_| { + // Check if this is a Python package in .venv 
(fallback after node_modules)
+        // Only check for bare specifiers (not paths)
+        const spec_slice = specifier_utf8.slice();
+        if (spec_slice.len > 0 and spec_slice[0] != '.' and spec_slice[0] != '/') {
+            // Handle submodule imports like "matplotlib/pyplot" -> "python:matplotlib.pyplot"
+            // Extract the base package name (before any /)
+            const slash_idx = bun.strings.indexOfChar(spec_slice, '/');
+            const base_package = if (slash_idx) |idx| spec_slice[0..idx] else spec_slice;
+
+            // Check if package exists in .venv/lib/python{version}/site-packages/
+            // Normalize package name: Python uses underscores in module names, pip uses hyphens
+            // (names longer than normalized_name_buf are truncated to its length)
+            var normalized_name_buf: [256]u8 = undefined;
+            var normalized_name = normalized_name_buf[0..@min(base_package.len, normalized_name_buf.len)];
+            for (base_package, 0..) |c, i| {
+                if (i >= normalized_name.len) break;
+                normalized_name[i] = if (c == '-') '_' else c;
+            }
+
+            var path_buf: bun.PathBuffer = undefined;
+            if (std.fmt.bufPrint(&path_buf, pypi.venv_site_packages ++ "/{s}", .{normalized_name})) |venv_path| {
+                // Check if directory exists (Python package directory)
+                const is_dir = bun.sys.directoryExistsAt(bun.FD.cwd(), venv_path).unwrap() catch false;
+
+                // Also check for single-file packages like typing_extensions.py, six.py
+                var py_path_buf: bun.PathBuffer = undefined;
+                const py_path = std.fmt.bufPrint(&py_path_buf, pypi.venv_site_packages ++ "/{s}.py", .{normalized_name}) catch null;
+                const is_py_file = if (py_path) |p| brk: {
+                    break :brk switch (bun.sys.existsAtType(bun.FD.cwd(), p)) {
+                        .result => |t| t == .file,
+                        .err => false,
+                    };
+                } else false;
+
+                // module_buf receives "python:" followed by the whole specifier. The
+                // *AssumeCapacity appends below are unchecked, and only the base package
+                // name was length-bounded above, so a specifier that does not fit must
+                // skip the rewrite; it then falls through to the error handling that
+                // follows this block.
+                var module_buf: [512]u8 = undefined;
+                if ((is_dir or is_py_file) and "python:".len + spec_slice.len <= module_buf.len) {
+                    // Add python: prefix so fetchBuiltinModule handles it
+                    // Normalize hyphens to underscores for Python module names
+                    // Keep slashes - BunPython.cpp will convert them to dots
+                    var module_name = std.ArrayList(u8).initBuffer(&module_buf);
+                    module_name.appendSliceAssumeCapacity("python:");
+
+                    //
Append normalized spec_slice (hyphens -> underscores) + for (spec_slice) |c| { + module_name.appendAssumeCapacity(if (c == '-') '_' else c); + } + + res.* = ErrorableString.ok(bun.String.createAtomASCII(module_name.items)); + return; + } + } else |_| {} + } + var err = err_; const msg: logger.Msg = brk: { const msgs: []logger.Msg = log.msgs.items; @@ -3784,3 +3842,5 @@ const ServerEntryPoint = bun.transpiler.EntryPoints.ServerEntryPoint; const webcore = bun.webcore; const Body = webcore.Body; + +const pypi = @import("../install/pypi.zig"); diff --git a/src/bun.js/bindings/BunPython.cpp b/src/bun.js/bindings/BunPython.cpp new file mode 100644 index 0000000000..58865f8a70 --- /dev/null +++ b/src/bun.js/bindings/BunPython.cpp @@ -0,0 +1,1774 @@ +#include "BunPython.h" +#include "JSPyObject.h" +#include "PyJSValueObject.h" +#include "ZigGlobalObject.h" +#include "BunClientData.h" +#include "InternalModuleRegistry.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern "C" void Bun__atexit(void (*callback)()); +extern "C" JSC::EncodedJSValue Bun__Process__getCwd(JSC::JSGlobalObject* globalObject); + +// Zig timer functions +extern "C" JSC::EncodedJSValue Bun__Timer__setTimeout(JSC::JSGlobalObject* globalThis, JSC::EncodedJSValue callback, JSC::EncodedJSValue arguments, JSC::EncodedJSValue countdown); +extern "C" JSC::EncodedJSValue Bun__Timer__setImmediate(JSC::JSGlobalObject* globalThis, JSC::EncodedJSValue callback, JSC::EncodedJSValue arguments); + +namespace Bun::Python { + +using namespace JSC; + +// ============================================================================= +// Python Callback Management for Event Loop Integration +// ============================================================================= + +static bool g_bunEventLoopInitialized = false; +static PyObject* g_bunEventLoop = nullptr; + +// Forward declarations +static 
JSGlobalObject* getThreadJSGlobal();
+static void setThreadJSGlobal(JSGlobalObject* global);
+
+// Get monotonic time in seconds (for Python asyncio).
+// Uses CPython's PyTime C API (available from Python 3.13) so the clock works on
+// every platform; the mach_absolute_time() calls used previously exist only on macOS.
+static double getMonotonicTimeSeconds()
+{
+    PyTime_t now = 0;
+
+    // The Raw variant never sets a Python exception and does not need an attached
+    // thread state; on failure it stores 0 in `now` and returns -1.
+    if (PyTime_MonotonicRaw(&now) < 0) {
+        return 0.0;
+    }
+
+    return PyTime_AsSecondsDouble(now);
+}
+
+// Returns the JSPyObject Structure cached on the global object, creating it with
+// the object prototype and storing it on first use.
+static Structure* getJSPyObjectStructure(JSGlobalObject* globalObject)
+{
+    auto* zigGlobalObject = jsCast(globalObject);
+    VM& vm = globalObject->vm();
+
+    Structure* structure = zigGlobalObject->m_JSPyObjectStructure.get();
+    if (!structure) {
+        structure = JSPyObject::createStructure(vm, globalObject, globalObject->objectPrototype());
+        zigGlobalObject->m_JSPyObjectStructure.set(vm, zigGlobalObject, structure);
+    }
+    return structure;
+}
+
+// Python C function: _bun._schedule_timer(delay_ms, callable) -> timer_id
+// Raises TypeError when `callable` is not callable and RuntimeError when no
+// JavaScript global object is registered for the current thread.
+static PyObject* bun_schedule_timer(PyObject* self, PyObject* args)
+{
+    double delay_ms;
+    PyObject* callable;
+
+    if (!PyArg_ParseTuple(args, "dO", &delay_ms, &callable)) {
+        return nullptr;
+    }
+
+    if (!PyCallable_Check(callable)) {
+        PyErr_SetString(PyExc_TypeError, "callback must be callable");
+        return nullptr;
+    }
+
+    JSGlobalObject* globalObject = getThreadJSGlobal();
+    if (!globalObject) {
+        PyErr_SetString(PyExc_RuntimeError, "No JavaScript context available");
+        return nullptr;
+    }
+
+    VM& vm = globalObject->vm();
+    Structure* structure = getJSPyObjectStructure(globalObject);
+
+    // Wrap Python callable in JSPyObject - this handles reference counting
+    JSPyObject* jsCallable = JSPyObject::create(vm, globalObject, structure, callable);
+
+    // Call setTimeout with the JSPyObject as callback
+    JSValue result = JSValue::decode(Bun__Timer__setTimeout(
+
globalObject,
+        JSValue::encode(jsCallable),
+        JSValue::encode(jsUndefined()), // no arguments needed
+        JSValue::encode(jsNumber(delay_ms))));
+
+    // An empty result is reported to Python as RuntimeError.
+    if (result.isEmpty()) {
+        PyErr_SetString(PyExc_RuntimeError, "Failed to schedule timer");
+        return nullptr;
+    }
+
+    // Return 0 for now - cancellation is handled by Python's _cancelled flag
+    // The timer object is kept alive by JSC until it fires
+    return PyLong_FromLong(0);
+}
+
+// Python C function: _bun._schedule_soon(callable) -> timer_id
+// Wraps `callable` in a JSPyObject and hands it to Bun__Timer__setImmediate.
+// The returned id is always 0 at present. Raises TypeError for a non-callable
+// argument and RuntimeError when no JavaScript global object is available or
+// scheduling returns an empty value.
+static PyObject* bun_schedule_soon(PyObject* self, PyObject* args)
+{
+    PyObject* callable;
+
+    if (!PyArg_ParseTuple(args, "O", &callable)) {
+        return nullptr;
+    }
+
+    if (!PyCallable_Check(callable)) {
+        PyErr_SetString(PyExc_TypeError, "callback must be callable");
+        return nullptr;
+    }
+
+    JSGlobalObject* globalObject = getThreadJSGlobal();
+    if (!globalObject) {
+        PyErr_SetString(PyExc_RuntimeError, "No JavaScript context available");
+        return nullptr;
+    }
+
+    VM& vm = globalObject->vm();
+    Structure* structure = getJSPyObjectStructure(globalObject);
+
+    // Wrap Python callable in JSPyObject - this handles reference counting
+    JSPyObject* jsCallable = JSPyObject::create(vm, globalObject, structure, callable);
+
+    // Call setImmediate with the JSPyObject as callback
+    JSValue result = JSValue::decode(Bun__Timer__setImmediate(
+        globalObject,
+        JSValue::encode(jsCallable),
+        JSValue::encode(jsUndefined()) // no arguments needed
+        ));
+
+    if (result.isEmpty()) {
+        PyErr_SetString(PyExc_RuntimeError, "Failed to schedule immediate");
+        return nullptr;
+    }
+
+    // Return 0 for now - cancellation is handled by Python's _cancelled flag
+    return PyLong_FromLong(0);
+}
+
+// Python C function: _bun._time() -> float (seconds)
+// Returns getMonotonicTimeSeconds() as a Python float; `args` is not inspected.
+static PyObject* bun_time(PyObject* self, PyObject* args)
+{
+    return PyFloat_FromDouble(getMonotonicTimeSeconds());
+}
+
+// Python C function: _bun._cancel_handle(timer_id) -> None
+// Note: Currently a no-op - cancellation is handled by Python's _cancelled
flag +// TODO: Implement proper timer cancellation by storing timer objects +static PyObject* bun_cancel_handle(PyObject* self, PyObject* args) +{ + // Cancellation is handled by Python's BunHandle._cancelled flag + // which prevents the callback from executing when _run() is called. + // The timer will still fire, but the callback will be a no-op. + Py_RETURN_NONE; +} + +// Extern declaration for Bun's event loop tick +extern "C" void Bun__drainMicrotasks(); + +// Python C function: _bun._tick() -> None +// Runs one iteration of Bun's event loop (processes I/O, timers, microtasks) +static PyObject* bun_tick(PyObject* self, PyObject* args) +{ + Bun__drainMicrotasks(); + Py_RETURN_NONE; +} + +// ============================================================================= +// BunEventLoop Python Class Definition +// ============================================================================= + +static const char* bunEventLoopCode = R"( +import asyncio +import asyncio.events as events +import asyncio.futures as futures +import asyncio.tasks as tasks +import contextvars + +class BunHandle: + __slots__ = ('_callback', '_args', '_cancelled', '_loop', '_context', '_handle_id') + + def __init__(self, callback, args, loop, context=None): + self._loop = loop + self._callback = callback + self._args = args + self._cancelled = False + self._context = context if context is not None else contextvars.copy_context() + self._handle_id = None + + def cancel(self): + if not self._cancelled: + self._cancelled = True + if self._handle_id is not None: + import _bun + _bun._cancel_handle(self._handle_id) + self._callback = None + self._args = None + + def cancelled(self): + return self._cancelled + + def _run(self): + if self._cancelled: + return + # Mark as cancelled to prevent double-execution + # (callbacks may be scheduled on both Bun's queue and our queue) + self._cancelled = True + try: + self._context.run(self._callback, *self._args) + except (SystemExit, KeyboardInterrupt): + 
raise + except BaseException as exc: + self._loop.call_exception_handler({ + 'message': f'Exception in callback {self._callback!r}', + 'exception': exc, + 'handle': self, + }) + + +class BunTimerHandle(BunHandle): + __slots__ = ('_when', '_scheduled') + + def __init__(self, when, callback, args, loop, context=None): + super().__init__(callback, args, loop, context) + self._when = when + self._scheduled = True + + def when(self): + return self._when + + def cancel(self): + if not self._cancelled: + self._loop._timer_handle_cancelled(self) + super().cancel() + + +class BunEventLoop(asyncio.AbstractEventLoop): + def __init__(self): + self._closed = False + self._timer_cancelled_count = 0 + self._debug = False + self._ready = [] # Queue of callbacks to run + self._scheduled = [] # Heap of timer handles + + def time(self): + import _bun + return _bun._time() + + def call_later(self, delay, callback, *args, context=None): + if delay < 0: + delay = 0 + when = self.time() + delay + return self.call_at(when, callback, *args, context=context) + + def call_at(self, when, callback, *args, context=None): + import _bun + import heapq + handle = BunTimerHandle(when, callback, args, self, context) + delay = max(0, when - self.time()) + # Use Bun's timer so it fires on Bun's event loop + handle._handle_id = _bun._schedule_timer(delay * 1000, handle._run) + # Also add to scheduled heap for Python-driven loop + heapq.heappush(self._scheduled, (when, handle)) + return handle + + def call_soon(self, callback, *args, context=None): + import _bun + handle = BunHandle(callback, args, self, context) + # Use Bun's setImmediate so callbacks run on Bun's event loop + # This is important for JS->Python async where Bun's loop is driving + handle._handle_id = _bun._schedule_soon(handle._run) + # Also add to ready queue for Python->JS async where our loop is driving + self._ready.append(handle) + return handle + + def call_soon_threadsafe(self, callback, *args, context=None): + return 
self.call_soon(callback, *args, context=context) + + def _run_once(self): + import _bun + import heapq + + # Tick Bun's event loop first - this processes I/O and setImmediate callbacks + # which may include our call_soon callbacks + _bun._tick() + + # Process any remaining ready callbacks that weren't run by Bun + # (BunHandle._run checks _cancelled to avoid double-execution) + ready = self._ready + self._ready = [] + for handle in ready: + if not handle._cancelled: + handle._run() + + # Check for timers that are ready + now = self.time() + while self._scheduled and self._scheduled[0][0] <= now: + when, handle = heapq.heappop(self._scheduled) + if not handle._cancelled: + handle._run() + + def create_future(self): + return futures.Future(loop=self) + + def create_task(self, coro, *, name=None, context=None): + return tasks.Task(coro, loop=self, name=name, context=context) + + def is_running(self): + return True + + def is_closed(self): + return self._closed + + def close(self): + self._closed = True + + def get_debug(self): + return self._debug + + def set_debug(self, enabled): + self._debug = enabled + + def _timer_handle_cancelled(self, handle): + self._timer_cancelled_count += 1 + + def call_exception_handler(self, context): + message = context.get('message', 'Unhandled exception in event loop') + exception = context.get('exception') + if exception: + import traceback + print(f"{message}: {exception}") + traceback.print_exception(type(exception), exception, exception.__traceback__) + else: + print(message) + + def run_forever(self): + while not self._closed: + self._run_once() + + def run_until_complete(self, future): + import asyncio + + # Convert coroutine to future if needed + future = asyncio.ensure_future(future, loop=self) + + # Set this loop as the running loop + events._set_running_loop(self) + + try: + # Run until the future is done + while not future.done(): + self._run_once() + + # Return the result or raise exception + return future.result() + 
finally:
+            events._set_running_loop(None)
+
+    def stop(self):
+        self._closed = True
+
+    async def shutdown_asyncgens(self):
+        pass
+
+    async def shutdown_default_executor(self, timeout=None):
+        pass
+
+
+# Singleton event loop instance
+_bun_loop = None
+
+def get_bun_loop():
+    global _bun_loop
+    if _bun_loop is None:
+        _bun_loop = BunEventLoop()
+    return _bun_loop
+
+def set_bun_loop_running():
+    loop = get_bun_loop()
+    events._set_running_loop(loop)
+    return loop
+
+
+class BunEventLoopPolicy(asyncio.AbstractEventLoopPolicy):
+    """Event loop policy that uses BunEventLoop for all operations."""
+
+    def __init__(self):
+        self._local = None
+
+    def get_event_loop(self):
+        return get_bun_loop()
+
+    def set_event_loop(self, loop):
+        pass  # We always use the singleton BunEventLoop
+
+    def new_event_loop(self):
+        return get_bun_loop()
+
+
+# Install our event loop policy so asyncio.run() uses BunEventLoop
+asyncio.set_event_loop_policy(BunEventLoopPolicy())
+)";
+
+// =============================================================================
+// PyPromiseResolver - Python type to resolve JS Promises when Tasks complete
+// =============================================================================
+
+// Python object layout: the standard object header followed by a JSC::Strong
+// handle to the promise and the global object used when settling it.
+struct PyPromiseResolver {
+    PyObject_HEAD
+        JSC::Strong
+            promise;
+    JSGlobalObject* globalObject;
+};
+
+// tp_dealloc: clears the Strong handle, then frees the object through tp_free.
+static void PyPromiseResolver_dealloc(PyPromiseResolver* self)
+{
+    self->promise.clear();
+    Py_TYPE(self)->tp_free(reinterpret_cast(self));
+}
+
+// tp_call: invoked with the finished task as its single positional argument.
+// Rejects the stored promise when the task was cancelled or raised, otherwise
+// resolves it with the converted result. Returns None unless argument parsing fails.
+static PyObject* PyPromiseResolver_call(PyPromiseResolver* self, PyObject* args, PyObject* kwargs)
+{
+    PyObject* task;
+    if (!PyArg_ParseTuple(args, "O", &task)) {
+        return nullptr;
+    }
+
+    JSPromise* promise = self->promise.get();
+    if (!promise) {
+        // The Strong handle holds no promise, so there is nothing to settle
+        Py_RETURN_NONE;
+    }
+
+    JSGlobalObject* globalObject = self->globalObject;
+    VM& vm = globalObject->vm();
+
+    // Check if task was cancelled
+    PyObject* cancelledMethod = PyObject_GetAttrString(task, "cancelled");
+    if (cancelledMethod)
{ + PyObject* cancelled = PyObject_CallNoArgs(cancelledMethod); + Py_DECREF(cancelledMethod); + if (cancelled && PyObject_IsTrue(cancelled)) { + Py_DECREF(cancelled); + promise->reject(vm, globalObject, JSC::createError(globalObject, "Task was cancelled"_s)); + Py_RETURN_NONE; + } + Py_XDECREF(cancelled); + } + PyErr_Clear(); + + // Check for exception + PyObject* exceptionMethod = PyObject_GetAttrString(task, "exception"); + if (exceptionMethod) { + PyObject* exception = PyObject_CallNoArgs(exceptionMethod); + Py_DECREF(exceptionMethod); + if (exception && exception != Py_None) { + PyObject* excStr = PyObject_Str(exception); + const char* excCStr = excStr ? PyUnicode_AsUTF8(excStr) : "Unknown error"; + promise->reject(vm, globalObject, JSC::createError(globalObject, String::fromUTF8(excCStr))); + Py_XDECREF(excStr); + Py_DECREF(exception); + Py_RETURN_NONE; + } + Py_XDECREF(exception); + } + PyErr_Clear(); + + // Get result + PyObject* resultMethod = PyObject_GetAttrString(task, "result"); + if (!resultMethod) { + PyErr_Clear(); + promise->reject(vm, globalObject, JSC::createError(globalObject, "Failed to get task result"_s)); + Py_RETURN_NONE; + } + + PyObject* result = PyObject_CallNoArgs(resultMethod); + Py_DECREF(resultMethod); + + if (!result) { + PyErr_Clear(); + promise->reject(vm, globalObject, JSC::createError(globalObject, "Task result raised exception"_s)); + Py_RETURN_NONE; + } + + // Convert result to JS and resolve + JSValue jsResult = toJS(globalObject, result); + Py_DECREF(result); + + promise->resolve(globalObject, jsResult); + Py_RETURN_NONE; +} + +static PyTypeObject PyPromiseResolverType = { + .ob_base = PyVarObject_HEAD_INIT(nullptr, 0) + .tp_name + = "_bun.PromiseResolver", + .tp_basicsize = sizeof(PyPromiseResolver), + .tp_itemsize = 0, + .tp_dealloc = reinterpret_cast(PyPromiseResolver_dealloc), + .tp_call = reinterpret_cast(PyPromiseResolver_call), + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = "Resolves a JavaScript Promise when a Python 
Task completes",
+};
+
+// Allocates a PyPromiseResolver bound to `promise` and `globalObject`.
+// Returns nullptr when PyObject_New fails.
+// NOTE(review): PyPromiseResolverType must already be initialised (PyType_Ready)
+// before this runs; that call is not in the visible part of the file - confirm.
+static PyPromiseResolver* createPromiseResolver(JSGlobalObject* globalObject, JSPromise* promise)
+{
+    PyPromiseResolver* resolver = PyObject_New(PyPromiseResolver, &PyPromiseResolverType);
+    if (!resolver) {
+        return nullptr;
+    }
+
+    // PyObject_New does not run C++ constructors, so the Strong handle is
+    // constructed in place inside the freshly allocated object.
+    new (&resolver->promise) JSC::Strong(globalObject->vm(), promise);
+    resolver->globalObject = globalObject;
+    return resolver;
+}
+
+// =============================================================================
+// Coroutine to Promise Conversion (used internally by toJS)
+// =============================================================================
+
+// Forward declaration
+static void ensureBunEventLoopRunning();
+
+// Schedules `coro` as a task on the running asyncio loop and returns a JSPromise
+// that a PyPromiseResolver done-callback settles when the task finishes.
+// On any failure a JS exception is thrown on `scope` and an empty JSValue is
+// returned; Python errors on those paths are printed with PyErr_Print().
+static JSValue coroutineToPromise(JSGlobalObject* globalObject, PyObject* coro)
+{
+    VM& vm = globalObject->vm();
+    auto scope = DECLARE_THROW_SCOPE(vm);
+
+    // Ensure BunEventLoop is running
+    ensureBunEventLoopRunning();
+
+    // Import asyncio
+    PyObject* asyncio = PyImport_ImportModule("asyncio");
+    if (!asyncio) {
+        PyErr_Print();
+        throwTypeError(globalObject, scope, "Failed to import asyncio"_s);
+        return {};
+    }
+
+    // Get the running loop
+    PyObject* getRunningLoop = PyObject_GetAttrString(asyncio, "get_running_loop");
+    if (!getRunningLoop) {
+        Py_DECREF(asyncio);
+        PyErr_Print();
+        throwTypeError(globalObject, scope, "Failed to get get_running_loop"_s);
+        return {};
+    }
+
+    PyObject* loop = PyObject_CallNoArgs(getRunningLoop);
+    Py_DECREF(getRunningLoop);
+
+    if (!loop) {
+        Py_DECREF(asyncio);
+        PyErr_Print();
+        throwTypeError(globalObject, scope, "No running event loop"_s);
+        return {};
+    }
+
+    // Create a Task: loop.create_task(coro)
+    PyObject* createTask = PyObject_GetAttrString(loop, "create_task");
+    if (!createTask) {
+        Py_DECREF(loop);
+        Py_DECREF(asyncio);
+        PyErr_Print();
+        throwTypeError(globalObject, scope, "Failed to get create_task"_s);
+        return {};
+    }
+
+    PyObject* task = PyObject_CallOneArg(createTask, coro);
+    Py_DECREF(createTask);
+
+    if (!task) {
+
Py_DECREF(loop); + Py_DECREF(asyncio); + PyErr_Print(); + throwTypeError(globalObject, scope, "Failed to create task"_s); + return {}; + } + + // Create JavaScript Promise + JSPromise* promise = JSPromise::create(vm, globalObject->promiseStructure()); + + // Create the resolver callback + PyPromiseResolver* resolver = createPromiseResolver(globalObject, promise); + if (!resolver) { + Py_DECREF(task); + Py_DECREF(loop); + Py_DECREF(asyncio); + throwOutOfMemoryError(globalObject, scope); + return {}; + } + + // Add done callback: task.add_done_callback(resolver) + PyObject* addDoneCallback = PyObject_GetAttrString(task, "add_done_callback"); + if (!addDoneCallback) { + Py_DECREF(reinterpret_cast(resolver)); + Py_DECREF(task); + Py_DECREF(loop); + Py_DECREF(asyncio); + PyErr_Print(); + throwTypeError(globalObject, scope, "Failed to get add_done_callback"_s); + return {}; + } + + PyObject* callbackResult = PyObject_CallOneArg(addDoneCallback, reinterpret_cast(resolver)); + Py_DECREF(addDoneCallback); + Py_DECREF(reinterpret_cast(resolver)); + + if (!callbackResult) { + Py_DECREF(task); + Py_DECREF(loop); + Py_DECREF(asyncio); + PyErr_Print(); + throwTypeError(globalObject, scope, "Failed to add done callback"_s); + return {}; + } + Py_DECREF(callbackResult); + + Py_DECREF(task); + Py_DECREF(loop); + Py_DECREF(asyncio); + + return promise; +} + +static void ensureBunEventLoopRunning() +{ + if (g_bunEventLoopInitialized && g_bunEventLoop) { + // Already set up, but make sure it's still the running loop + PyObject* asyncioEvents = PyImport_ImportModule("asyncio.events"); + if (asyncioEvents) { + PyObject* setRunningLoop = PyObject_GetAttrString(asyncioEvents, "_set_running_loop"); + if (setRunningLoop) { + PyObject* result = PyObject_CallOneArg(setRunningLoop, g_bunEventLoop); + Py_XDECREF(result); + Py_DECREF(setRunningLoop); + } + Py_DECREF(asyncioEvents); + } + PyErr_Clear(); + return; + } + + // Get the _bun_event_loop module from __main__ + PyObject* mainModule = 
PyImport_AddModule("__main__"); + if (!mainModule) { + PyErr_Print(); + return; + } + + PyObject* mainDict = PyModule_GetDict(mainModule); + + // Call set_bun_loop_running() + PyObject* setBunLoopRunning = PyDict_GetItemString(mainDict, "set_bun_loop_running"); + if (!setBunLoopRunning) { + PyErr_Print(); + return; + } + + PyObject* loop = PyObject_CallNoArgs(setBunLoopRunning); + if (!loop) { + PyErr_Print(); + return; + } + + g_bunEventLoop = loop; // Keep a reference + g_bunEventLoopInitialized = true; +} + +// Convert PyObject to JSValue - may return JSPyObject for complex types +JSValue toJS(JSGlobalObject* globalObject, PyObject* obj) +{ + if (!obj || obj == Py_None) { + return jsNull(); + } + + // Check if this is a wrapped JSValue (PyJSValueObject, PyJSDictObject, PyJSListObject) + // If so, unwrap it directly instead of wrapping in JSPyObject + JSValue unwrapped = tryUnwrapJSValue(obj); + if (unwrapped) { + return unwrapped; + } + + // Primitive types get converted directly + if (PyBool_Check(obj)) { + return jsBoolean(obj == Py_True); + } + + if (PyLong_Check(obj)) { + // Check if it fits in a safe integer range + int overflow; + long long val = PyLong_AsLongLongAndOverflow(obj, &overflow); + if (overflow == 0) { + return jsNumber(static_cast(val)); + } + // For large integers, convert to double (may lose precision) + return jsNumber(PyLong_AsDouble(obj)); + } + + if (PyFloat_Check(obj)) { + return jsNumber(PyFloat_AsDouble(obj)); + } + + VM& vm = globalObject->vm(); + + if (PyUnicode_Check(obj)) { + Py_ssize_t size; + const char* str = PyUnicode_AsUTF8AndSize(obj, &size); + if (str) { + return jsString(vm, WTF::String::fromUTF8({ str, static_cast(size) })); + } + return jsNull(); + } + + // Check for coroutines - convert to JavaScript Promise + if (PyCoro_CheckExact(obj) || PyAsyncGen_CheckExact(obj)) { + return coroutineToPromise(globalObject, obj); + } + + // For all other types (lists, dicts, objects, callables, etc.), + // wrap in JSPyObject + auto* 
zigGlobalObject = jsCast(globalObject); + + // Use Array.prototype for sequences (lists, tuples) so JS array methods work + Structure* structure; + if (PySequence_Check(obj) && !PyUnicode_Check(obj) && !PyBytes_Check(obj)) { + structure = zigGlobalObject->m_JSPyArrayStructure.get(); + if (!structure) { + structure = JSPyObject::createStructure(vm, globalObject, globalObject->arrayPrototype()); + zigGlobalObject->m_JSPyArrayStructure.set(vm, zigGlobalObject, structure); + } + } else { + structure = zigGlobalObject->m_JSPyObjectStructure.get(); + if (!structure) { + structure = JSPyObject::createStructure(vm, globalObject, globalObject->objectPrototype()); + zigGlobalObject->m_JSPyObjectStructure.set(vm, zigGlobalObject, structure); + } + } + + return JSPyObject::create(vm, globalObject, structure, obj); +} + +// Create a PyObject from a JSValue +// For arrays: returns PyJSListObject (list-like wrapper with shared reference) +// For objects: returns PyJSDictObject (dict-like wrapper with shared reference) +// For primitives: returns native Python types +// For functions/other: returns PyJSValueObject +PyObject* fromJS(JSGlobalObject* globalObject, JSValue value) +{ + // Convert primitives directly to Python types + if (value.isUndefined() || value.isNull()) { + Py_RETURN_NONE; + } + if (value.isBoolean()) { + if (value.asBoolean()) { + Py_RETURN_TRUE; + } + Py_RETURN_FALSE; + } + if (value.isInt32()) { + return PyLong_FromLong(value.asInt32()); + } + if (value.isNumber()) { + return PyFloat_FromDouble(value.asNumber()); + } + if (value.isString()) { + auto str = value.toWTFString(globalObject); + auto utf8 = str.utf8(); + return PyUnicode_FromStringAndSize(utf8.data(), utf8.length()); + } + + // For arrays, use PyJSListObject (wrapper with shared reference) + if (isArray(globalObject, value)) { + return reinterpret_cast(PyJSValueObject::NewList(globalObject, value)); + } + + // For Promises, use PyJSValueObject (which has am_await support) + if (jsDynamicCast(value)) 
{ + PyJSValueObject* wrapper = PyJSValueObject::New(); + if (!wrapper) { + return nullptr; + } + wrapper->jsValue = value; + wrapper->globalObject = globalObject; + if (value.isCell()) { + gcProtect(value.asCell()); + } + return reinterpret_cast(wrapper); + } + + // For iterators/generators (objects with 'next' method), use PyJSValueObject (which has iterator support) + if (value.isObject()) { + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + JSObject* jsObj = value.getObject(); + JSValue nextMethod = jsObj->get(globalObject, Identifier::fromString(vm, "next"_s)); + if (!scope.exception() && nextMethod.isCallable()) { + // It's an iterator - wrap in PyJSValueObject for iterator protocol support + PyJSValueObject* wrapper = PyJSValueObject::New(); + if (!wrapper) { + return nullptr; + } + wrapper->jsValue = value; + wrapper->globalObject = globalObject; + if (value.isCell()) { + gcProtect(value.asCell()); + } + return reinterpret_cast(wrapper); + } + scope.clearException(); + } + + // For plain objects, use PyJSDictObject (wrapper with shared reference) + if (value.isObject() && !value.isCallable()) { + return reinterpret_cast(PyJSValueObject::NewDict(globalObject, value)); + } + + // For functions and other complex objects - wrap in PyJSValueObject + PyJSValueObject* wrapper = PyJSValueObject::New(); + if (!wrapper) { + return nullptr; + } + + wrapper->jsValue = value; + wrapper->globalObject = globalObject; + + // Protect from JavaScript GC while Python holds a reference + if (value.isCell()) { + gcProtect(value.asCell()); + } + + return reinterpret_cast(wrapper); +} + +static std::once_flag pythonInitFlag; + +static void finalizePython() +{ + if (Py_IsInitialized()) { + Py_Finalize(); + } +} + +// Forward declarations +static void registerJSImportHook(); +static void initPyJSValueType(); + +// Stringify macros for constructing paths +#define PYTHON_STRINGIFY(x) #x +#define PYTHON_TOSTRING(x) PYTHON_STRINGIFY(x) +#define 
PYTHON_VERSION_STRING PYTHON_TOSTRING(PY_MAJOR_VERSION) "." PYTHON_TOSTRING(PY_MINOR_VERSION) + +// Python install root - set by CMake via target_compile_definitions +// Fallback only used if CMake doesn't define it (shouldn't happen in normal builds) +#ifndef PYTHON_ROOT +#error "PYTHON_ROOT must be defined by CMake" +#endif + +void ensurePythonInitialized() +{ + std::call_once(pythonInitFlag, []() { + if (!Py_IsInitialized()) { + PyConfig config; + PyConfig_InitPythonConfig(&config); + + // Construct paths using Python version from the linked library + static const wchar_t* pythonHome = L"" PYTHON_ROOT; + static const wchar_t* stdlibDir = L"" PYTHON_ROOT "/lib/python" PYTHON_VERSION_STRING; + + PyConfig_SetString(&config, &config.home, pythonHome); + PyConfig_SetString(&config, &config.stdlib_dir, stdlibDir); + // Disable buffered stdio so Python's print() flushes immediately + config.buffered_stdio = 0; + + PyStatus status = Py_InitializeFromConfig(&config); + if (PyStatus_Exception(status)) { + PyConfig_Clear(&config); + Py_Initialize(); + } else { + PyConfig_Clear(&config); + } + + Bun__atexit(finalizePython); + + // Initialize the PyJSValueObject type for wrapping JS values in Python + PyJSValueObject::initType(); + + // Register the JS import hook so Python can import JS modules + registerJSImportHook(); + } + }); +} + +static std::once_flag venvPathFlag; + +// Add .venv/lib/python{version}/site-packages to sys.path for local Python packages +// This is called after ensurePythonInitialized() when we have access to the JSGlobalObject +void ensureVenvPathAdded(JSGlobalObject* globalObject) +{ + std::call_once(venvPathFlag, [globalObject]() { + // Get cwd using Bun's process.cwd() implementation + auto cwdValue = JSC::JSValue::decode(Bun__Process__getCwd(globalObject)); + if (!cwdValue || !cwdValue.isString()) + return; + + auto cwdString = cwdValue.toWTFString(globalObject); + auto cwdUTF8 = cwdString.utf8(); + + // Construct path: 
/.venv/lib/python{major}.{minor}/site-packages + // This matches where bun install puts Python packages + char venvPath[PATH_MAX]; + snprintf(venvPath, sizeof(venvPath), "%s/.venv/lib/python" PYTHON_VERSION_STRING "/site-packages", cwdUTF8.data()); + + // Check if directory exists + struct stat st; + if (stat(venvPath, &st) == 0 && S_ISDIR(st.st_mode)) { + PyObject* sysPath = PySys_GetObject("path"); + if (sysPath && PyList_Check(sysPath)) { + PyObject* dirStr = PyUnicode_FromString(venvPath); + if (dirStr) { + PyList_Insert(sysPath, 0, dirStr); + Py_DECREF(dirStr); + } + } + } + }); +} + +static const char* BUN_GLOBAL_KEY = "bun.jsglobal"; + +// Store JSGlobalObject in Python's thread state dict +static void setThreadJSGlobal(JSGlobalObject* global) +{ + PyObject* threadDict = PyThreadState_GetDict(); + if (!threadDict) + return; + + PyObject* capsule = PyCapsule_New(global, BUN_GLOBAL_KEY, nullptr); + if (capsule) { + PyDict_SetItemString(threadDict, BUN_GLOBAL_KEY, capsule); + Py_DECREF(capsule); + } +} + +// Retrieve JSGlobalObject from Python's thread state dict +static JSGlobalObject* getThreadJSGlobal() +{ + PyObject* threadDict = PyThreadState_GetDict(); + if (!threadDict) + return nullptr; + + PyObject* capsule = PyDict_GetItemString(threadDict, BUN_GLOBAL_KEY); + if (!capsule || !PyCapsule_CheckExact(capsule)) + return nullptr; + + return static_cast(PyCapsule_GetPointer(capsule, BUN_GLOBAL_KEY)); +} + +// C function callable from Python to load a JS/TS/JSX module +static PyObject* bun_load_js_module(PyObject* self, PyObject* args) +{ + const char* filePath; + + if (!PyArg_ParseTuple(args, "s", &filePath)) { + return nullptr; + } + + JSGlobalObject* globalObject = getThreadJSGlobal(); + if (!globalObject) { + PyErr_SetString(PyExc_RuntimeError, "No JavaScript context available"); + return nullptr; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + // Create the file URL for the module + WTF::String filePathStr = 
WTF::String::fromUTF8(filePath); + + // Use importModule to load the ES module + auto* promise = JSC::importModule(globalObject, Identifier::fromString(vm, filePathStr), jsUndefined(), jsUndefined(), jsUndefined()); + + if (!promise) { + if (scope.exception()) { + JSValue exception = scope.exception()->value(); + scope.clearException(); + auto msg = exception.toWTFString(globalObject); + PyErr_Format(PyExc_RuntimeError, "JavaScript error: %s", msg.utf8().data()); + } else { + PyErr_Format(PyExc_RuntimeError, "Failed to import module: %s", filePath); + } + return nullptr; + } + + // Drain the microtask queue to allow the module to load + vm.drainMicrotasks(); + + auto status = promise->status(); + + if (status == JSPromise::Status::Fulfilled) { + JSValue result = promise->result(); + return Python::fromJS(globalObject, result); + } else if (status == JSPromise::Status::Rejected) { + JSValue error = promise->result(); + auto msg = error.toWTFString(globalObject); + PyErr_Format(PyExc_RuntimeError, "JavaScript error: %s", msg.utf8().data()); + return nullptr; + } else { + // Promise is still pending - this shouldn't happen for simple modules + PyErr_SetString(PyExc_RuntimeError, "Module loading is pending - async imports not yet supported"); + return nullptr; + } +} + +// Get the current working directory +static PyObject* bun_get_cwd(PyObject* self, PyObject* args) +{ + char cwd[PATH_MAX]; + if (getcwd(cwd, sizeof(cwd))) { + return PyUnicode_FromString(cwd); + } + Py_RETURN_NONE; +} + +// Python C function: _bun._load_bun_module() -> Bun module object +// This loads the JS "Bun" global object and wraps it for Python +static PyObject* bun_load_bun_module(PyObject* self, PyObject* args) +{ + JSGlobalObject* globalObject = getThreadJSGlobal(); + if (!globalObject) { + PyErr_SetString(PyExc_RuntimeError, "No JavaScript context available"); + return nullptr; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + // Get the "Bun" object 
from global scope + JSValue bunObject = globalObject->get(globalObject, + Identifier::fromString(vm, "Bun"_s)); + + if (scope.exception()) { + scope.clearException(); + PyErr_SetString(PyExc_RuntimeError, "Error accessing Bun global"); + return nullptr; + } + + if (bunObject.isUndefined() || bunObject.isNull()) { + PyErr_SetString(PyExc_RuntimeError, "Bun global not found"); + return nullptr; + } + + // Wrap the Bun object as a Python object + // This will create a JSPyObject that proxies all attribute access to the JS object + return Python::fromJS(globalObject, bunObject); +} + +// Map node module names to InternalModuleRegistry::Field enum values +static std::optional getNodeModuleField(const char* name) +{ + using Field = Bun::InternalModuleRegistry::Field; + + // Note: These mappings correspond to the generated InternalModuleRegistry+enum.h + static const std::unordered_map moduleMap = { + { "assert", Field::NodeAssert }, + { "assert/strict", Field::NodeAssertStrict }, + { "async_hooks", Field::NodeAsyncHooks }, + { "buffer", Field::NodeBuffer }, + { "child_process", Field::NodeChildProcess }, + { "cluster", Field::NodeCluster }, + { "console", Field::NodeConsole }, + { "constants", Field::NodeConstants }, + { "crypto", Field::NodeCrypto }, + { "dgram", Field::NodeDgram }, + { "diagnostics_channel", Field::NodeDiagnosticsChannel }, + { "dns", Field::NodeDNS }, + { "dns/promises", Field::NodeDNSPromises }, + { "domain", Field::NodeDomain }, + { "events", Field::NodeEvents }, + { "fs", Field::NodeFS }, + { "fs/promises", Field::NodeFSPromises }, + { "http", Field::NodeHttp }, + { "http2", Field::NodeHttp2 }, + { "https", Field::NodeHttps }, + { "inspector", Field::NodeInspector }, + { "module", Field::NodeModule }, + { "net", Field::NodeNet }, + { "os", Field::NodeOS }, + { "path", Field::NodePath }, + { "path/posix", Field::NodePathPosix }, + { "path/win32", Field::NodePathWin32 }, + { "perf_hooks", Field::NodePerfHooks }, + { "process", Field::NodeProcess }, + 
{ "punycode", Field::NodePunycode }, + { "querystring", Field::NodeQuerystring }, + { "readline", Field::NodeReadline }, + { "readline/promises", Field::NodeReadlinePromises }, + { "repl", Field::NodeRepl }, + { "stream", Field::NodeStream }, + { "stream/consumers", Field::NodeStreamConsumers }, + { "stream/promises", Field::NodeStreamPromises }, + { "stream/web", Field::NodeStreamWeb }, + { "string_decoder", Field::NodeStringDecoder }, + { "test", Field::NodeTest }, + { "timers", Field::NodeTimers }, + { "timers/promises", Field::NodeTimersPromises }, + { "tls", Field::NodeTLS }, + { "trace_events", Field::NodeTraceEvents }, + { "tty", Field::NodeTty }, + { "url", Field::NodeUrl }, + { "util", Field::NodeUtil }, + { "util/types", Field::NodeUtilTypes }, + { "v8", Field::NodeV8 }, + { "vm", Field::NodeVM }, + { "wasi", Field::NodeWasi }, + { "worker_threads", Field::NodeWorkerThreads }, + { "zlib", Field::NodeZlib }, + }; + + auto it = moduleMap.find(name); + if (it != moduleMap.end()) { + return it->second; + } + return std::nullopt; +} + +// Python C function: _bun._load_node_module(name) -> Node module object +// This loads a Node.js built-in module like "path", "fs", etc. 
+static PyObject* bun_load_node_module(PyObject* self, PyObject* args) +{ + const char* moduleName; + if (!PyArg_ParseTuple(args, "s", &moduleName)) { + return nullptr; + } + + auto* globalObject = jsCast(getThreadJSGlobal()); + if (!globalObject) { + PyErr_SetString(PyExc_RuntimeError, "No JavaScript context available"); + return nullptr; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + // Look up the module in our mapping + auto fieldOpt = getNodeModuleField(moduleName); + if (!fieldOpt.has_value()) { + PyErr_Format(PyExc_ImportError, "Unknown Node.js module: '%s'", moduleName); + return nullptr; + } + + // Load the module via internalModuleRegistry + JSValue moduleValue = globalObject->internalModuleRegistry()->requireId( + globalObject, vm, fieldOpt.value()); + + if (scope.exception()) { + JSValue exception = scope.exception()->value(); + scope.clearException(); + + // Try to get a useful error message + if (exception.isObject()) { + JSObject* errObj = exception.getObject(); + JSValue msgVal = errObj->get(globalObject, Identifier::fromString(vm, "message"_s)); + if (msgVal.isString()) { + auto msg = msgVal.toWTFString(globalObject); + PyErr_Format(PyExc_ImportError, "Cannot import 'node:%s': %s", moduleName, msg.utf8().data()); + return nullptr; + } + } + PyErr_Format(PyExc_ImportError, "Cannot import 'node:%s'", moduleName); + return nullptr; + } + + if (moduleValue.isUndefined() || moduleValue.isNull()) { + PyErr_Format(PyExc_ImportError, "Module 'node:%s' not found", moduleName); + return nullptr; + } + + return Python::fromJS(globalObject, moduleValue); +} + +// Python C function: _bun._get_global_this() -> globalThis object +// This returns the JavaScript globalThis wrapped as a PyJSValue +static PyObject* bun_get_global_this(PyObject* self, PyObject* args) +{ + auto* globalObject = jsCast(getThreadJSGlobal()); + if (!globalObject) { + PyErr_SetString(PyExc_RuntimeError, "No JavaScript context available"); + return 
nullptr; + } + + JSValue globalThis = globalObject->globalThis(); + return Python::fromJS(globalObject, globalThis); +} + +static PyMethodDef bunModuleMethods[] = { + { "_load_js_module", bun_load_js_module, METH_VARARGS, "Load a JavaScript module" }, + { "_load_bun_module", bun_load_bun_module, METH_NOARGS, "Load Bun APIs as Python module" }, + { "_load_node_module", bun_load_node_module, METH_VARARGS, "Load a Node.js built-in module" }, + { "_get_global_this", bun_get_global_this, METH_NOARGS, "Get JavaScript globalThis object" }, + { "_get_cwd", bun_get_cwd, METH_NOARGS, "Get current working directory" }, + { "_schedule_timer", bun_schedule_timer, METH_VARARGS, "Schedule a timer callback" }, + { "_schedule_soon", bun_schedule_soon, METH_VARARGS, "Schedule an immediate callback" }, + { "_time", bun_time, METH_NOARGS, "Get monotonic time in seconds" }, + { "_cancel_handle", bun_cancel_handle, METH_VARARGS, "Cancel a scheduled callback" }, + { "_tick", bun_tick, METH_NOARGS, "Run one iteration of Bun's event loop" }, + { nullptr, nullptr, 0, nullptr } +}; + +static struct PyModuleDef bunModuleDef = { + PyModuleDef_HEAD_INIT, + "_bun", + "Bun internal module", + -1, + bunModuleMethods +}; + +// Python code for the JS import hook +static const char* jsImportHookCode = R"( +import sys +import os +from types import ModuleType +from importlib.machinery import ModuleSpec +import _bun + +class BunModuleFinder: + """Special finder for 'bun' module - bridges to Bun's JS APIs""" + + def find_spec(self, fullname, path, target=None): + if fullname == "bun": + return ModuleSpec("bun", BunModuleLoader(), origin="bun://runtime") + return None + + +class BunModuleLoader: + """Loader for the 'bun' module - wraps JS Bun object""" + + def create_module(self, spec): + # Load Bun's JS "Bun" global object and wrap it for Python + return _bun._load_bun_module() + + def exec_module(self, module): + pass + + +class NodeProxyModule(ModuleType): + """Proxy module for 'node' that lazily loads 
submodules on attribute access. + + Supports: + import node + node.path.join(...) + from node import path, fs + import node.path + """ + _cache = {} + + def __init__(self): + super().__init__('node') + self.__path__ = [] # Makes it a package + self.__package__ = 'node' + + def __getattr__(self, name): + if name.startswith('_'): + raise AttributeError(f"module 'node' has no attribute '{name}'") + + # Check cache first + if name in NodeProxyModule._cache: + return NodeProxyModule._cache[name] + + # Load the node module + try: + module = _bun._load_node_module(name) + NodeProxyModule._cache[name] = module + # Also register in sys.modules for subsequent imports + sys.modules[f'node.{name}'] = module + return module + except ImportError as e: + raise AttributeError(f"module 'node' has no attribute '{name}'") from e + + +class NodeModuleFinder: + """Finder for 'node' and 'node.*' imports""" + + def find_spec(self, fullname, path, target=None): + if fullname == "node": + return ModuleSpec("node", NodeModuleLoader(), origin="node://builtin", is_package=True) + + if fullname.startswith("node."): + # Handle node.path, node.fs, node.fs.promises, etc. + submodule = fullname.split('.', 1)[1] + # Convert dots to slashes for Node.js submodule format + # e.g., "fs.promises" -> "fs/promises" + node_module_name = submodule.replace('.', '/') + return ModuleSpec(fullname, NodeSubmoduleLoader(node_module_name), origin=f"node://{node_module_name}") + + return None + + +class NodeModuleLoader: + """Loader for the 'node' package - returns the proxy module""" + + def create_module(self, spec): + return NodeProxyModule() + + def exec_module(self, module): + pass + + +class NodeModuleWrapper(ModuleType): + """Wrapper that makes a Node.js module appear as a Python package. 
+ + This allows imports like: + from node.fs.promises import writeFile + from node.path.posix import basename + """ + + def __init__(self, name, js_module, python_name): + super().__init__(python_name) + self._js_module = js_module + self.__path__ = [] # Makes it a package + self.__package__ = python_name + + def __getattr__(self, name): + if name.startswith('_'): + raise AttributeError(f"module has no attribute '{name}'") + return getattr(self._js_module, name) + + def __dir__(self): + return dir(self._js_module) + + +class NodeSubmoduleLoader: + """Loader for node.* submodules like node.path, node.fs, node.fs.promises""" + + def __init__(self, name): + # name is in Node.js format: "fs", "fs/promises", "path/posix", etc. + self.name = name + + def create_module(self, spec): + # Check if already cached + if self.name in NodeProxyModule._cache: + cached = NodeProxyModule._cache[self.name] + # Return existing module if it's already wrapped + if isinstance(cached, NodeModuleWrapper): + return cached + return cached + + js_module = _bun._load_node_module(self.name) + + # Wrap in NodeModuleWrapper to make it act as a package + # This allows submodule imports like node.fs.promises + module = NodeModuleWrapper(self.name, js_module, spec.name) + + NodeProxyModule._cache[self.name] = module + # Also register in sys.modules for the full Python path + sys.modules[spec.name] = module + return module + + def exec_module(self, module): + pass + + +class JSModuleFinder: + def find_spec(self, fullname, path, target=None): + # Skip standard library and already-loaded modules + if fullname in sys.modules: + return None + + # Search sys.path entries (similar to how Python searches for .py files) + search_paths = sys.path if sys.path else [_bun._get_cwd() or os.getcwd()] + + for base_dir in search_paths: + if not base_dir: + base_dir = _bun._get_cwd() or os.getcwd() + + # Look for JS/TS/JSX/TSX files + for ext in ['.js', '.ts', '.jsx', '.tsx', '.mjs', '.mts']: + js_path = 
os.path.join(base_dir, fullname + ext) + if os.path.exists(js_path): + return ModuleSpec(fullname, JSModuleLoader(js_path), origin=js_path) + + return None + + +class JSModuleLoader: + def __init__(self, path): + self.path = path + + def create_module(self, spec): + return _bun._load_js_module(self.path) + + def exec_module(self, module): + pass + + +class JSGlobalProxyModule(ModuleType): + """Proxy module for 'js' that wraps JavaScript's globalThis. + + Supports: + import js + js.Response, js.fetch, js.console, etc. + from js import Response, fetch, URL, Headers + """ + _global_this = None + + def __init__(self): + super().__init__('js') + self.__package__ = 'js' + + @classmethod + def _get_global(cls): + if cls._global_this is None: + cls._global_this = _bun._get_global_this() + return cls._global_this + + def __getattr__(self, name): + if name.startswith('_'): + raise AttributeError(f"module 'js' has no attribute '{name}'") + return getattr(JSGlobalProxyModule._get_global(), name) + + def __dir__(self): + return dir(JSGlobalProxyModule._get_global()) + + +class JSGlobalModuleFinder: + """Finder for 'js' module - provides access to JavaScript globalThis""" + + def find_spec(self, fullname, path, target=None): + if fullname == "js": + return ModuleSpec("js", JSGlobalModuleLoader(), origin="js://globalThis") + return None + + +class JSGlobalModuleLoader: + """Loader for the 'js' module - returns the globalThis proxy""" + + def create_module(self, spec): + return JSGlobalProxyModule() + + def exec_module(self, module): + pass + + +# Register finders in order of priority +sys.meta_path.insert(0, BunModuleFinder()) +sys.meta_path.insert(1, NodeModuleFinder()) +sys.meta_path.insert(2, JSGlobalModuleFinder()) +sys.meta_path.insert(3, JSModuleFinder()) +)"; + +static bool jsImportHookRegistered = false; + +static void registerJSImportHook() +{ + if (jsImportHookRegistered) + return; + + // Initialize the PyPromiseResolverType + if (PyType_Ready(&PyPromiseResolverType) < 
0) { + PyErr_Print(); + return; + } + + // Create the _bun module + PyObject* bunModule = PyModule_Create(&bunModuleDef); + if (!bunModule) { + PyErr_Print(); + return; + } + + // Add PromiseResolver type to the module + Py_INCREF(&PyPromiseResolverType); + if (PyModule_AddObject(bunModule, "PromiseResolver", reinterpret_cast(&PyPromiseResolverType)) < 0) { + Py_DECREF(&PyPromiseResolverType); + Py_DECREF(bunModule); + PyErr_Print(); + return; + } + + PyObject* sysModules = PyImport_GetModuleDict(); + PyDict_SetItemString(sysModules, "_bun", bunModule); + Py_DECREF(bunModule); + + // Execute the import hook registration code + PyObject* mainModule = PyImport_AddModule("__main__"); + PyObject* mainDict = PyModule_GetDict(mainModule); + + PyObject* result = PyRun_String(jsImportHookCode, Py_file_input, mainDict, mainDict); + if (!result) { + PyErr_Print(); + return; + } + Py_DECREF(result); + + // Execute the BunEventLoop registration code + result = PyRun_String(bunEventLoopCode, Py_file_input, mainDict, mainDict); + if (!result) { + PyErr_Print(); + return; + } + Py_DECREF(result); + + jsImportHookRegistered = true; +} + +SyntheticSourceProvider::SyntheticSourceGenerator +generatePythonModuleSourceCode(JSGlobalObject* globalObject, const WTF::String& filePath, bool isMainEntry) +{ + return [filePath = filePath.isolatedCopy(), isMainEntry](JSGlobalObject* lexicalGlobalObject, + Identifier moduleKey, + Vector& exportNames, + MarkedArgumentBuffer& exportValues) -> void { + VM& vm = lexicalGlobalObject->vm(); + auto scope = DECLARE_THROW_SCOPE(vm); + + ensurePythonInitialized(); + ensureVenvPathAdded(lexicalGlobalObject); + + // Set the JavaScript global for this thread so Python can import JS modules + setThreadJSGlobal(lexicalGlobalObject); + + // Read the Python file + auto pathUTF8 = filePath.utf8(); + FILE* fp = fopen(pathUTF8.data(), "rb"); + if (!fp) { + throwTypeError(lexicalGlobalObject, scope, makeString("Cannot open Python file: "_s, filePath)); + return; + 
} + + // Read file content + fseek(fp, 0, SEEK_END); + long fileSize = ftell(fp); + fseek(fp, 0, SEEK_SET); + + auto fileContent = std::make_unique(fileSize + 1); + size_t bytesRead = fread(fileContent.get(), 1, fileSize, fp); + fclose(fp); + fileContent[bytesRead] = '\0'; + + // Create the module name following Python semantics: + // - If running as main entry point: use "__main__" + // - If imported: derive from filename without path and .py extension + WTF::String moduleName; + if (isMainEntry) { + moduleName = "__main__"_s; + } else { + // Extract just the filename without path + size_t lastSlash = filePath.reverseFind('/'); + size_t lastBackslash = filePath.reverseFind('\\'); + size_t start = 0; + if (lastSlash != notFound) + start = lastSlash + 1; + if (lastBackslash != notFound && lastBackslash >= start) + start = lastBackslash + 1; + + // Remove .py extension if present + size_t lastDot = filePath.reverseFind('.'); + size_t end = filePath.length(); + if (lastDot != notFound && lastDot > start) + end = lastDot; + + moduleName = filePath.substring(start, end - start); + } + auto moduleNameUTF8 = moduleName.utf8(); + + // Add the script's directory to sys.path[0] (standard Python behavior) + { + size_t lastSlash = filePath.reverseFind('/'); + size_t lastBackslash = filePath.reverseFind('\\'); + WTF::String scriptDir; + if (lastSlash != notFound || lastBackslash != notFound) { + size_t lastSep = lastSlash != notFound ? 
lastSlash : 0; + if (lastBackslash != notFound && lastBackslash > lastSep) + lastSep = lastBackslash; + scriptDir = filePath.substring(0, lastSep); + } else { + scriptDir = "."_s; + } + + PyObject* sysPath = PySys_GetObject("path"); + if (sysPath && PyList_Check(sysPath)) { + auto scriptDirUTF8 = scriptDir.utf8(); + PyObject* dirStr = PyUnicode_FromString(scriptDirUTF8.data()); + if (dirStr) { + PyList_Insert(sysPath, 0, dirStr); + Py_DECREF(dirStr); + } + } + } + + // Compile the Python source + PyObject* code = Py_CompileString(fileContent.get(), pathUTF8.data(), Py_file_input); + + if (!code) { + PyErr_Print(); + PyErr_Clear(); + throwTypeError(lexicalGlobalObject, scope, makeString("Python compile error in: "_s, filePath)); + return; + } + + // Execute as a module + PyObject* module = PyImport_ExecCodeModule(moduleNameUTF8.data(), code); + Py_DECREF(code); + + if (!module) { + PyErr_Print(); + PyErr_Clear(); + throwTypeError(lexicalGlobalObject, scope, makeString("Python execution error in: "_s, filePath)); + return; + } + + // Get module dict (borrowed reference) + PyObject* dict = PyModule_GetDict(module); + + // Create the module object as default export + auto* zigGlobalObject = jsCast(lexicalGlobalObject); + Structure* structure = zigGlobalObject->m_JSPyObjectStructure.get(); + if (!structure) { + structure = JSPyObject::createStructure(vm, lexicalGlobalObject, lexicalGlobalObject->objectPrototype()); + zigGlobalObject->m_JSPyObjectStructure.set(vm, zigGlobalObject, structure); + } + + // Add default export - the module itself + exportNames.append(vm.propertyNames->defaultKeyword); + JSPyObject* moduleValue = JSPyObject::create(vm, lexicalGlobalObject, structure, module); + exportValues.append(moduleValue); + + // Iterate module dict and add named exports for public symbols + PyObject *key, *value; + Py_ssize_t pos = 0; + while (PyDict_Next(dict, &pos, &key, &value)) { + if (!PyUnicode_Check(key)) + continue; + + const char* keyStr = PyUnicode_AsUTF8(key); 
+ if (!keyStr) + continue; + + // Skip private attributes (single underscore) but allow dunder attributes + if (keyStr[0] == '_') { + // Check if it's a dunder attribute (starts with __ and ends with __) + // These are useful: __version__, __name__, __file__, __doc__, __all__, etc. + size_t len = strlen(keyStr); + bool isDunder = len >= 4 && keyStr[1] == '_' && keyStr[len - 1] == '_' && keyStr[len - 2] == '_'; + if (!isDunder) + continue; // Skip single underscore private attributes + } + + exportNames.append(Identifier::fromString(vm, String::fromUTF8(keyStr))); + exportValues.append(Python::toJS(lexicalGlobalObject, value)); + } + + // Don't DECREF module here - the JSPyObject holds a reference + }; +} + +SyntheticSourceProvider::SyntheticSourceGenerator +generatePythonBuiltinModuleSourceCode(JSGlobalObject* globalObject, const WTF::String& moduleName) +{ + return [moduleName = moduleName.isolatedCopy()](JSGlobalObject* lexicalGlobalObject, + Identifier moduleKey, + Vector& exportNames, + MarkedArgumentBuffer& exportValues) -> void { + VM& vm = lexicalGlobalObject->vm(); + auto scope = DECLARE_THROW_SCOPE(vm); + + ensurePythonInitialized(); + ensureVenvPathAdded(lexicalGlobalObject); + + // Set the JavaScript global for this thread so Python can import JS modules + setThreadJSGlobal(lexicalGlobalObject); + + // Strip "python:" prefix to get the actual Python module name + WTF::String pythonModuleName = moduleName; + if (moduleName.startsWith("python:"_s)) { + pythonModuleName = moduleName.substring(7); + } + + // Convert slashes to dots for Python submodule notation + // e.g., "matplotlib/pyplot" -> "matplotlib.pyplot" + // Dots in the original name are left as-is (valid in package names) + auto moduleNameUTF8 = pythonModuleName.utf8(); + const char* moduleNameToImport = moduleNameUTF8.data(); + char convertedBuffer[512]; + + // Check if we need to convert slashes + if (pythonModuleName.contains('/') && moduleNameUTF8.length() < sizeof(convertedBuffer) - 1) { + 
for (size_t i = 0; i < moduleNameUTF8.length(); i++) { + char c = moduleNameUTF8.data()[i]; + convertedBuffer[i] = (c == '/') ? '.' : c; + } + convertedBuffer[moduleNameUTF8.length()] = '\0'; + moduleNameToImport = convertedBuffer; + } + + // Import the Python builtin module + PyObject* module = PyImport_ImportModule(moduleNameToImport); + + if (!module) { + PyErr_Print(); + PyErr_Clear(); + throwTypeError(lexicalGlobalObject, scope, makeString("Cannot import Python module: "_s, moduleName)); + return; + } + + // Get module dict (borrowed reference) + PyObject* dict = PyModule_GetDict(module); + + // Create the module object as default export + auto* zigGlobalObject = jsCast(lexicalGlobalObject); + Structure* structure = zigGlobalObject->m_JSPyObjectStructure.get(); + if (!structure) { + structure = JSPyObject::createStructure(vm, lexicalGlobalObject, lexicalGlobalObject->objectPrototype()); + zigGlobalObject->m_JSPyObjectStructure.set(vm, zigGlobalObject, structure); + } + + // Add default export - the module itself + exportNames.append(vm.propertyNames->defaultKeyword); + JSPyObject* moduleValue = JSPyObject::create(vm, lexicalGlobalObject, structure, module); + exportValues.append(moduleValue); + + // Iterate module dict and add named exports for public symbols + PyObject *key, *value; + Py_ssize_t pos = 0; + while (PyDict_Next(dict, &pos, &key, &value)) { + if (!PyUnicode_Check(key)) + continue; + + const char* keyStr = PyUnicode_AsUTF8(key); + if (!keyStr) + continue; + + // Skip private attributes (single underscore) but allow dunder attributes + if (keyStr[0] == '_') { + // Check if it's a dunder attribute (starts with __ and ends with __) + // These are useful: __version__, __name__, __file__, __doc__, __all__, etc. 
+ size_t len = strlen(keyStr); + bool isDunder = len >= 4 && keyStr[1] == '_' && keyStr[len - 1] == '_' && keyStr[len - 2] == '_'; + if (!isDunder) + continue; // Skip single underscore private attributes + } + + exportNames.append(Identifier::fromString(vm, String::fromUTF8(keyStr))); + exportValues.append(Python::toJS(lexicalGlobalObject, value)); + } + + // Don't DECREF module here - the JSPyObject holds a reference + }; +} + +} // namespace Bun::Python diff --git a/src/bun.js/bindings/BunPython.h b/src/bun.js/bindings/BunPython.h new file mode 100644 index 0000000000..b3378f4e22 --- /dev/null +++ b/src/bun.js/bindings/BunPython.h @@ -0,0 +1,24 @@ +#pragma once + +#include "root.h" +#include +#include + +namespace Bun::Python { + +// Generate module source code for importing Python files as ES modules +// If isMainEntry is true, __name__ will be "__main__", otherwise it's derived from the filename +JSC::SyntheticSourceProvider::SyntheticSourceGenerator +generatePythonModuleSourceCode(JSC::JSGlobalObject* globalObject, const WTF::String& filePath, bool isMainEntry); + +// Generate module source code for importing Python builtin modules (e.g., "python:pathlib") +JSC::SyntheticSourceProvider::SyntheticSourceGenerator +generatePythonBuiltinModuleSourceCode(JSC::JSGlobalObject* globalObject, const WTF::String& moduleName); + +JSC::JSValue toJS(JSC::JSGlobalObject* globalObject, PyObject* value); +PyObject* fromJS(JSC::JSGlobalObject* globalObject, JSC::JSValue value); + +// Ensure Python is initialized +void ensurePythonInitialized(); + +} // namespace Bun::Python diff --git a/src/bun.js/bindings/JSPyObject.cpp b/src/bun.js/bindings/JSPyObject.cpp new file mode 100644 index 0000000000..737c520068 --- /dev/null +++ b/src/bun.js/bindings/JSPyObject.cpp @@ -0,0 +1,634 @@ +#include "JSPyObject.h" +#include "BunPython.h" +#include "ZigGlobalObject.h" +#include "BunClientData.h" +#include +#include +#include +#include + +namespace Bun { + +using namespace JSC; + +// 
Forward declaration for toString +static JSC_DECLARE_HOST_FUNCTION(jsPyObjectToString); + +// Forward declaration for call +static JSC_DECLARE_HOST_FUNCTION(jsPyObjectCall); + +// Forward declaration for iterator +static JSC_DECLARE_HOST_FUNCTION(jsPyObjectIterator); + +// Forward declaration for iterator next +static JSC_DECLARE_HOST_FUNCTION(jsPyIteratorNext); + +const ClassInfo JSPyObject::s_info = { "PythonValue"_s, &Base::s_info, nullptr, nullptr, CREATE_METHOD_TABLE(JSPyObject) }; + +template +void JSPyObject::visitChildrenImpl(JSCell* cell, Visitor& visitor) +{ + JSPyObject* thisObject = jsCast(cell); + ASSERT_GC_OBJECT_INHERITS(thisObject, info()); + Base::visitChildren(thisObject, visitor); +} + +DEFINE_VISIT_CHILDREN(JSPyObject); + +void JSPyObject::finishCreation(VM& vm) +{ + Base::finishCreation(vm); + ASSERT(inherits(info())); +} + +JSC::GCClient::IsoSubspace* JSPyObject::subspaceForImpl(JSC::VM& vm) +{ + return WebCore::subspaceForImpl( + vm, + [](auto& spaces) { return spaces.m_clientSubspaceForPyObject.get(); }, + [](auto& spaces, auto&& space) { spaces.m_clientSubspaceForPyObject = std::forward(space); }, + [](auto& spaces) { return spaces.m_subspaceForPyObject.get(); }, + [](auto& spaces, auto&& space) { spaces.m_subspaceForPyObject = std::forward(space); }); +} + +// Property access - proxy to Python's getattr +bool JSPyObject::getOwnPropertySlot(JSObject* object, JSGlobalObject* globalObject, PropertyName propertyName, PropertySlot& slot) +{ + JSPyObject* thisObject = jsCast(object); + VM& vm = globalObject->vm(); + + // Handle special JS properties + if (propertyName == vm.propertyNames->toStringTagSymbol) { + slot.setValue(object, static_cast(PropertyAttribute::DontEnum | PropertyAttribute::ReadOnly), jsString(vm, String("PythonValue"_s))); + return true; + } + + // Handle toString + if (propertyName == vm.propertyNames->toString) { + slot.setValue(object, static_cast(PropertyAttribute::DontEnum), + JSFunction::create(vm, globalObject, 0, 
"toString"_s, jsPyObjectToString, ImplementationVisibility::Public)); + return true; + } + + // Handle nodejs.util.inspect.custom for console.log + if (propertyName == Identifier::fromUid(vm.symbolRegistry().symbolForKey("nodejs.util.inspect.custom"_s))) { + slot.setValue(object, static_cast(PropertyAttribute::DontEnum), + JSFunction::create(vm, globalObject, 0, "inspect"_s, jsPyObjectToString, ImplementationVisibility::Public)); + return true; + } + + // Handle Symbol.iterator for Python iterables + if (propertyName == vm.propertyNames->iteratorSymbol) { + // Check if this Python object is iterable + if (PyIter_Check(thisObject->m_pyObject) || PyObject_HasAttrString(thisObject->m_pyObject, "__iter__")) { + slot.setValue(object, static_cast(PropertyAttribute::DontEnum), + JSFunction::create(vm, globalObject, 0, "[Symbol.iterator]"_s, jsPyObjectIterator, ImplementationVisibility::Public)); + return true; + } + } + + // Handle length property for Python sequences (needed for Array.prototype methods) + if (propertyName == vm.propertyNames->length) { + if (PySequence_Check(thisObject->m_pyObject) && !PyUnicode_Check(thisObject->m_pyObject)) { + Py_ssize_t len = PySequence_Size(thisObject->m_pyObject); + if (len >= 0) { + slot.setValue(object, static_cast(PropertyAttribute::DontEnum | PropertyAttribute::ReadOnly), jsNumber(len)); + return true; + } + PyErr_Clear(); + } + } + + // Convert property name to Python string + auto* nameString = propertyName.publicName(); + if (!nameString) { + return Base::getOwnPropertySlot(object, globalObject, propertyName, slot); + } + + auto nameUTF8 = nameString->utf8(); + PyObject* pyName = PyUnicode_FromStringAndSize(nameUTF8.data(), nameUTF8.length()); + if (!pyName) { + PyErr_Clear(); + return false; + } + + // First try attribute access (for regular objects) + PyObject* attr = PyObject_GetAttr(thisObject->m_pyObject, pyName); + if (!attr) { + PyErr_Clear(); + // If attribute access fails, try item access (for dicts/mappings) + if 
(PyMapping_Check(thisObject->m_pyObject)) { + attr = PyObject_GetItem(thisObject->m_pyObject, pyName); + if (!attr) { + PyErr_Clear(); + } + } + } + Py_DECREF(pyName); + + if (!attr) { + return false; + } + + JSValue jsAttr = Python::toJS(globalObject, attr); + Py_DECREF(attr); + + slot.setValue(object, static_cast(PropertyAttribute::None), jsAttr); + return true; +} + +bool JSPyObject::getOwnPropertySlotByIndex(JSObject* object, JSGlobalObject* globalObject, unsigned index, PropertySlot& slot) +{ + JSPyObject* thisObject = jsCast(object); + + PyObject* item = PySequence_GetItem(thisObject->m_pyObject, static_cast(index)); + if (!item) { + PyErr_Clear(); + return false; + } + + JSValue jsItem = Python::toJS(globalObject, item); + Py_DECREF(item); + + slot.setValue(object, static_cast(PropertyAttribute::None), jsItem); + return true; +} + +void JSPyObject::getOwnPropertyNames(JSObject* object, JSGlobalObject* globalObject, PropertyNameArrayBuilder& propertyNames, DontEnumPropertiesMode mode) +{ + JSPyObject* thisObject = jsCast(object); + VM& vm = globalObject->vm(); + + // Get dir() of the object + PyObject* dir = PyObject_Dir(thisObject->m_pyObject); + if (!dir) { + PyErr_Clear(); + return; + } + + Py_ssize_t len = PyList_Size(dir); + for (Py_ssize_t i = 0; i < len; i++) { + PyObject* name = PyList_GetItem(dir, i); // borrowed reference + if (PyUnicode_Check(name)) { + const char* nameStr = PyUnicode_AsUTF8(name); + if (nameStr && nameStr[0] != '_') { // Skip private/dunder + propertyNames.add(Identifier::fromString(vm, String::fromUTF8(nameStr))); + } + } + } + Py_DECREF(dir); +} + +// Helper to convert JSValue to PyObject +static PyObject* jsValueToPyObject(JSGlobalObject* globalObject, JSValue value) +{ + if (value.isNull() || value.isUndefined()) { + Py_INCREF(Py_None); + return Py_None; + } + if (value.isBoolean()) { + PyObject* result = value.asBoolean() ? 
Py_True : Py_False; + Py_INCREF(result); + return result; + } + if (value.isNumber()) { + double num = value.asNumber(); + constexpr double maxSafeInt = 9007199254740992.0; + if (std::floor(num) == num && num >= -maxSafeInt && num <= maxSafeInt) { + return PyLong_FromLongLong(static_cast(num)); + } + return PyFloat_FromDouble(num); + } + if (value.isString()) { + auto str = value.toWTFString(globalObject); + auto utf8 = str.utf8(); + return PyUnicode_FromStringAndSize(utf8.data(), utf8.length()); + } + if (auto* pyVal = jsDynamicCast(value)) { + PyObject* obj = pyVal->pyObject(); + Py_INCREF(obj); + return obj; + } + // For other JS objects, return None for now + Py_INCREF(Py_None); + return Py_None; +} + +bool JSPyObject::put(JSCell* cell, JSGlobalObject* globalObject, PropertyName propertyName, JSValue value, PutPropertySlot& slot) +{ + JSPyObject* thisObject = jsCast(cell); + + auto* nameString = propertyName.publicName(); + if (!nameString) { + return false; + } + + auto nameUTF8 = nameString->utf8(); + PyObject* pyName = PyUnicode_FromStringAndSize(nameUTF8.data(), nameUTF8.length()); + if (!pyName) { + PyErr_Clear(); + return false; + } + + PyObject* pyValue = jsValueToPyObject(globalObject, value); + if (!pyValue) { + Py_DECREF(pyName); + PyErr_Clear(); + return false; + } + + int result = -1; + + // For dicts/mappings, use item assignment + if (PyDict_Check(thisObject->m_pyObject)) { + result = PyDict_SetItem(thisObject->m_pyObject, pyName, pyValue); + } else if (PyMapping_Check(thisObject->m_pyObject)) { + result = PyObject_SetItem(thisObject->m_pyObject, pyName, pyValue); + } else { + // For other objects, try attribute assignment + result = PyObject_SetAttr(thisObject->m_pyObject, pyName, pyValue); + } + + Py_DECREF(pyName); + Py_DECREF(pyValue); + + if (result < 0) { + PyErr_Clear(); + return false; + } + + return true; +} + +bool JSPyObject::putByIndex(JSCell* cell, JSGlobalObject* globalObject, unsigned index, JSValue value, bool) +{ + JSPyObject* 
thisObject = jsCast(cell); + + if (!PySequence_Check(thisObject->m_pyObject)) { + return false; + } + + PyObject* pyValue = jsValueToPyObject(globalObject, value); + if (!pyValue) { + PyErr_Clear(); + return false; + } + + // Get current length + Py_ssize_t length = PySequence_Size(thisObject->m_pyObject); + if (length < 0) { + PyErr_Clear(); + Py_DECREF(pyValue); + return false; + } + + int result; + if (static_cast(index) >= length) { + // Index is beyond current length - we need to extend the list + if (PyList_Check(thisObject->m_pyObject)) { + // For lists, extend with None values up to the index, then set + PyObject* list = thisObject->m_pyObject; + for (Py_ssize_t i = length; i < static_cast(index); i++) { + if (PyList_Append(list, Py_None) < 0) { + PyErr_Clear(); + Py_DECREF(pyValue); + return false; + } + } + result = PyList_Append(list, pyValue); + } else { + // For other sequences, try insert or set item + result = PySequence_SetItem(thisObject->m_pyObject, static_cast(index), pyValue); + } + } else { + result = PySequence_SetItem(thisObject->m_pyObject, static_cast(index), pyValue); + } + + Py_DECREF(pyValue); + + if (result < 0) { + PyErr_Clear(); + return false; + } + + return true; +} + +// toString - returns Python's str() representation +JSC_DEFINE_HOST_FUNCTION(jsPyObjectToString, (JSGlobalObject * globalObject, CallFrame* callFrame)) +{ + VM& vm = globalObject->vm(); + auto scope = DECLARE_THROW_SCOPE(vm); + + JSValue thisValue = callFrame->thisValue(); + JSPyObject* thisObject = jsDynamicCast(thisValue); + if (!thisObject) { + return JSValue::encode(jsString(vm, String("[object PythonValue]"_s))); + } + + PyObject* str = PyObject_Str(thisObject->pyObject()); + if (!str) { + PyErr_Clear(); + return JSValue::encode(jsString(vm, String("[object PythonValue]"_s))); + } + + const char* utf8 = PyUnicode_AsUTF8(str); + if (!utf8) { + Py_DECREF(str); + PyErr_Clear(); + return JSValue::encode(jsString(vm, String("[object PythonValue]"_s))); + } + + 
JSValue result = jsString(vm, WTF::String::fromUTF8(utf8)); + Py_DECREF(str); + return JSValue::encode(result); +} + +// Iterator next - called from the JS iterator's next() method +JSC_DEFINE_HOST_FUNCTION(jsPyIteratorNext, (JSGlobalObject * globalObject, CallFrame* callFrame)) +{ + VM& vm = globalObject->vm(); + auto scope = DECLARE_THROW_SCOPE(vm); + + // Get the Python iterator from the thisValue (which should be the iterator wrapper object) + JSValue thisValue = callFrame->thisValue(); + JSObject* thisObject = thisValue.toObject(globalObject); + RETURN_IF_EXCEPTION(scope, {}); + + // Get the stored Python iterator + JSValue pyIterValue = thisObject->getDirect(vm, Identifier::fromString(vm, "_pyIter"_s)); + if (!pyIterValue) { + return JSValue::encode(constructEmptyObject(globalObject)); + } + + JSPyObject* pyIter = jsDynamicCast(pyIterValue); + if (!pyIter) { + return JSValue::encode(constructEmptyObject(globalObject)); + } + + // Call Python's next() on the iterator + PyObject* nextItem = PyIter_Next(pyIter->pyObject()); + + // Create the result object { value, done } + JSObject* result = constructEmptyObject(globalObject); + + if (nextItem) { + // Got an item + result->putDirect(vm, Identifier::fromString(vm, "value"_s), Python::toJS(globalObject, nextItem)); + result->putDirect(vm, Identifier::fromString(vm, "done"_s), jsBoolean(false)); + Py_DECREF(nextItem); + } else { + // Check if it's StopIteration or an error + if (PyErr_Occurred()) { + if (PyErr_ExceptionMatches(PyExc_StopIteration)) { + PyErr_Clear(); + } else { + // Real error - propagate it + PyErr_Print(); + PyErr_Clear(); + throwTypeError(globalObject, scope, "Python iterator error"_s); + return {}; + } + } + // Iterator exhausted + result->putDirect(vm, Identifier::fromString(vm, "value"_s), jsUndefined()); + result->putDirect(vm, Identifier::fromString(vm, "done"_s), jsBoolean(true)); + } + + return JSValue::encode(result); +} + +// Symbol.iterator - returns a JS iterator that wraps Python 
iteration +JSC_DEFINE_HOST_FUNCTION(jsPyObjectIterator, (JSGlobalObject * globalObject, CallFrame* callFrame)) +{ + VM& vm = globalObject->vm(); + auto scope = DECLARE_THROW_SCOPE(vm); + + JSValue thisValue = callFrame->thisValue(); + JSPyObject* thisObject = jsDynamicCast(thisValue); + if (!thisObject) { + throwTypeError(globalObject, scope, "Not a Python object"_s); + return {}; + } + + // Get a Python iterator for this object + PyObject* pyIter = PyObject_GetIter(thisObject->pyObject()); + if (!pyIter) { + PyErr_Clear(); + throwTypeError(globalObject, scope, "Python object is not iterable"_s); + return {}; + } + + // Create a JS iterator object + JSObject* jsIter = constructEmptyObject(globalObject); + + // Store the Python iterator (as JSPyObject) on the JS iterator object + auto* zigGlobalObject = jsCast(globalObject); + Structure* structure = zigGlobalObject->m_JSPyObjectStructure.get(); + if (!structure) { + structure = JSPyObject::createStructure(vm, globalObject, globalObject->objectPrototype()); + zigGlobalObject->m_JSPyObjectStructure.set(vm, zigGlobalObject, structure); + } + JSPyObject* wrappedIter = JSPyObject::create(vm, globalObject, structure, pyIter); + Py_DECREF(pyIter); // JSPyObject takes ownership + + jsIter->putDirect(vm, Identifier::fromString(vm, "_pyIter"_s), wrappedIter); + + // Add the next() method + jsIter->putDirect(vm, Identifier::fromString(vm, "next"_s), + JSFunction::create(vm, globalObject, 0, "next"_s, jsPyIteratorNext, ImplementationVisibility::Public)); + + return JSValue::encode(jsIter); +} + +// Helper to check if a JSValue is a plain object (not array, not wrapped Python object) +static bool isPlainJSObject(JSGlobalObject* globalObject, JSValue value) +{ + if (!value.isObject()) + return false; + JSObject* obj = value.getObject(); + // Not a plain object if it's a JSPyObject (wrapped Python object) + if (jsDynamicCast(obj)) + return false; + // Not a plain object if it's an array + if (isJSArray(obj)) + return false; + // 
Not a plain object if it's a function + if (obj->isCallable()) + return false; + // Check if it's a plain Object (not a special type like Date, Map, etc.) + // We consider it kwargs-eligible if its prototype is Object.prototype or null + JSValue proto = obj->getPrototype(globalObject); + return proto.isNull() || proto == globalObject->objectPrototype(); +} + +// Get the expected positional argument count for a Python callable +// Returns -1 if we can't determine (e.g., built-in functions) +static int getExpectedArgCount(PyObject* callable) +{ + PyObject* codeObj = nullptr; + + // For regular functions, get __code__ + if (PyFunction_Check(callable)) { + codeObj = PyFunction_GET_CODE(callable); + } + // For methods, get the underlying function's __code__ + else if (PyMethod_Check(callable)) { + PyObject* func = PyMethod_GET_FUNCTION(callable); + if (PyFunction_Check(func)) { + codeObj = PyFunction_GET_CODE(func); + } + } + // Try getting __code__ attribute for other callables (like lambdas assigned to variables) + else if (PyObject_HasAttrString(callable, "__code__")) { + codeObj = PyObject_GetAttrString(callable, "__code__"); + if (codeObj) { + PyObject* argCountObj = PyObject_GetAttrString(codeObj, "co_argcount"); + Py_DECREF(codeObj); + if (argCountObj) { + int count = static_cast(PyLong_AsLong(argCountObj)); + Py_DECREF(argCountObj); + return count; + } + } + PyErr_Clear(); + return -1; + } + + if (!codeObj) { + return -1; + } + + // Get co_argcount from the code object + PyCodeObject* code = reinterpret_cast(codeObj); + return code->co_argcount; +} + +// Call Python function from JS +JSC_DEFINE_HOST_FUNCTION(jsPyObjectCall, (JSGlobalObject * globalObject, CallFrame* callFrame)) +{ + VM& vm = globalObject->vm(); + auto scope = DECLARE_THROW_SCOPE(vm); + + JSPyObject* thisObject = jsDynamicCast(callFrame->jsCallee()); + if (!thisObject) { + throwTypeError(globalObject, scope, "Not a Python callable"_s); + return {}; + } + + PyObject* pyFunc = 
thisObject->pyObject(); + if (!PyCallable_Check(pyFunc)) { + throwTypeError(globalObject, scope, "Python object is not callable"_s); + return {}; + } + + // Convert all arguments as positional args + // TODO: Support kwargs via a special marker like $kwargs from "bun:python" + size_t argCount = callFrame->argumentCount(); + + // Check if the Python function expects fewer arguments than provided + // If so, trim the argument list to match (allows flexible callback signatures) + int expectedArgs = getExpectedArgCount(pyFunc); + if (expectedArgs >= 0 && static_cast(expectedArgs) < argCount) { + argCount = static_cast(expectedArgs); + } + PyObject* kwargs = nullptr; + + // Convert JS arguments to Python tuple + PyObject* args = PyTuple_New(static_cast(argCount)); + if (!args) { + Py_XDECREF(kwargs); + throwOutOfMemoryError(globalObject, scope); + return {}; + } + + for (size_t i = 0; i < argCount; i++) { + JSValue jsArg = callFrame->uncheckedArgument(i); + PyObject* pyArg = nullptr; + + // Check if it's already a wrapped Python object first + if (auto* pyVal = jsDynamicCast(jsArg)) { + // Unwrap JSPyObject back to PyObject + pyArg = pyVal->pyObject(); + Py_INCREF(pyArg); + } else { + // Convert JS value to Python using the standard conversion + // This handles primitives, arrays (as list), and objects (as dict) + pyArg = Python::fromJS(globalObject, jsArg); + } + + if (!pyArg) { + Py_DECREF(args); + Py_XDECREF(kwargs); + throwTypeError(globalObject, scope, "Failed to convert argument to Python"_s); + return {}; + } + PyTuple_SET_ITEM(args, i, pyArg); // steals reference + } + + // Call the Python function with args and optional kwargs + PyObject* result = PyObject_Call(pyFunc, args, kwargs); + Py_DECREF(args); + Py_XDECREF(kwargs); + + if (!result) { + // Get Python exception info + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + PyErr_NormalizeException(&type, &value, &traceback); + + WTF::String errorMessage = "Python error"_s; + if 
(value) { + PyObject* str = PyObject_Str(value); + if (str) { + const char* errStr = PyUnicode_AsUTF8(str); + if (errStr) { + errorMessage = WTF::String::fromUTF8(errStr); + } + Py_DECREF(str); + } + } + + Py_XDECREF(type); + Py_XDECREF(value); + Py_XDECREF(traceback); + + throwTypeError(globalObject, scope, errorMessage); + return {}; + } + + JSValue jsResult = Python::toJS(globalObject, result); + Py_DECREF(result); + + return JSValue::encode(jsResult); +} + +CallData JSPyObject::getCallData(JSCell* cell) +{ + JSPyObject* thisObject = jsCast(cell); + + CallData callData; + // Only allow direct calls for non-type callables (functions, lambdas, etc.) + // Python types (classes) should require `new`, like JS classes + if (thisObject->isCallable() && !PyType_Check(thisObject->m_pyObject)) { + callData.type = CallData::Type::Native; + callData.native.function = jsPyObjectCall; + } + return callData; +} + +// For Python, constructing and calling are the same thing +// This allows `new Counter()` to work for Python classes +CallData JSPyObject::getConstructData(JSCell* cell) +{ + JSPyObject* thisObject = jsCast(cell); + + CallData constructData; + if (thisObject->isCallable()) { + constructData.type = CallData::Type::Native; + constructData.native.function = jsPyObjectCall; + } + return constructData; +} + +} // namespace Bun diff --git a/src/bun.js/bindings/JSPyObject.h b/src/bun.js/bindings/JSPyObject.h new file mode 100644 index 0000000000..f0ff0bae54 --- /dev/null +++ b/src/bun.js/bindings/JSPyObject.h @@ -0,0 +1,86 @@ +#pragma once + +#include "root.h" +#include + +namespace Bun { +using namespace JSC; + +// JSPyObject wraps a PyObject* and proxies property access, calls, etc. to Python. +// When created, it increments the Python refcount; when finalized by GC, it decrements it. 
+class JSPyObject : public JSC::JSDestructibleObject { + using Base = JSC::JSDestructibleObject; + +public: + JSPyObject(JSC::VM& vm, JSC::Structure* structure, PyObject* pyObject) + : Base(vm, structure) + , m_pyObject(pyObject) + { + // Prevent Python from freeing this object while we hold it + Py_INCREF(m_pyObject); + } + + DECLARE_INFO; + DECLARE_VISIT_CHILDREN; + + static constexpr unsigned StructureFlags = Base::StructureFlags | OverridesGetOwnPropertySlot | OverridesGetOwnPropertyNames | OverridesPut | OverridesGetCallData | InterceptsGetOwnPropertySlotByIndexEvenWhenLengthIsNotZero; + + template + static JSC::GCClient::IsoSubspace* subspaceFor(JSC::VM& vm) + { + if constexpr (mode == JSC::SubspaceAccess::Concurrently) + return nullptr; + return subspaceForImpl(vm); + } + + static JSC::GCClient::IsoSubspace* subspaceForImpl(JSC::VM& vm); + + static JSC::Structure* createStructure(JSC::VM& vm, JSC::JSGlobalObject* globalObject, JSC::JSValue prototype) + { + return JSC::Structure::create(vm, globalObject, prototype, + JSC::TypeInfo(JSC::ObjectType, StructureFlags), info()); + } + + static JSPyObject* create(JSC::VM& vm, JSC::JSGlobalObject* globalObject, JSC::Structure* structure, PyObject* pyObject) + { + JSPyObject* value = new (NotNull, JSC::allocateCell(vm)) JSPyObject(vm, structure, pyObject); + value->finishCreation(vm); + return value; + } + + void finishCreation(JSC::VM& vm); + + static void destroy(JSCell* thisObject) + { + JSPyObject* value = static_cast(thisObject); + // Release Python reference + Py_DECREF(value->m_pyObject); + value->~JSPyObject(); + } + + // Property access - proxy to Python's __getattr__ + static bool getOwnPropertySlot(JSObject*, JSGlobalObject*, PropertyName, PropertySlot&); + static bool getOwnPropertySlotByIndex(JSObject*, JSGlobalObject*, unsigned, PropertySlot&); + static void getOwnPropertyNames(JSObject*, JSGlobalObject*, PropertyNameArrayBuilder&, DontEnumPropertiesMode); + + // Property set - proxy to Python's 
__setattr__ + static bool put(JSCell*, JSGlobalObject*, PropertyName, JSValue, PutPropertySlot&); + static bool putByIndex(JSCell*, JSGlobalObject*, unsigned, JSValue, bool); + + // If callable, proxy to Python's __call__ + static CallData getCallData(JSCell*); + + // If callable, also make constructible (for Python classes) + static CallData getConstructData(JSCell*); + + // Get the wrapped PyObject + PyObject* pyObject() const { return m_pyObject; } + + // Helper to check if Python object is callable + bool isCallable() const { return PyCallable_Check(m_pyObject); } + +private: + PyObject* m_pyObject; +}; + +} // namespace Bun diff --git a/src/bun.js/bindings/ModuleLoader.cpp b/src/bun.js/bindings/ModuleLoader.cpp index a647e3d52a..82a89eb889 100644 --- a/src/bun.js/bindings/ModuleLoader.cpp +++ b/src/bun.js/bindings/ModuleLoader.cpp @@ -40,6 +40,7 @@ #include "JSCommonJSExtensions.h" #include "BunProcess.h" +#include "BunPython.h" namespace Bun { using namespace JSC; @@ -65,6 +66,7 @@ public: }; extern "C" BunLoaderType Bun__getDefaultLoader(JSC::JSGlobalObject*, BunString* specifier); +extern "C" JSC::EncodedJSValue BunObject_getter_main(JSC::JSGlobalObject*); static JSC::JSInternalPromise* rejectedInternalPromise(JSC::JSGlobalObject* globalObject, JSC::JSValue value) { @@ -977,6 +979,15 @@ static JSValue fetchESMSourceCode( auto&& provider = Zig::SourceProvider::create(globalObject, res->result.value, JSC::SourceProviderSourceType::Module, true); RELEASE_AND_RETURN(scope, rejectOrResolve(JSSourceCode::create(vm, JSC::SourceCode(provider)))); } + case SyntheticModuleType::PythonBuiltin: { + // Python builtin module - import from Python's standard library + WTF::String moduleName = res->result.value.source_code.toWTFString(BunString::NonNull); + auto function = Python::generatePythonBuiltinModuleSourceCode(globalObject, moduleName); + auto source = JSC::SourceCode( + JSC::SyntheticSourceProvider::create(WTF::move(function), + JSC::SourceOrigin(), 
WTF::move(moduleKey))); + RELEASE_AND_RETURN(scope, rejectOrResolve(JSSourceCode::create(vm, WTF::move(source)))); + } #define CASE(str, name) \ case (SyntheticModuleType::name): { \ @@ -1103,6 +1114,21 @@ static JSValue fetchESMSourceCode( JSC::SourceOrigin(), specifier->toWTFString(BunString::ZeroCopy))); JSC::ensureStillAliveHere(value); RELEASE_AND_RETURN(scope, rejectOrResolve(JSSourceCode::create(globalObject->vm(), WTF::move(source)))); + } else if (res->result.value.tag == SyntheticModuleType::Python) { + // Python module - run Python file and wrap exports as JSPyObject + WTF::String filePath = res->result.value.source_code.toWTFString(BunString::NonNull); + // Check if this is the main entry point by comparing against Bun.main + bool isMainEntry = false; + JSValue mainValue = JSValue::decode(BunObject_getter_main(globalObject)); + if (mainValue.isString()) { + WTF::String mainPath = mainValue.toWTFString(globalObject); + isMainEntry = (filePath == mainPath); + } + auto function = Python::generatePythonModuleSourceCode(globalObject, filePath, isMainEntry); + auto source = JSC::SourceCode( + JSC::SyntheticSourceProvider::create(WTF::move(function), + JSC::SourceOrigin(), specifier->toWTFString(BunString::ZeroCopy))); + RELEASE_AND_RETURN(scope, rejectOrResolve(JSSourceCode::create(globalObject->vm(), WTF::move(source)))); } RELEASE_AND_RETURN(scope, rejectOrResolve(JSC::JSSourceCode::create(vm, JSC::SourceCode(Zig::SourceProvider::create(globalObject, res->result.value))))); diff --git a/src/bun.js/bindings/PyJSValueObject.cpp b/src/bun.js/bindings/PyJSValueObject.cpp new file mode 100644 index 0000000000..056d5f416f --- /dev/null +++ b/src/bun.js/bindings/PyJSValueObject.cpp @@ -0,0 +1,3000 @@ +#include "root.h" +#include "PyJSValueObject.h" +#include "Python.h" +#include "BunPython.h" +#include "JSPyObject.h" +#include "ZigGlobalObject.h" +#include "BunClientData.h" +#include +#include + +namespace Bun { + +using namespace JSC; + +// 
============================================================================= +// PyFutureCallback - Python callable that resolves/rejects a Python Future +// Used for JS Promise -> Python await bridging +// ============================================================================= + +struct PyFutureCallback { + PyObject_HEAD PyObject* future; // The asyncio.Future to resolve/reject + bool isReject; // true = set_exception, false = set_result +}; + +static void PyFutureCallback_dealloc(PyFutureCallback* self) +{ + Py_XDECREF(self->future); + Py_TYPE(self)->tp_free(reinterpret_cast(self)); +} + +static PyObject* PyFutureCallback_call(PyFutureCallback* self, PyObject* args, PyObject* kwargs) +{ + if (!self->future) { + Py_RETURN_NONE; + } + + // Check if future is already done (cancelled, etc.) + PyObject* doneMethod = PyObject_GetAttrString(self->future, "done"); + if (doneMethod) { + PyObject* done = PyObject_CallNoArgs(doneMethod); + Py_DECREF(doneMethod); + if (done && PyObject_IsTrue(done)) { + Py_DECREF(done); + Py_RETURN_NONE; + } + Py_XDECREF(done); + } + PyErr_Clear(); + + // Get the value argument + PyObject* value = Py_None; + if (PyTuple_Size(args) > 0) { + value = PyTuple_GetItem(args, 0); + } + + if (self->isReject) { + // Convert the JS error to a Python exception + // Create a RuntimeError with the error message + PyObject* excType = PyExc_RuntimeError; + PyObject* excValue = nullptr; + + if (PyUnicode_Check(value)) { + excValue = value; + Py_INCREF(excValue); + } else { + // Get string representation + excValue = PyObject_Str(value); + if (!excValue) { + PyErr_Clear(); + excValue = PyUnicode_FromString("Unknown JavaScript error"); + } + } + + // Create an exception instance + PyObject* exception = PyObject_CallOneArg(excType, excValue); + Py_DECREF(excValue); + + if (exception) { + PyObject* setException = PyObject_GetAttrString(self->future, "set_exception"); + if (setException) { + PyObject* result = PyObject_CallOneArg(setException, exception); 
+ Py_XDECREF(result); + Py_DECREF(setException); + } + Py_DECREF(exception); + } + PyErr_Clear(); + } else { + // Resolve with the value + PyObject* setResult = PyObject_GetAttrString(self->future, "set_result"); + if (setResult) { + PyObject* result = PyObject_CallOneArg(setResult, value); + Py_XDECREF(result); + Py_DECREF(setResult); + } + PyErr_Clear(); + } + + Py_RETURN_NONE; +} + +static PyTypeObject PyFutureCallback_Type = { + .ob_base = PyVarObject_HEAD_INIT(nullptr, 0) + .tp_name + = "_bun.FutureCallback", + .tp_basicsize = sizeof(PyFutureCallback), + .tp_itemsize = 0, + .tp_dealloc = reinterpret_cast(PyFutureCallback_dealloc), + .tp_call = reinterpret_cast(PyFutureCallback_call), + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = "Resolves/rejects a Python Future when called from JavaScript", +}; + +static bool g_futureCallbackTypeReady = false; + +static PyFutureCallback* createFutureCallback(PyObject* future, bool isReject) +{ + if (!g_futureCallbackTypeReady) { + if (PyType_Ready(&PyFutureCallback_Type) < 0) { + return nullptr; + } + g_futureCallbackTypeReady = true; + } + + PyFutureCallback* callback = PyObject_New(PyFutureCallback, &PyFutureCallback_Type); + if (!callback) { + return nullptr; + } + + Py_INCREF(future); + callback->future = future; + callback->isReject = isReject; + return callback; +} + +// Helper to get or create JSPyObject structure +static Structure* getJSPyObjectStructure(JSGlobalObject* globalObject) +{ + auto* zigGlobalObject = jsCast(globalObject); + VM& vm = globalObject->vm(); + + Structure* structure = zigGlobalObject->m_JSPyObjectStructure.get(); + if (!structure) { + structure = JSPyObject::createStructure(vm, globalObject, globalObject->objectPrototype()); + zigGlobalObject->m_JSPyObjectStructure.set(vm, zigGlobalObject, structure); + } + return structure; +} + +// ============================================================================ +// PyJSValueObject - Base wrapper for functions and other non-container types +// 
============================================================================ + +static void pyjsvalue_dealloc(PyObject* self); +static PyObject* pyjsvalue_repr(PyObject* self); +static PyObject* pyjsvalue_getattro(PyObject* self, PyObject* name); +static int pyjsvalue_setattro(PyObject* self, PyObject* name, PyObject* value); +static PyObject* pyjsvalue_call(PyObject* self, PyObject* args, PyObject* kwargs); +static PyObject* pyjsvalue_subscript(PyObject* self, PyObject* key); +static int pyjsvalue_ass_subscript(PyObject* self, PyObject* key, PyObject* value); +static PyObject* pyjsvalue_await(PyObject* self); + +// am_await implementation - allows Python to await JS Promises +static PyObject* pyjsvalue_await(PyObject* self) +{ + PyJSValueObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!globalObject) { + PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available"); + return nullptr; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + // Check if this is a Promise + JSPromise* promise = jsDynamicCast(wrapper->jsValue); + if (!promise) { + PyErr_SetString(PyExc_TypeError, "object is not awaitable (not a Promise)"); + return nullptr; + } + + // Import asyncio and get the running loop + PyObject* asyncio = PyImport_ImportModule("asyncio"); + if (!asyncio) { + PyErr_SetString(PyExc_RuntimeError, "Failed to import asyncio"); + return nullptr; + } + + PyObject* getRunningLoop = PyObject_GetAttrString(asyncio, "get_running_loop"); + if (!getRunningLoop) { + Py_DECREF(asyncio); + PyErr_SetString(PyExc_RuntimeError, "Failed to get get_running_loop"); + return nullptr; + } + + PyObject* loop = PyObject_CallNoArgs(getRunningLoop); + Py_DECREF(getRunningLoop); + + if (!loop) { + Py_DECREF(asyncio); + PyErr_SetString(PyExc_RuntimeError, "No running event loop"); + return nullptr; + } + + // Create a Future: loop.create_future() + PyObject* createFuture = 
PyObject_GetAttrString(loop, "create_future"); + if (!createFuture) { + Py_DECREF(loop); + Py_DECREF(asyncio); + PyErr_SetString(PyExc_RuntimeError, "Failed to get create_future"); + return nullptr; + } + + PyObject* future = PyObject_CallNoArgs(createFuture); + Py_DECREF(createFuture); + + if (!future) { + Py_DECREF(loop); + Py_DECREF(asyncio); + PyErr_SetString(PyExc_RuntimeError, "Failed to create future"); + return nullptr; + } + + // Create resolve and reject callbacks + PyFutureCallback* resolver = createFutureCallback(future, false); + PyFutureCallback* rejecter = createFutureCallback(future, true); + + if (!resolver || !rejecter) { + Py_XDECREF(reinterpret_cast(resolver)); + Py_XDECREF(reinterpret_cast(rejecter)); + Py_DECREF(future); + Py_DECREF(loop); + Py_DECREF(asyncio); + PyErr_SetString(PyExc_RuntimeError, "Failed to create callbacks"); + return nullptr; + } + + // Wrap the Python callbacks as JSPyObjects so JS can call them + Structure* structure = getJSPyObjectStructure(globalObject); + JSPyObject* jsResolver = JSPyObject::create(vm, globalObject, structure, reinterpret_cast(resolver)); + JSPyObject* jsRejecter = JSPyObject::create(vm, globalObject, structure, reinterpret_cast(rejecter)); + + // We can release Python references now - JSPyObject holds them + Py_DECREF(reinterpret_cast(resolver)); + Py_DECREF(reinterpret_cast(rejecter)); + + // Call promise.then(resolver, rejecter) + // Get the 'then' method from the promise + JSValue thenMethod = promise->get(globalObject, Identifier::fromString(vm, "then"_s)); + if (scope.exception()) { + scope.clearException(); + Py_DECREF(future); + Py_DECREF(loop); + Py_DECREF(asyncio); + PyErr_SetString(PyExc_RuntimeError, "Failed to get Promise.then"); + return nullptr; + } + + auto callData = JSC::getCallData(thenMethod); + if (callData.type == CallData::Type::None) { + Py_DECREF(future); + Py_DECREF(loop); + Py_DECREF(asyncio); + PyErr_SetString(PyExc_RuntimeError, "Promise.then is not callable"); + return 
nullptr; + } + + MarkedArgumentBuffer thenArgs; + thenArgs.append(jsResolver); + thenArgs.append(jsRejecter); + + JSC::profiledCall(globalObject, ProfilingReason::API, thenMethod, callData, promise, thenArgs); + + if (scope.exception()) { + scope.clearException(); + Py_DECREF(future); + Py_DECREF(loop); + Py_DECREF(asyncio); + PyErr_SetString(PyExc_RuntimeError, "Failed to attach Promise handlers"); + return nullptr; + } + + Py_DECREF(loop); + Py_DECREF(asyncio); + + // Return future.__await__() which is the iterator Python expects + PyObject* awaitMethod = PyObject_GetAttrString(future, "__await__"); + if (!awaitMethod) { + Py_DECREF(future); + PyErr_SetString(PyExc_RuntimeError, "Future has no __await__ method"); + return nullptr; + } + + PyObject* awaiter = PyObject_CallNoArgs(awaitMethod); + Py_DECREF(awaitMethod); + Py_DECREF(future); + + return awaiter; +} + +// am_aiter implementation - allows Python to use 'async for' on JS async iterators +static PyObject* pyjsvalue_aiter(PyObject* self) +{ + PyJSValueObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!globalObject) { + PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available"); + return nullptr; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSValue jsValue = wrapper->jsValue; + + if (!jsValue.isObject()) { + PyErr_SetString(PyExc_TypeError, "JavaScript value is not an async iterable"); + return nullptr; + } + + JSObject* jsObj = jsValue.getObject(); + + // Check if it's already an async iterator (has a 'next' method that returns promises) + JSValue nextMethod = jsObj->get(globalObject, Identifier::fromString(vm, "next"_s)); + if (scope.exception()) { + scope.clearException(); + } else if (nextMethod.isCallable()) { + // It's already an async iterator, return self + Py_INCREF(self); + return self; + } + + // Try to get Symbol.asyncIterator + JSValue asyncIteratorMethod = jsObj->get(globalObject, 
vm.propertyNames->asyncIteratorSymbol); + if (scope.exception()) { + scope.clearException(); + PyErr_SetString(PyExc_TypeError, "JavaScript object is not an async iterable"); + return nullptr; + } + + if (asyncIteratorMethod.isCallable()) { + // Call [Symbol.asyncIterator]() to get the async iterator + auto callData = JSC::getCallData(asyncIteratorMethod); + MarkedArgumentBuffer args; + JSValue iterator = JSC::profiledCall(globalObject, ProfilingReason::API, asyncIteratorMethod, callData, jsObj, args); + + if (scope.exception()) { + scope.clearException(); + PyErr_SetString(PyExc_RuntimeError, "Error calling Symbol.asyncIterator"); + return nullptr; + } + + // Wrap the iterator and return it + return Python::fromJS(globalObject, iterator); + } + + PyErr_SetString(PyExc_TypeError, "JavaScript object is not an async iterable"); + return nullptr; +} + +// am_anext implementation - returns an awaitable for the next value +static PyObject* pyjsvalue_anext(PyObject* self) +{ + PyJSValueObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!globalObject) { + PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available"); + return nullptr; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSValue jsValue = wrapper->jsValue; + + if (!jsValue.isObject()) { + PyErr_SetString(PyExc_TypeError, "JavaScript value is not an async iterator"); + return nullptr; + } + + JSObject* jsObj = jsValue.getObject(); + + // Get the 'next' method + JSValue nextMethod = jsObj->get(globalObject, Identifier::fromString(vm, "next"_s)); + if (scope.exception()) { + scope.clearException(); + PyErr_SetString(PyExc_TypeError, "JavaScript async iterator has no 'next' method"); + return nullptr; + } + + if (!nextMethod.isCallable()) { + PyErr_SetString(PyExc_TypeError, "JavaScript async iterator 'next' is not callable"); + return nullptr; + } + + // Call next() - returns a Promise + auto callData = 
JSC::getCallData(nextMethod); + MarkedArgumentBuffer args; + JSValue promiseValue = JSC::profiledCall(globalObject, ProfilingReason::API, nextMethod, callData, jsObj, args); + + if (scope.exception()) { + scope.clearException(); + PyErr_SetString(PyExc_RuntimeError, "Error calling async iterator.next()"); + return nullptr; + } + + // The result should be a Promise that resolves to {value, done} + // We need to create an awaitable that: + // 1. Awaits the promise + // 2. Checks if done is true -> raise StopAsyncIteration + // 3. Otherwise returns value + + // Create a wrapper coroutine in Python to handle the async iteration logic + // We'll use Python code to handle this cleanly + PyObject* asyncioModule = PyImport_ImportModule("asyncio"); + if (!asyncioModule) { + PyErr_SetString(PyExc_RuntimeError, "Failed to import asyncio"); + return nullptr; + } + + // Get the wrapped Promise + PyObject* pyPromise = Python::fromJS(globalObject, promiseValue); + if (!pyPromise) { + Py_DECREF(asyncioModule); + return nullptr; + } + + // Create a coroutine that awaits the promise and handles {value, done} + // We use Python code defined in the event loop setup + PyObject* mainModule = PyImport_AddModule("__main__"); + if (!mainModule) { + Py_DECREF(pyPromise); + Py_DECREF(asyncioModule); + PyErr_SetString(PyExc_RuntimeError, "Failed to get __main__ module"); + return nullptr; + } + + PyObject* mainDict = PyModule_GetDict(mainModule); + PyObject* anextHelper = PyDict_GetItemString(mainDict, "_js_anext_helper"); + + if (!anextHelper) { + // Define the helper function if it doesn't exist + const char* helperCode = R"( +async def _js_anext_helper(promise): + result = await promise + if result.done: + raise StopAsyncIteration + return result.value +)"; + PyObject* result = PyRun_String(helperCode, Py_file_input, mainDict, mainDict); + if (!result) { + Py_DECREF(pyPromise); + Py_DECREF(asyncioModule); + PyErr_Print(); + PyErr_SetString(PyExc_RuntimeError, "Failed to define 
_js_anext_helper"); + return nullptr; + } + Py_DECREF(result); + anextHelper = PyDict_GetItemString(mainDict, "_js_anext_helper"); + } + + if (!anextHelper) { + Py_DECREF(pyPromise); + Py_DECREF(asyncioModule); + PyErr_SetString(PyExc_RuntimeError, "Failed to get _js_anext_helper"); + return nullptr; + } + + // Call _js_anext_helper(promise) to get a coroutine + PyObject* coro = PyObject_CallOneArg(anextHelper, pyPromise); + Py_DECREF(pyPromise); + Py_DECREF(asyncioModule); + + if (!coro) { + return nullptr; + } + + return coro; +} + +static PyMappingMethods PyJSValue_as_mapping = { + nullptr, + pyjsvalue_subscript, + pyjsvalue_ass_subscript, +}; + +static PyAsyncMethods PyJSValue_as_async = { + pyjsvalue_await, // am_await + pyjsvalue_aiter, // am_aiter + pyjsvalue_anext, // am_anext + nullptr, // am_send (Python 3.10+) +}; + +// ============================================================================ +// PyJSBoundMethod - Preserves 'this' context when calling JS methods +// ============================================================================ + +static void pyjsboundmethod_dealloc(PyObject* self); +static PyObject* pyjsboundmethod_repr(PyObject* self); +static PyObject* pyjsboundmethod_call(PyObject* self, PyObject* args, PyObject* kwargs); + +static PyTypeObject PyJSBoundMethod_Type = { + PyVarObject_HEAD_INIT(NULL, 0) "bun.JSBoundMethod", + sizeof(PyJSBoundMethod), + 0, + pyjsboundmethod_dealloc, + 0, + nullptr, + nullptr, + nullptr, + pyjsboundmethod_repr, + nullptr, + nullptr, + nullptr, + nullptr, + pyjsboundmethod_call, + nullptr, + nullptr, + nullptr, + nullptr, + Py_TPFLAGS_DEFAULT, + "JavaScript bound method wrapper", +}; + +static bool g_boundMethodTypeReady = false; + +PyJSBoundMethod* PyJSBoundMethod::New(JSGlobalObject* globalObject, JSValue function, JSValue thisObject) +{ + if (!g_boundMethodTypeReady) { + if (PyType_Ready(&PyJSBoundMethod_Type) < 0) { + return nullptr; + } + g_boundMethodTypeReady = true; + } + + PyJSBoundMethod* method 
= PyObject_New(PyJSBoundMethod, &PyJSBoundMethod_Type); + if (!method) { + return nullptr; + } + + method->function = function; + method->thisObject = thisObject; + method->globalObject = globalObject; + + // Protect both from GC + if (function.isCell()) { + gcProtect(function.asCell()); + } + if (thisObject.isCell()) { + gcProtect(thisObject.asCell()); + } + + return method; +} + +void PyJSBoundMethod::initType() +{ + if (!g_boundMethodTypeReady) { + if (PyType_Ready(&PyJSBoundMethod_Type) < 0) { + PyErr_Print(); + } + g_boundMethodTypeReady = true; + } +} + +static void pyjsboundmethod_dealloc(PyObject* self) +{ + PyJSBoundMethod* method = reinterpret_cast(self); + + if (method->function.isCell()) { + gcUnprotect(method->function.asCell()); + } + if (method->thisObject.isCell()) { + gcUnprotect(method->thisObject.asCell()); + } + + Py_TYPE(self)->tp_free(self); +} + +static PyObject* pyjsboundmethod_repr(PyObject* self) +{ + PyJSBoundMethod* method = reinterpret_cast(self); + JSGlobalObject* globalObject = method->globalObject; + + if (!globalObject) { + return PyUnicode_FromString(""); + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + // Try to get the function name + JSObject* funcObj = method->function.getObject(); + if (funcObj) { + JSValue nameVal = funcObj->get(globalObject, Identifier::fromString(vm, "name"_s)); + if (!scope.exception() && nameVal.isString()) { + auto name = nameVal.toWTFString(globalObject); + auto utf8 = name.utf8(); + return PyUnicode_FromFormat("", utf8.data()); + } + scope.clearException(); + } + + return PyUnicode_FromString(""); +} + +static PyObject* pyjsboundmethod_call(PyObject* self, PyObject* args, PyObject* kwargs) +{ + PyJSBoundMethod* method = reinterpret_cast(self); + JSGlobalObject* globalObject = method->globalObject; + + if (!globalObject) { + PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available"); + return nullptr; + } + + VM& vm = globalObject->vm(); + auto scope = 
DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSValue calleeValue = method->function; + + Py_ssize_t argc = PyTuple_Size(args); + MarkedArgumentBuffer jsArgs; + + for (Py_ssize_t i = 0; i < argc; i++) { + PyObject* arg = PyTuple_GetItem(args, i); + jsArgs.append(Python::toJS(globalObject, arg)); + } + + JSValue result; + + // Get call and construct data + auto callData = JSC::getCallData(calleeValue); + auto constructData = JSC::getConstructData(calleeValue); + + // Determine if we should use 'new' semantics: + // 1. ES6 class - callData.js.functionExecutable->isClassConstructorFunction() is true + // 2. Native constructor - callData is Native type AND constructData is non-None + // (Native constructors that are not callable have a call handler that throws) + // 3. Not callable at all but constructable + bool useConstruct = false; + + if (callData.type == CallData::Type::None) { + // Not callable - must be construct-only + if (constructData.type != CallData::Type::None) { + useConstruct = true; + } else { + PyErr_SetString(PyExc_TypeError, "JavaScript value is not callable"); + return nullptr; + } + } else if (callData.type == CallData::Type::JS && callData.js.functionExecutable) { + // JS function - check if it's an ES6 class constructor + useConstruct = callData.js.functionExecutable->isClassConstructorFunction(); + } else if (callData.type == CallData::Type::Native && constructData.type != CallData::Type::None) { + // Native function that is also constructable - prefer construct + // This handles Bun classes like Glob, File, etc. 
that require 'new' + useConstruct = true; + } + + if (useConstruct) { + // Use 'new' semantics + result = JSC::profiledConstruct(globalObject, ProfilingReason::API, calleeValue, constructData, jsArgs); + } else { + // Regular function call - use the stored thisObject + result = JSC::profiledCall(globalObject, ProfilingReason::API, calleeValue, callData, method->thisObject, jsArgs); + } + + if (scope.exception()) { + JSValue exception = scope.exception()->value(); + scope.clearException(); + + if (exception.isObject()) { + JSObject* errObj = exception.getObject(); + JSValue msgVal = errObj->get(globalObject, Identifier::fromString(vm, "message"_s)); + if (msgVal.isString()) { + auto msg = msgVal.toWTFString(globalObject); + PyErr_Format(PyExc_RuntimeError, "JavaScript error: %s", msg.utf8().data()); + return nullptr; + } + } + PyErr_SetString(PyExc_RuntimeError, "JavaScript error during call"); + return nullptr; + } + + return Python::fromJS(globalObject, result); +} + +// Iterator support for JS iterators/generators +static PyObject* pyjsvalue_iter(PyObject* self) +{ + PyJSValueObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!globalObject) { + PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available"); + return nullptr; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSValue jsValue = wrapper->jsValue; + + // Check if it's already an iterator (has a 'next' method) + if (jsValue.isObject()) { + JSObject* jsObj = jsValue.getObject(); + JSValue nextMethod = jsObj->get(globalObject, Identifier::fromString(vm, "next"_s)); + if (scope.exception()) { + scope.clearException(); + } else if (nextMethod.isCallable()) { + // It's already an iterator, return self + Py_INCREF(self); + return self; + } + } + + // Try to get Symbol.iterator to make it iterable + if (jsValue.isObject()) { + JSObject* jsObj = jsValue.getObject(); + JSValue iteratorMethod = 
jsObj->get(globalObject, vm.propertyNames->iteratorSymbol); + if (scope.exception()) { + scope.clearException(); + PyErr_SetString(PyExc_TypeError, "JavaScript object is not iterable"); + return nullptr; + } + + if (iteratorMethod.isCallable()) { + // Call [Symbol.iterator]() to get the iterator + auto callData = JSC::getCallData(iteratorMethod); + MarkedArgumentBuffer args; + JSValue iterator = JSC::profiledCall(globalObject, ProfilingReason::API, iteratorMethod, callData, jsObj, args); + + if (scope.exception()) { + scope.clearException(); + PyErr_SetString(PyExc_RuntimeError, "Error calling Symbol.iterator"); + return nullptr; + } + + // Wrap the iterator and return it + return Python::fromJS(globalObject, iterator); + } + } + + PyErr_SetString(PyExc_TypeError, "JavaScript object is not iterable"); + return nullptr; +} + +static PyObject* pyjsvalue_iternext(PyObject* self) +{ + PyJSValueObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!globalObject) { + PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available"); + return nullptr; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSValue jsValue = wrapper->jsValue; + + if (!jsValue.isObject()) { + PyErr_SetString(PyExc_TypeError, "JavaScript value is not an iterator"); + return nullptr; + } + + JSObject* jsObj = jsValue.getObject(); + + // Get the 'next' method + JSValue nextMethod = jsObj->get(globalObject, Identifier::fromString(vm, "next"_s)); + if (scope.exception()) { + scope.clearException(); + PyErr_SetString(PyExc_TypeError, "JavaScript iterator has no 'next' method"); + return nullptr; + } + + if (!nextMethod.isCallable()) { + PyErr_SetString(PyExc_TypeError, "JavaScript iterator 'next' is not callable"); + return nullptr; + } + + // Call next() + auto callData = JSC::getCallData(nextMethod); + MarkedArgumentBuffer args; + JSValue result = JSC::profiledCall(globalObject, ProfilingReason::API, 
nextMethod, callData, jsObj, args); + + if (scope.exception()) { + scope.clearException(); + PyErr_SetString(PyExc_RuntimeError, "Error calling iterator.next()"); + return nullptr; + } + + // Result should be {value, done} + if (!result.isObject()) { + PyErr_SetString(PyExc_TypeError, "Iterator next() did not return an object"); + return nullptr; + } + + JSObject* resultObj = result.getObject(); + + // Check 'done' property + JSValue doneValue = resultObj->get(globalObject, Identifier::fromString(vm, "done"_s)); + if (scope.exception()) { + scope.clearException(); + } + + if (doneValue.toBoolean(globalObject)) { + // Iterator exhausted - signal StopIteration by returning NULL without setting error + PyErr_SetNone(PyExc_StopIteration); + return nullptr; + } + + // Get 'value' property + JSValue valueValue = resultObj->get(globalObject, Identifier::fromString(vm, "value"_s)); + if (scope.exception()) { + scope.clearException(); + PyErr_SetString(PyExc_RuntimeError, "Error getting iterator value"); + return nullptr; + } + + return Python::fromJS(globalObject, valueValue); +} + +static PyTypeObject PyJSValue_Type = { + PyVarObject_HEAD_INIT(NULL, 0) "bun.JSValue", // tp_name + sizeof(PyJSValueObject), // tp_basicsize + 0, // tp_itemsize + pyjsvalue_dealloc, // tp_dealloc + 0, // tp_vectorcall_offset + nullptr, // tp_getattr + nullptr, // tp_setattr + &PyJSValue_as_async, // tp_as_async - makes JSValue awaitable + pyjsvalue_repr, // tp_repr + nullptr, // tp_as_number + nullptr, // tp_as_sequence + &PyJSValue_as_mapping, // tp_as_mapping + nullptr, // tp_hash + pyjsvalue_call, // tp_call + nullptr, // tp_str + pyjsvalue_getattro, // tp_getattro + pyjsvalue_setattro, // tp_setattro + nullptr, // tp_as_buffer + Py_TPFLAGS_DEFAULT, // tp_flags + "JavaScript value wrapper", // tp_doc + nullptr, // tp_traverse + nullptr, // tp_clear + nullptr, // tp_richcompare + 0, // tp_weaklistoffset + pyjsvalue_iter, // tp_iter + pyjsvalue_iternext, // tp_iternext +}; + +// 
============================================================================
+// PyJSDictObject - Dict subclass for JS objects
+// ============================================================================
+
+// Slot and method implementations for PyJSDict_Type (defined elsewhere in this file).
+static void pyjsdict_dealloc(PyObject* self);
+static PyObject* pyjsdict_repr(PyObject* self);
+static Py_ssize_t pyjsdict_length(PyObject* self);
+static PyObject* pyjsdict_subscript(PyObject* self, PyObject* key);
+static int pyjsdict_ass_subscript(PyObject* self, PyObject* key, PyObject* value);
+static PyObject* pyjsdict_getattro(PyObject* self, PyObject* name);
+static int pyjsdict_setattro(PyObject* self, PyObject* name, PyObject* value);
+static PyObject* pyjsdict_iter(PyObject* self);
+static int pyjsdict_contains(PyObject* self, PyObject* key);
+static PyObject* pyjsdict_keys(PyObject* self, PyObject* args);
+static PyObject* pyjsdict_values(PyObject* self, PyObject* args);
+static PyObject* pyjsdict_items(PyObject* self, PyObject* args);
+static PyObject* pyjsdict_get(PyObject* self, PyObject* args);
+static PyObject* pyjsdict_pop(PyObject* self, PyObject* args);
+static PyObject* pyjsdict_update(PyObject* self, PyObject* args);
+static PyObject* pyjsdict_setdefault(PyObject* self, PyObject* args);
+static PyObject* pyjsdict_clear(PyObject* self, PyObject* args);
+
+// Method table exposed on bun.JSObject; these names shadow the inherited dict
+// methods of the same name. Terminated by the all-null sentinel entry.
+static PyMethodDef pyjsdict_methods[] = {
+    { "keys", pyjsdict_keys, METH_NOARGS, "Return keys" },
+    { "values", pyjsdict_values, METH_NOARGS, "Return values" },
+    { "items", pyjsdict_items, METH_NOARGS, "Return items" },
+    { "get", pyjsdict_get, METH_VARARGS, "Get item with default" },
+    { "pop", pyjsdict_pop, METH_VARARGS, "Remove key and return value" },
+    { "update", pyjsdict_update, METH_O, "Update dict with key/value pairs" },
+    { "setdefault", pyjsdict_setdefault, METH_VARARGS, "Set default value for key" },
+    { "clear", pyjsdict_clear, METH_NOARGS, "Remove all items" },
+    { nullptr, nullptr, 0, nullptr }
+};
+
+// Mapping protocol: len(), obj[key], obj[key] = value / del obj[key].
+static PyMappingMethods PyJSDict_as_mapping = {
+    pyjsdict_length,
+    pyjsdict_subscript,
+    pyjsdict_ass_subscript,
+};
+
+// Sequence protocol: only sq_contains is provided (the `in` operator).
+static PySequenceMethods PyJSDict_as_sequence = {
+    nullptr, // sq_length
+    nullptr, // sq_concat
+    nullptr, // sq_repeat
+    nullptr, // sq_item
+    nullptr, // was_sq_slice
+    nullptr, // sq_ass_item
+    nullptr, // was_sq_ass_slice
+    pyjsdict_contains, // sq_contains
+    nullptr, // sq_inplace_concat
+    nullptr, // sq_inplace_repeat
+};
+
+// Python type object for JS object wrappers; subclasses dict via tp_base.
+static PyTypeObject PyJSDict_Type = {
+    PyVarObject_HEAD_INIT(NULL, 0) "bun.JSObject",
+    sizeof(PyJSDictObject),
+    0,
+    pyjsdict_dealloc, // tp_dealloc
+    0, // tp_vectorcall_offset
+    nullptr, // tp_getattr
+    nullptr, // tp_setattr
+    nullptr, // tp_as_async
+    pyjsdict_repr, // tp_repr
+    nullptr, // tp_as_number
+    &PyJSDict_as_sequence, // tp_as_sequence
+    &PyJSDict_as_mapping, // tp_as_mapping
+    nullptr, // tp_hash
+    nullptr, // tp_call
+    nullptr, // tp_str
+    pyjsdict_getattro, // tp_getattro
+    pyjsdict_setattro, // tp_setattro
+    nullptr, // tp_as_buffer
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, // tp_flags
+    "JavaScript object wrapper (dict-like)", // tp_doc
+    nullptr, // tp_traverse
+    nullptr, // tp_clear
+    nullptr, // tp_richcompare
+    0, // tp_weaklistoffset
+    pyjsdict_iter, // tp_iter
+    nullptr, // tp_iternext
+    pyjsdict_methods, // tp_methods
+    nullptr, // tp_members
+    nullptr, // tp_getset
+    &PyDict_Type, // tp_base - INHERIT FROM DICT
+};
+
+// ============================================================================
+// PyJSListObject - List subclass for JS arrays
+// ============================================================================
+
+// Slot implementations for PyJSList_Type (defined elsewhere in this file).
+static void pyjslist_dealloc(PyObject* self);
+static PyObject* pyjslist_repr(PyObject* self);
+static Py_ssize_t pyjslist_length(PyObject* self);
+static PyObject* pyjslist_item(PyObject* self, Py_ssize_t index);
+static int pyjslist_ass_item(PyObject* self, Py_ssize_t index, PyObject* value);
+static PyObject* pyjslist_subscript(PyObject* self, PyObject* key);
+static int pyjslist_ass_subscript(PyObject* self, PyObject* key, PyObject* value);
+static PyObject* pyjslist_iter(PyObject* self);
+static int pyjslist_contains(PyObject* self, PyObject* value);
+
+// List methods
+static PyObject* pyjslist_append(PyObject* self, PyObject* value);
+static PyObject* pyjslist_pop(PyObject* self, PyObject* args);
+static PyObject* pyjslist_insert(PyObject* self, PyObject* args);
+static PyObject* pyjslist_extend(PyObject* self, PyObject* iterable);
+static PyObject* pyjslist_clear(PyObject* self, PyObject* args);
+static PyObject* pyjslist_reverse(PyObject* self, PyObject* args);
+
+// Method table exposed on bun.JSArray; these names shadow the inherited list
+// methods of the same name. Terminated by the all-null sentinel entry.
+static PyMethodDef pyjslist_methods[] = {
+    { "append", pyjslist_append, METH_O, "Append object to the end of the list" },
+    { "pop", pyjslist_pop, METH_VARARGS, "Remove and return item at index (default last)" },
+    { "insert", pyjslist_insert, METH_VARARGS, "Insert object before index" },
+    { "extend", pyjslist_extend, METH_O, "Extend list by appending elements from the iterable" },
+    { "clear", pyjslist_clear, METH_NOARGS, "Remove all items from list" },
+    { "reverse", pyjslist_reverse, METH_NOARGS, "Reverse list in place" },
+    { nullptr, nullptr, 0, nullptr }
+};
+
+// Sequence protocol: length, integer indexing (get/set) and containment.
+static PySequenceMethods PyJSList_as_sequence = {
+    pyjslist_length, // sq_length
+    nullptr, // sq_concat
+    nullptr, // sq_repeat
+    pyjslist_item, // sq_item
+    nullptr, // was_sq_slice
+    pyjslist_ass_item, // sq_ass_item
+    nullptr, // was_sq_ass_slice
+    pyjslist_contains, // sq_contains
+    nullptr, // sq_inplace_concat
+    nullptr, // sq_inplace_repeat
+};
+
+// Mapping protocol: len(), obj[key], obj[key] = value / del obj[key].
+static PyMappingMethods PyJSList_as_mapping = {
+    pyjslist_length,
+    pyjslist_subscript,
+    pyjslist_ass_subscript,
+};
+
+// Python type object for JS array wrappers; subclasses list via tp_base.
+static PyTypeObject PyJSList_Type = {
+    PyVarObject_HEAD_INIT(NULL, 0) "bun.JSArray",
+    sizeof(PyJSListObject),
+    0,
+    pyjslist_dealloc, // tp_dealloc
+    0, // tp_vectorcall_offset
+    nullptr, // tp_getattr
+    nullptr, // tp_setattr
+    nullptr, // tp_as_async
+    pyjslist_repr, // tp_repr
+    nullptr, // tp_as_number
+    &PyJSList_as_sequence, // tp_as_sequence
+    &PyJSList_as_mapping, // tp_as_mapping
+    nullptr, // tp_hash
+    nullptr, // tp_call
+    nullptr, // tp_str
+    PyObject_GenericGetAttr, // tp_getattro
+    nullptr, // tp_setattro
+    nullptr, // tp_as_buffer
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, // tp_flags
+    "JavaScript array wrapper (list-like)", // tp_doc
+    nullptr, // tp_traverse
+    nullptr, // tp_clear
+    nullptr, // tp_richcompare
+    0, // tp_weaklistoffset
+    pyjslist_iter, // tp_iter
+    nullptr, // tp_iternext
+    pyjslist_methods, // tp_methods
+    nullptr, // tp_members
+    nullptr, // tp_getset
+    &PyList_Type, // tp_base - INHERIT FROM LIST
+};
+
+// ============================================================================
+// Type initialization
+// ============================================================================
+
+// Readies the three wrapper types and the bound-method type.
+// A PyType_Ready() failure is printed with PyErr_Print() and initialisation
+// continues with the next type; nothing is reported to the caller.
+void PyJSValueObject::initType()
+{
+    if (PyType_Ready(&PyJSValue_Type) < 0) {
+        PyErr_Print();
+    }
+    if (PyType_Ready(&PyJSDict_Type) < 0) {
+        PyErr_Print();
+    }
+    if (PyType_Ready(&PyJSList_Type) < 0) {
+        PyErr_Print();
+    }
+    PyJSBoundMethod::initType();
+}
+
+// Allocates a bare bun.JSValue wrapper. The jsValue / globalObject fields are
+// not assigned here, so the caller has to fill them in.
+PyJSValueObject* PyJSValueObject::New()
+{
+    return PyObject_New(PyJSValueObject, &PyJSValue_Type);
+}
+
+// Allocates a bun.JSObject (dict subclass) wrapper around `value`.
+// The value is gcProtect()ed when it is a cell. Returns nullptr if the
+// allocation fails.
+// NOTE(review): the cast in the return statement has no template argument in
+// this copy of the patch - it looks like angle-bracketed text was stripped.
+PyJSValueObject* PyJSValueObject::NewDict(JSGlobalObject* globalObject, JSValue value)
+{
+    // Use GC_New since we inherit from dict (which is GC-tracked)
+    PyJSDictObject* wrapper = PyObject_GC_New(PyJSDictObject, &PyJSDict_Type);
+    if (!wrapper) {
+        return nullptr;
+    }
+
+    // Initialize dict internal fields - we don't use them but they must be valid
+    // NOTE(review): ma_keys is set to nullptr; whether every inherited dict
+    // slot that is not overridden tolerates that is not visible here - confirm.
+    wrapper->dict.ma_used = 0;
+    wrapper->dict.ma_keys = nullptr;
+    wrapper->dict.ma_values = nullptr;
+
+    wrapper->jsValue = value;
+    wrapper->globalObject = globalObject;
+
+    if (value.isCell()) {
+        gcProtect(value.asCell());
+    }
+
+    // Untrack from Python's cyclic GC - we manage JS references via gcProtect
+    PyObject_GC_UnTrack(wrapper);
+
+    return reinterpret_cast(wrapper);
+}
+
+// Allocates a bun.JSArray (list subclass) wrapper around `value`.
+// The value is gcProtect()ed when it is a cell. Returns nullptr if the
+// allocation fails.
+// NOTE(review): both casts below have no template argument in this copy of the
+// patch - it looks like angle-bracketed text was stripped.
+PyJSValueObject* PyJSValueObject::NewList(JSGlobalObject* globalObject, JSValue value)
+{
+    // Use GC_New since we inherit from list (which is GC-tracked)
+    PyJSListObject* wrapper = PyObject_GC_New(PyJSListObject, &PyJSList_Type);
+    if (!wrapper) {
+        return nullptr;
+    }
+
+    // Initialize list internal fields - we don't use them but they must be valid
+    wrapper->list.ob_item = nullptr;
+    wrapper->list.allocated = 0;
+    Py_SET_SIZE(reinterpret_cast(&wrapper->list), 0);
+
+    wrapper->jsValue = value;
+    wrapper->globalObject = globalObject;
+
+    if (value.isCell()) {
+        gcProtect(value.asCell());
+    }
+
+    // Untrack from Python's cyclic GC - we manage JS references via gcProtect
+    PyObject_GC_UnTrack(wrapper);
+
+    return reinterpret_cast(wrapper);
+}
+
+// Try to unwrap a PyObject that wraps a JSValue back to the underlying JSValue.
+// Handles PyJSValueObject, PyJSDictObject, PyJSListObject and PyJSBoundMethod
+// (for which the stored function is returned, not the `this` object).
+// Returns an empty JSValue for nullptr and for any other type.
+// NOTE(review): the checks compare Py_TYPE(obj) for exact identity, so an
+// instance of a Python subclass of bun.JSObject / bun.JSArray (both set
+// Py_TPFLAGS_BASETYPE) is not unwrapped - confirm that is intended.
+JSValue tryUnwrapJSValue(PyObject* obj)
+{
+    if (!obj) {
+        return JSValue();
+    }
+
+    PyTypeObject* type = Py_TYPE(obj);
+
+    // Check for PyJSValueObject
+    if (type == &PyJSValue_Type) {
+        PyJSValueObject* wrapper = reinterpret_cast(obj);
+        return wrapper->jsValue;
+    }
+
+    // Check for PyJSDictObject
+    if (type == &PyJSDict_Type) {
+        PyJSDictObject* wrapper = reinterpret_cast(obj);
+        return wrapper->jsValue;
+    }
+
+    // Check for PyJSListObject
+    if (type == &PyJSList_Type) {
+        PyJSListObject* wrapper = reinterpret_cast(obj);
+        return wrapper->jsValue;
+    }
+
+    // Check for PyJSBoundMethod
+    if (type == &PyJSBoundMethod_Type) {
+        PyJSBoundMethod* wrapper = reinterpret_cast(obj);
+        return wrapper->function;
+    }
+
+    return JSValue();
+}
+
+// ============================================================================
+// PyJSValueObject implementations
+// ============================================================================
+
+// tp_dealloc: drop the GC protection on the wrapped value (when it is a cell),
+// then free the Python object.
+static void pyjsvalue_dealloc(PyObject* self)
+{
+    PyJSValueObject* wrapper = reinterpret_cast(self);
+
+    if (wrapper->jsValue.isCell()) {
+        gcUnprotect(wrapper->jsValue.asCell());
+    }
+
+    Py_TYPE(self)->tp_free(self);
+}
+
+// tp_repr: the JS string conversion of the wrapped value, encoded as UTF-8.
+// Falls back to a fixed string when there is no global object or when the
+// conversion throws.
+// NOTE(review): the two fallback literals are empty in this copy of the patch -
+// most likely their angle-bracketed text was stripped; confirm against the
+// original file.
+static PyObject* pyjsvalue_repr(PyObject* self)
+{
+    PyJSValueObject* wrapper = reinterpret_cast(self);
+    JSGlobalObject* globalObject = wrapper->globalObject;
+
+    if (!globalObject) {
+        return PyUnicode_FromString("");
+    }
+
+    VM& vm = globalObject->vm();
+    auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm);
+
+    auto str = wrapper->jsValue.toWTFString(globalObject);
+    if (scope.exception()) {
+        scope.clearException();
+        return PyUnicode_FromString("");
+    }
+
+    auto utf8 = str.utf8();
+    return PyUnicode_FromStringAndSize(utf8.data(), utf8.length());
+}
+
+// tp_getattro: attribute access on a wrapped JS object.
+// (The remainder of this function lies beyond the portion shown here.)
+static PyObject* pyjsvalue_getattro(PyObject* self, PyObject* name)
+{
+    PyJSValueObject* wrapper = reinterpret_cast(self);
+    JSGlobalObject* globalObject = wrapper->globalObject;
+
+    if (!PyUnicode_Check(name)) {
+        PyErr_SetString(PyExc_TypeError, "attribute name must be string");
+        return nullptr;
+    }
+
+    const char* attrName = PyUnicode_AsUTF8(name);
+    if (!attrName) {
+        return nullptr;
+    }
+
+    // For Python dunder attributes (__class__, __dict__, etc.), use generic lookup.
+    // The test only looks at the first two characters, so every name starting
+    // with "__" takes this path and is never looked up on the JS object.
+    if (attrName[0] == '_' && attrName[1] == '_') {
+        return PyObject_GenericGetAttr(self, name);
+    }
+
+    if (!globalObject) {
+        PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available");
+        return nullptr;
+    }
+
+    VM& vm = globalObject->vm();
+    auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm);
+
+    JSObject* jsObj = wrapper->jsValue.getObject();
+    if (!jsObj) {
+        PyErr_SetString(PyExc_TypeError, "JavaScript value is not an object");
+        return nullptr;
+    }
+
+    Identifier ident = Identifier::fromString(vm, WTF::String::fromUTF8(attrName));
+
+    // Check if property exists - raise AttributeError if not.
+    // An exception thrown by the check itself is cleared and treated like
+    // whatever hasProperty returned.
+    bool hasProperty = jsObj->hasProperty(globalObject, ident);
+    if (scope.exception()) {
+        scope.clearException();
+    }
+
+    if (!hasProperty) {
+        PyErr_Format(PyExc_AttributeError, "'%.100s' object has no attribute '%.400s'",
+            Py_TYPE(self)->tp_name, attrName);
+        return nullptr;
+    }
+
+    JSValue result = jsObj->get(globalObject, ident);
+
+    if
(scope.exception()) { + scope.clearException(); + PyErr_Format(PyExc_AttributeError, "Error accessing '%s'", attrName); + return nullptr; + } + + // If the result is callable, return a bound method to preserve 'this' context + auto callData = JSC::getCallData(result); + if (callData.type != CallData::Type::None) { + return reinterpret_cast(PyJSBoundMethod::New(globalObject, result, wrapper->jsValue)); + } + + return Python::fromJS(globalObject, result); +} + +static int pyjsvalue_setattro(PyObject* self, PyObject* name, PyObject* value) +{ + PyJSValueObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!PyUnicode_Check(name)) { + PyErr_SetString(PyExc_TypeError, "attribute name must be string"); + return -1; + } + + const char* attrName = PyUnicode_AsUTF8(name); + if (!attrName) { + return -1; + } + + if (!globalObject) { + PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available"); + return -1; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSObject* jsObj = wrapper->jsValue.getObject(); + if (!jsObj) { + PyErr_SetString(PyExc_TypeError, "JavaScript value is not an object"); + return -1; + } + + Identifier ident = Identifier::fromString(vm, WTF::String::fromUTF8(attrName)); + JSValue jsVal = Python::toJS(globalObject, value); + + jsObj->putDirect(vm, ident, jsVal); + + if (scope.exception()) { + scope.clearException(); + PyErr_Format(PyExc_AttributeError, "Error setting '%s'", attrName); + return -1; + } + + return 0; +} + +static PyObject* pyjsvalue_call(PyObject* self, PyObject* args, PyObject* kwargs) +{ + PyJSValueObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!globalObject) { + PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available"); + return nullptr; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSValue calleeValue = wrapper->jsValue; + + 
Py_ssize_t argc = PyTuple_Size(args); + MarkedArgumentBuffer jsArgs; + + for (Py_ssize_t i = 0; i < argc; i++) { + PyObject* arg = PyTuple_GetItem(args, i); + jsArgs.append(Python::toJS(globalObject, arg)); + } + + JSValue result; + + // Get call and construct data + auto callData = JSC::getCallData(calleeValue); + auto constructData = JSC::getConstructData(calleeValue); + + // Determine if we should use 'new' semantics: + // 1. ES6 class - callData.js.functionExecutable->isClassConstructorFunction() is true + // 2. Native constructor - callData is Native type AND constructData is non-None + // (Native constructors that are not callable have a call handler that throws) + // 3. Not callable at all but constructable + bool useConstruct = false; + + if (callData.type == CallData::Type::None) { + // Not callable - must be construct-only + if (constructData.type != CallData::Type::None) { + useConstruct = true; + } else { + PyErr_SetString(PyExc_TypeError, "JavaScript value is not callable"); + return nullptr; + } + } else if (callData.type == CallData::Type::JS && callData.js.functionExecutable) { + // JS function - check if it's an ES6 class constructor + useConstruct = callData.js.functionExecutable->isClassConstructorFunction(); + } else if (callData.type == CallData::Type::Native && constructData.type != CallData::Type::None) { + // Native function that is also constructable - prefer construct + // This handles Bun classes like Glob, File, etc. 
that require 'new' + useConstruct = true; + } + + if (useConstruct) { + // Use 'new' semantics + result = JSC::profiledConstruct(globalObject, ProfilingReason::API, calleeValue, constructData, jsArgs); + } else { + // Regular function call + result = JSC::profiledCall(globalObject, ProfilingReason::API, calleeValue, callData, jsUndefined(), jsArgs); + } + + if (scope.exception()) { + JSValue exception = scope.exception()->value(); + scope.clearException(); + + if (exception.isObject()) { + JSObject* errObj = exception.getObject(); + JSValue msgVal = errObj->get(globalObject, Identifier::fromString(vm, "message"_s)); + if (msgVal.isString()) { + auto msg = msgVal.toWTFString(globalObject); + PyErr_Format(PyExc_RuntimeError, "JavaScript error: %s", msg.utf8().data()); + return nullptr; + } + } + PyErr_SetString(PyExc_RuntimeError, "JavaScript error during call"); + return nullptr; + } + + return Python::fromJS(globalObject, result); +} + +static PyObject* pyjsvalue_subscript(PyObject* self, PyObject* key) +{ + PyJSValueObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!globalObject) { + PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available"); + return nullptr; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSObject* jsObj = wrapper->jsValue.getObject(); + if (!jsObj) { + PyErr_SetString(PyExc_TypeError, "JavaScript value is not an object"); + return nullptr; + } + + JSValue result; + + if (PyLong_Check(key)) { + long index = PyLong_AsLong(key); + if (index >= 0) { + result = jsObj->get(globalObject, static_cast(index)); + } else { + PyErr_SetString(PyExc_IndexError, "negative index not supported"); + return nullptr; + } + } else if (PyUnicode_Check(key)) { + const char* keyStr = PyUnicode_AsUTF8(key); + if (!keyStr) { + return nullptr; + } + Identifier ident = Identifier::fromString(vm, WTF::String::fromUTF8(keyStr)); + result = jsObj->get(globalObject, 
ident); + } else { + PyErr_SetString(PyExc_TypeError, "key must be string or integer"); + return nullptr; + } + + if (scope.exception()) { + scope.clearException(); + PyErr_SetString(PyExc_KeyError, "Error accessing property"); + return nullptr; + } + + return Python::fromJS(globalObject, result); +} + +static int pyjsvalue_ass_subscript(PyObject* self, PyObject* key, PyObject* value) +{ + PyJSValueObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!globalObject) { + PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available"); + return -1; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSObject* jsObj = wrapper->jsValue.getObject(); + if (!jsObj) { + PyErr_SetString(PyExc_TypeError, "JavaScript value is not an object"); + return -1; + } + + JSValue jsVal = Python::toJS(globalObject, value); + + if (PyLong_Check(key)) { + long index = PyLong_AsLong(key); + if (index >= 0) { + jsObj->putDirectIndex(globalObject, static_cast(index), jsVal); + } else { + PyErr_SetString(PyExc_IndexError, "negative index not supported"); + return -1; + } + } else if (PyUnicode_Check(key)) { + const char* keyStr = PyUnicode_AsUTF8(key); + if (!keyStr) { + return -1; + } + Identifier ident = Identifier::fromString(vm, WTF::String::fromUTF8(keyStr)); + jsObj->putDirect(vm, ident, jsVal); + } else { + PyErr_SetString(PyExc_TypeError, "key must be string or integer"); + return -1; + } + + if (scope.exception()) { + scope.clearException(); + PyErr_SetString(PyExc_KeyError, "Error setting property"); + return -1; + } + + return 0; +} + +// ============================================================================ +// PyJSDictObject implementations +// ============================================================================ + +static void pyjsdict_dealloc(PyObject* self) +{ + PyJSDictObject* wrapper = reinterpret_cast(self); + + if (wrapper->jsValue.isCell()) { + 
gcUnprotect(wrapper->jsValue.asCell()); + } + + // Use GC_Del since we allocated with GC_New + PyObject_GC_Del(self); +} + +static PyObject* pyjsdict_repr(PyObject* self) +{ + PyJSDictObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!globalObject) { + return PyUnicode_FromString("{}"); + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + auto str = wrapper->jsValue.toWTFString(globalObject); + if (scope.exception()) { + scope.clearException(); + return PyUnicode_FromString("{}"); + } + + auto utf8 = str.utf8(); + return PyUnicode_FromStringAndSize(utf8.data(), utf8.length()); +} + +static Py_ssize_t pyjsdict_length(PyObject* self) +{ + PyJSDictObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!globalObject) { + return 0; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSObject* jsObj = wrapper->jsValue.getObject(); + if (!jsObj) { + return 0; + } + + PropertyNameArrayBuilder propertyNames(vm, PropertyNameMode::Strings, PrivateSymbolMode::Exclude); + JSObject::getOwnPropertyNames(jsObj, globalObject, propertyNames, DontEnumPropertiesMode::Exclude); + + if (scope.exception()) { + scope.clearException(); + return 0; + } + + return static_cast(propertyNames.size()); +} + +static PyObject* pyjsdict_subscript(PyObject* self, PyObject* key) +{ + PyJSDictObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!globalObject) { + PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available"); + return nullptr; + } + + if (!PyUnicode_Check(key)) { + PyErr_SetString(PyExc_TypeError, "key must be string"); + return nullptr; + } + + const char* keyStr = PyUnicode_AsUTF8(key); + if (!keyStr) { + return nullptr; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSObject* jsObj = 
wrapper->jsValue.getObject(); + if (!jsObj) { + PyErr_SetString(PyExc_TypeError, "JavaScript value is not an object"); + return nullptr; + } + + Identifier ident = Identifier::fromString(vm, WTF::String::fromUTF8(keyStr)); + + // Check if property exists + if (!jsObj->hasProperty(globalObject, ident)) { + if (scope.exception()) { + scope.clearException(); + } + PyErr_SetObject(PyExc_KeyError, key); + return nullptr; + } + + JSValue result = jsObj->get(globalObject, ident); + + if (scope.exception()) { + scope.clearException(); + PyErr_SetObject(PyExc_KeyError, key); + return nullptr; + } + + return Python::fromJS(globalObject, result); +} + +static int pyjsdict_ass_subscript(PyObject* self, PyObject* key, PyObject* value) +{ + PyJSDictObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!globalObject) { + PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available"); + return -1; + } + + if (!PyUnicode_Check(key)) { + PyErr_SetString(PyExc_TypeError, "key must be string"); + return -1; + } + + const char* keyStr = PyUnicode_AsUTF8(key); + if (!keyStr) { + return -1; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSObject* jsObj = wrapper->jsValue.getObject(); + if (!jsObj) { + PyErr_SetString(PyExc_TypeError, "JavaScript value is not an object"); + return -1; + } + + Identifier ident = Identifier::fromString(vm, WTF::String::fromUTF8(keyStr)); + + if (value == nullptr) { + // Delete + jsObj->deleteProperty(globalObject, ident); + } else { + JSValue jsVal = Python::toJS(globalObject, value); + jsObj->putDirect(vm, ident, jsVal); + } + + if (scope.exception()) { + scope.clearException(); + PyErr_SetString(PyExc_RuntimeError, "Error modifying property"); + return -1; + } + + return 0; +} + +static PyObject* pyjsdict_getattro(PyObject* self, PyObject* name) +{ + // First try to find the attribute in the type (for methods like keys(), values(), etc.) 
+ PyObject* result = PyObject_GenericGetAttr(self, name); + if (result || !PyErr_ExceptionMatches(PyExc_AttributeError)) { + return result; + } + PyErr_Clear(); + + // Fall back to JS property access + PyJSDictObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!PyUnicode_Check(name)) { + PyErr_SetString(PyExc_TypeError, "attribute name must be string"); + return nullptr; + } + + const char* attrName = PyUnicode_AsUTF8(name); + if (!attrName) { + return nullptr; + } + + if (!globalObject) { + PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available"); + return nullptr; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSObject* jsObj = wrapper->jsValue.getObject(); + if (!jsObj) { + PyErr_SetString(PyExc_TypeError, "JavaScript value is not an object"); + return nullptr; + } + + Identifier ident = Identifier::fromString(vm, WTF::String::fromUTF8(attrName)); + + // Check if property exists - raise AttributeError if not + bool hasProperty = jsObj->hasProperty(globalObject, ident); + if (scope.exception()) { + scope.clearException(); + } + + if (!hasProperty) { + PyErr_Format(PyExc_AttributeError, "'%.100s' object has no attribute '%.400s'", + Py_TYPE(self)->tp_name, attrName); + return nullptr; + } + + JSValue jsResult = jsObj->get(globalObject, ident); + + if (scope.exception()) { + scope.clearException(); + PyErr_Format(PyExc_AttributeError, "Error accessing '%s'", attrName); + return nullptr; + } + + // If the result is callable, return a bound method to preserve 'this' context + auto callData = JSC::getCallData(jsResult); + if (callData.type != CallData::Type::None) { + return reinterpret_cast(PyJSBoundMethod::New(globalObject, jsResult, wrapper->jsValue)); + } + + return Python::fromJS(globalObject, jsResult); +} + +static int pyjsdict_setattro(PyObject* self, PyObject* name, PyObject* value) +{ + PyJSDictObject* wrapper = reinterpret_cast(self); + 
JSGlobalObject* globalObject = wrapper->globalObject; + + if (!PyUnicode_Check(name)) { + PyErr_SetString(PyExc_TypeError, "attribute name must be string"); + return -1; + } + + const char* attrName = PyUnicode_AsUTF8(name); + if (!attrName) { + return -1; + } + + if (!globalObject) { + PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available"); + return -1; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSObject* jsObj = wrapper->jsValue.getObject(); + if (!jsObj) { + PyErr_SetString(PyExc_TypeError, "JavaScript value is not an object"); + return -1; + } + + Identifier ident = Identifier::fromString(vm, WTF::String::fromUTF8(attrName)); + JSValue jsVal = Python::toJS(globalObject, value); + + jsObj->putDirect(vm, ident, jsVal); + + if (scope.exception()) { + scope.clearException(); + PyErr_Format(PyExc_AttributeError, "Error setting '%s'", attrName); + return -1; + } + + return 0; +} + +static int pyjsdict_contains(PyObject* self, PyObject* key) +{ + PyJSDictObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!globalObject || !PyUnicode_Check(key)) { + return 0; + } + + const char* keyStr = PyUnicode_AsUTF8(key); + if (!keyStr) { + PyErr_Clear(); + return 0; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSObject* jsObj = wrapper->jsValue.getObject(); + if (!jsObj) { + return 0; + } + + Identifier ident = Identifier::fromString(vm, WTF::String::fromUTF8(keyStr)); + bool has = jsObj->hasProperty(globalObject, ident); + + if (scope.exception()) { + scope.clearException(); + return 0; + } + + return has ? 
1 : 0; +} + +// Helper to get property names as a Python list +static PyObject* getPropertyNamesAsList(PyJSDictObject* wrapper) +{ + JSGlobalObject* globalObject = wrapper->globalObject; + if (!globalObject) { + return PyList_New(0); + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSObject* jsObj = wrapper->jsValue.getObject(); + if (!jsObj) { + return PyList_New(0); + } + + PropertyNameArrayBuilder propertyNames(vm, PropertyNameMode::Strings, PrivateSymbolMode::Exclude); + JSObject::getOwnPropertyNames(jsObj, globalObject, propertyNames, DontEnumPropertiesMode::Exclude); + + if (scope.exception()) { + scope.clearException(); + return PyList_New(0); + } + + PyObject* list = PyList_New(propertyNames.size()); + if (!list) { + return nullptr; + } + + for (size_t i = 0; i < propertyNames.size(); i++) { + auto& propName = propertyNames[i]; + auto str = propName.string().string(); // AtomString -> String + auto utf8 = str.utf8(); + PyObject* pyStr = PyUnicode_FromStringAndSize(utf8.data(), utf8.length()); + if (!pyStr) { + Py_DECREF(list); + return nullptr; + } + PyList_SET_ITEM(list, i, pyStr); + } + + return list; +} + +static PyObject* pyjsdict_keys(PyObject* self, PyObject* args) +{ + return getPropertyNamesAsList(reinterpret_cast(self)); +} + +static PyObject* pyjsdict_values(PyObject* self, PyObject* args) +{ + PyJSDictObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!globalObject) { + return PyList_New(0); + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSObject* jsObj = wrapper->jsValue.getObject(); + if (!jsObj) { + return PyList_New(0); + } + + PropertyNameArrayBuilder propertyNames(vm, PropertyNameMode::Strings, PrivateSymbolMode::Exclude); + JSObject::getOwnPropertyNames(jsObj, globalObject, propertyNames, DontEnumPropertiesMode::Exclude); + + if (scope.exception()) { + scope.clearException(); + return PyList_New(0); + } + 
+ PyObject* list = PyList_New(propertyNames.size()); + if (!list) { + return nullptr; + } + + for (size_t i = 0; i < propertyNames.size(); i++) { + JSValue val = jsObj->get(globalObject, propertyNames[i]); + if (scope.exception()) { + scope.clearException(); + Py_DECREF(list); + return PyList_New(0); + } + PyObject* pyVal = Python::fromJS(globalObject, val); + if (!pyVal) { + Py_DECREF(list); + return nullptr; + } + PyList_SET_ITEM(list, i, pyVal); + } + + return list; +} + +static PyObject* pyjsdict_items(PyObject* self, PyObject* args) +{ + PyJSDictObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!globalObject) { + return PyList_New(0); + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSObject* jsObj = wrapper->jsValue.getObject(); + if (!jsObj) { + return PyList_New(0); + } + + PropertyNameArrayBuilder propertyNames(vm, PropertyNameMode::Strings, PrivateSymbolMode::Exclude); + JSObject::getOwnPropertyNames(jsObj, globalObject, propertyNames, DontEnumPropertiesMode::Exclude); + + if (scope.exception()) { + scope.clearException(); + return PyList_New(0); + } + + PyObject* list = PyList_New(propertyNames.size()); + if (!list) { + return nullptr; + } + + for (size_t i = 0; i < propertyNames.size(); i++) { + auto& propName = propertyNames[i]; + auto str = propName.string().string(); // AtomString -> String + auto utf8 = str.utf8(); + + JSValue val = jsObj->get(globalObject, propName); + if (scope.exception()) { + scope.clearException(); + Py_DECREF(list); + return PyList_New(0); + } + + PyObject* pyKey = PyUnicode_FromStringAndSize(utf8.data(), utf8.length()); + PyObject* pyVal = Python::fromJS(globalObject, val); + if (!pyKey || !pyVal) { + Py_XDECREF(pyKey); + Py_XDECREF(pyVal); + Py_DECREF(list); + return nullptr; + } + + PyObject* tuple = PyTuple_Pack(2, pyKey, pyVal); + Py_DECREF(pyKey); + Py_DECREF(pyVal); + if (!tuple) { + Py_DECREF(list); + return nullptr; + } + 
PyList_SET_ITEM(list, i, tuple); + } + + return list; +} + +static PyObject* pyjsdict_get(PyObject* self, PyObject* args) +{ + PyObject* key; + PyObject* defaultValue = Py_None; + + if (!PyArg_ParseTuple(args, "O|O", &key, &defaultValue)) { + return nullptr; + } + + PyJSDictObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!globalObject || !PyUnicode_Check(key)) { + Py_INCREF(defaultValue); + return defaultValue; + } + + const char* keyStr = PyUnicode_AsUTF8(key); + if (!keyStr) { + PyErr_Clear(); + Py_INCREF(defaultValue); + return defaultValue; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSObject* jsObj = wrapper->jsValue.getObject(); + if (!jsObj) { + Py_INCREF(defaultValue); + return defaultValue; + } + + Identifier ident = Identifier::fromString(vm, WTF::String::fromUTF8(keyStr)); + + if (!jsObj->hasProperty(globalObject, ident)) { + if (scope.exception()) { + scope.clearException(); + } + Py_INCREF(defaultValue); + return defaultValue; + } + + JSValue result = jsObj->get(globalObject, ident); + + if (scope.exception()) { + scope.clearException(); + Py_INCREF(defaultValue); + return defaultValue; + } + + return Python::fromJS(globalObject, result); +} + +static PyObject* pyjsdict_pop(PyObject* self, PyObject* args) +{ + PyObject* key; + PyObject* defaultValue = nullptr; + + if (!PyArg_ParseTuple(args, "O|O", &key, &defaultValue)) { + return nullptr; + } + + PyJSDictObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!globalObject || !PyUnicode_Check(key)) { + if (defaultValue) { + Py_INCREF(defaultValue); + return defaultValue; + } + PyErr_SetObject(PyExc_KeyError, key); + return nullptr; + } + + const char* keyStr = PyUnicode_AsUTF8(key); + if (!keyStr) { + PyErr_Clear(); + if (defaultValue) { + Py_INCREF(defaultValue); + return defaultValue; + } + PyErr_SetObject(PyExc_KeyError, key); + return nullptr; + } 
+ + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSObject* jsObj = wrapper->jsValue.getObject(); + if (!jsObj) { + if (defaultValue) { + Py_INCREF(defaultValue); + return defaultValue; + } + PyErr_SetObject(PyExc_KeyError, key); + return nullptr; + } + + Identifier ident = Identifier::fromString(vm, WTF::String::fromUTF8(keyStr)); + + if (!jsObj->hasProperty(globalObject, ident)) { + if (scope.exception()) { + scope.clearException(); + } + if (defaultValue) { + Py_INCREF(defaultValue); + return defaultValue; + } + PyErr_SetObject(PyExc_KeyError, key); + return nullptr; + } + + // Get the value first + JSValue result = jsObj->get(globalObject, ident); + if (scope.exception()) { + scope.clearException(); + if (defaultValue) { + Py_INCREF(defaultValue); + return defaultValue; + } + PyErr_SetObject(PyExc_KeyError, key); + return nullptr; + } + + // Delete the property + jsObj->deleteProperty(globalObject, ident); + if (scope.exception()) { + scope.clearException(); + } + + return Python::fromJS(globalObject, result); +} + +static PyObject* pyjsdict_update(PyObject* self, PyObject* other) +{ + PyJSDictObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!globalObject) { + PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available"); + return nullptr; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSObject* jsObj = wrapper->jsValue.getObject(); + if (!jsObj) { + PyErr_SetString(PyExc_TypeError, "JavaScript value is not an object"); + return nullptr; + } + + // Handle dict-like objects + if (PyDict_Check(other)) { + PyObject* key; + PyObject* value; + Py_ssize_t pos = 0; + + while (PyDict_Next(other, &pos, &key, &value)) { + if (!PyUnicode_Check(key)) { + continue; + } + const char* keyStr = PyUnicode_AsUTF8(key); + if (!keyStr) { + PyErr_Clear(); + continue; + } + + Identifier ident = Identifier::fromString(vm, 
WTF::String::fromUTF8(keyStr)); + JSValue jsVal = Python::toJS(globalObject, value); + jsObj->putDirect(vm, ident, jsVal); + + if (scope.exception()) { + scope.clearException(); + } + } + } else if (PyMapping_Check(other)) { + // Handle mapping protocol + PyObject* keys = PyMapping_Keys(other); + if (!keys) { + return nullptr; + } + + Py_ssize_t len = PyList_Size(keys); + for (Py_ssize_t i = 0; i < len; i++) { + PyObject* key = PyList_GetItem(keys, i); + if (!PyUnicode_Check(key)) { + continue; + } + const char* keyStr = PyUnicode_AsUTF8(key); + if (!keyStr) { + PyErr_Clear(); + continue; + } + + PyObject* value = PyObject_GetItem(other, key); + if (!value) { + PyErr_Clear(); + continue; + } + + Identifier ident = Identifier::fromString(vm, WTF::String::fromUTF8(keyStr)); + JSValue jsVal = Python::toJS(globalObject, value); + Py_DECREF(value); + jsObj->putDirect(vm, ident, jsVal); + + if (scope.exception()) { + scope.clearException(); + } + } + Py_DECREF(keys); + } else { + PyErr_SetString(PyExc_TypeError, "argument must be a mapping"); + return nullptr; + } + + Py_RETURN_NONE; +} + +static PyObject* pyjsdict_setdefault(PyObject* self, PyObject* args) +{ + PyObject* key; + PyObject* defaultValue = Py_None; + + if (!PyArg_ParseTuple(args, "O|O", &key, &defaultValue)) { + return nullptr; + } + + PyJSDictObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!globalObject || !PyUnicode_Check(key)) { + Py_INCREF(defaultValue); + return defaultValue; + } + + const char* keyStr = PyUnicode_AsUTF8(key); + if (!keyStr) { + PyErr_Clear(); + Py_INCREF(defaultValue); + return defaultValue; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSObject* jsObj = wrapper->jsValue.getObject(); + if (!jsObj) { + Py_INCREF(defaultValue); + return defaultValue; + } + + Identifier ident = Identifier::fromString(vm, WTF::String::fromUTF8(keyStr)); + + if (jsObj->hasProperty(globalObject, ident)) { + 
if (scope.exception()) { + scope.clearException(); + } + JSValue result = jsObj->get(globalObject, ident); + if (scope.exception()) { + scope.clearException(); + Py_INCREF(defaultValue); + return defaultValue; + } + return Python::fromJS(globalObject, result); + } + + // Key doesn't exist, set default value + if (scope.exception()) { + scope.clearException(); + } + JSValue jsVal = Python::toJS(globalObject, defaultValue); + jsObj->putDirect(vm, ident, jsVal); + + if (scope.exception()) { + scope.clearException(); + } + + Py_INCREF(defaultValue); + return defaultValue; +} + +static PyObject* pyjsdict_clear(PyObject* self, PyObject* args) +{ + PyJSDictObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!globalObject) { + Py_RETURN_NONE; + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + JSObject* jsObj = wrapper->jsValue.getObject(); + if (!jsObj) { + Py_RETURN_NONE; + } + + // Get all property names and delete them + PropertyNameArrayBuilder propertyNames(vm, PropertyNameMode::Strings, PrivateSymbolMode::Exclude); + JSObject::getOwnPropertyNames(jsObj, globalObject, propertyNames, DontEnumPropertiesMode::Exclude); + + if (scope.exception()) { + scope.clearException(); + Py_RETURN_NONE; + } + + for (size_t i = 0; i < propertyNames.size(); i++) { + jsObj->deleteProperty(globalObject, propertyNames[i]); + if (scope.exception()) { + scope.clearException(); + } + } + + Py_RETURN_NONE; +} + +// Iterator for dict - iterates over keys +struct PyJSDictIterator { + PyObject_HEAD PyJSDictObject* dict; + PyObject* keys; // List of keys + Py_ssize_t index; // Current position +}; + +static void pyjsdictiter_dealloc(PyObject* self) +{ + PyJSDictIterator* iter = reinterpret_cast(self); + Py_XDECREF(iter->dict); + Py_XDECREF(iter->keys); + PyObject_Del(self); +} + +static PyObject* pyjsdictiter_next(PyObject* self) +{ + PyJSDictIterator* iter = reinterpret_cast(self); + + if (!iter->keys || 
iter->index >= PyList_Size(iter->keys)) { + return nullptr; // StopIteration + } + + PyObject* key = PyList_GetItem(iter->keys, iter->index); + iter->index++; + Py_INCREF(key); + return key; +} + +static PyTypeObject PyJSDictIterator_Type = { + PyVarObject_HEAD_INIT(NULL, 0) "bun.JSObjectIterator", + sizeof(PyJSDictIterator), + 0, + pyjsdictiter_dealloc, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + Py_TPFLAGS_DEFAULT, + "JavaScript object key iterator", + 0, + 0, + 0, + 0, + PyObject_SelfIter, + pyjsdictiter_next, +}; + +static PyObject* pyjsdict_iter(PyObject* self) +{ + if (PyType_Ready(&PyJSDictIterator_Type) < 0) { + return nullptr; + } + + PyJSDictIterator* iter = PyObject_New(PyJSDictIterator, &PyJSDictIterator_Type); + if (!iter) { + return nullptr; + } + + iter->dict = reinterpret_cast(self); + Py_INCREF(iter->dict); + iter->keys = getPropertyNamesAsList(iter->dict); + iter->index = 0; + + if (!iter->keys) { + Py_DECREF(iter); + return nullptr; + } + + return reinterpret_cast(iter); +} + +// ============================================================================ +// PyJSListObject implementations +// ============================================================================ + +static void pyjslist_dealloc(PyObject* self) +{ + PyJSListObject* wrapper = reinterpret_cast(self); + + if (wrapper->jsValue.isCell()) { + gcUnprotect(wrapper->jsValue.asCell()); + } + + // Use GC_Del since we allocated with GC_New + PyObject_GC_Del(self); +} + +static PyObject* pyjslist_repr(PyObject* self) +{ + PyJSListObject* wrapper = reinterpret_cast(self); + JSGlobalObject* globalObject = wrapper->globalObject; + + if (!globalObject) { + return PyUnicode_FromString("[]"); + } + + VM& vm = globalObject->vm(); + auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm); + + auto str = wrapper->jsValue.toWTFString(globalObject); + if (scope.exception()) { + scope.clearException(); + return PyUnicode_FromString("[]"); + } + + auto utf8 = str.utf8(); + return 
PyUnicode_FromStringAndSize(utf8.data(), utf8.length());
+}
+
+// sq_length / mp_length: length of the wrapped JS array. Returns 0 (never an
+// error) when the global object is missing, the value is not a JSArray, or a
+// JS exception is pending afterwards.
+static Py_ssize_t pyjslist_length(PyObject* self)
+{
+    PyJSListObject* wrapper = reinterpret_cast<PyJSListObject*>(self);
+    JSGlobalObject* globalObject = wrapper->globalObject;
+
+    if (!globalObject) {
+        return 0;
+    }
+
+    VM& vm = globalObject->vm();
+    auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm);
+
+    JSObject* jsObj = wrapper->jsValue.getObject();
+    if (!jsObj) {
+        return 0;
+    }
+
+    JSArray* jsArray = jsDynamicCast<JSArray*>(jsObj);
+    if (!jsArray) {
+        return 0;
+    }
+
+    unsigned length = jsArray->length();
+
+    if (scope.exception()) {
+        scope.clearException();
+        return 0;
+    }
+
+    return static_cast<Py_ssize_t>(length);
+}
+
+// sq_item: returns a new reference to element `index` converted to Python.
+// Negative indices count from the end. Sets IndexError when the index is out
+// of range or the JS lookup throws, RuntimeError without a global object, and
+// TypeError when the wrapped value is not an object.
+static PyObject* pyjslist_item(PyObject* self, Py_ssize_t index)
+{
+    PyJSListObject* wrapper = reinterpret_cast<PyJSListObject*>(self);
+    JSGlobalObject* globalObject = wrapper->globalObject;
+
+    if (!globalObject) {
+        PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available");
+        return nullptr;
+    }
+
+    if (index < 0) {
+        // Convert negative index
+        Py_ssize_t len = pyjslist_length(self);
+        index = len + index;
+        if (index < 0) {
+            PyErr_SetString(PyExc_IndexError, "list index out of range");
+            return nullptr;
+        }
+    }
+
+    VM& vm = globalObject->vm();
+    auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm);
+
+    JSObject* jsObj = wrapper->jsValue.getObject();
+    if (!jsObj) {
+        PyErr_SetString(PyExc_TypeError, "JavaScript value is not an object");
+        return nullptr;
+    }
+
+    JSValue result = jsObj->get(globalObject, static_cast<unsigned>(index));
+
+    if (scope.exception()) {
+        scope.clearException();
+        PyErr_SetString(PyExc_IndexError, "list index out of range");
+        return nullptr;
+    }
+
+    // `undefined` is a legal element value, so only treat it as out of range
+    // when the index really is past the end of the array.
+    if (result.isUndefined()) {
+        JSArray* jsArray = jsDynamicCast<JSArray*>(jsObj);
+        if (jsArray && static_cast<size_t>(index) >= jsArray->length()) {
+            PyErr_SetString(PyExc_IndexError, "list index out of range");
+            return nullptr;
+        }
+    }
+
+    return Python::fromJS(globalObject, result);
+}
+
+// sq_ass_item: stores `value` at `index`; a null `value` (Python `del`)
+// writes `undefined` instead of removing the slot. Returns 0 on success and
+// -1 with a Python exception set on failure.
+static int pyjslist_ass_item(PyObject* self, Py_ssize_t index, PyObject* value)
+{
+
PyJSListObject* wrapper = reinterpret_cast<PyJSListObject*>(self);
+    JSGlobalObject* globalObject = wrapper->globalObject;
+
+    if (!globalObject) {
+        PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available");
+        return -1;
+    }
+
+    if (index < 0) {
+        Py_ssize_t len = pyjslist_length(self);
+        index = len + index;
+        if (index < 0) {
+            PyErr_SetString(PyExc_IndexError, "list assignment index out of range");
+            return -1;
+        }
+    }
+
+    VM& vm = globalObject->vm();
+    auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm);
+
+    JSObject* jsObj = wrapper->jsValue.getObject();
+    if (!jsObj) {
+        PyErr_SetString(PyExc_TypeError, "JavaScript value is not an object");
+        return -1;
+    }
+
+    if (value == nullptr) {
+        // Delete - not directly supported, set to undefined
+        jsObj->putDirectIndex(globalObject, static_cast<unsigned>(index), jsUndefined());
+    } else {
+        JSValue jsVal = Python::toJS(globalObject, value);
+        jsObj->putDirectIndex(globalObject, static_cast<unsigned>(index), jsVal);
+    }
+
+    if (scope.exception()) {
+        scope.clearException();
+        PyErr_SetString(PyExc_IndexError, "Error setting list item");
+        return -1;
+    }
+
+    return 0;
+}
+
+// mp_subscript: `obj[key]` for an int or a slice. An int key defers to
+// pyjslist_item; a slice builds a new Python list of the selected items.
+// Any other key type sets TypeError.
+static PyObject* pyjslist_subscript(PyObject* self, PyObject* key)
+{
+    if (PyLong_Check(key)) {
+        Py_ssize_t index = PyLong_AsSsize_t(key);
+        if (index == -1 && PyErr_Occurred()) {
+            return nullptr;
+        }
+        return pyjslist_item(self, index);
+    }
+
+    if (PySlice_Check(key)) {
+        // Handle slices - for now, create a new Python list
+        Py_ssize_t len = pyjslist_length(self);
+        Py_ssize_t start, stop, step, slicelength;
+
+        if (PySlice_GetIndicesEx(key, len, &start, &stop, &step, &slicelength) < 0) {
+            return nullptr;
+        }
+
+        PyObject* result = PyList_New(slicelength);
+        if (!result) {
+            return nullptr;
+        }
+
+        for (Py_ssize_t i = 0, cur = start; i < slicelength; i++, cur += step) {
+            PyObject* item = pyjslist_item(self, cur);
+            if (!item) {
+                Py_DECREF(result);
+                return nullptr;
+            }
+            // PyList_SET_ITEM takes over the reference returned by pyjslist_item.
+            PyList_SET_ITEM(result, i, item);
+        }
+
+        return result;
+    }
+
+    PyErr_SetString(PyExc_TypeError, "list indices must be integers or slices");
+    return nullptr;
+}
+
+// mp_ass_subscript: `obj[key] = value` / `del obj[key]` for int keys only;
+// slice assignment is not supported and sets TypeError.
+static int pyjslist_ass_subscript(PyObject* self, PyObject* key, PyObject* value)
+{
+    if (PyLong_Check(key)) {
+        Py_ssize_t index = PyLong_AsSsize_t(key);
+        if (index == -1 && PyErr_Occurred()) {
+            return -1;
+        }
+        return pyjslist_ass_item(self, index, value);
+    }
+
+    PyErr_SetString(PyExc_TypeError, "list indices must be integers");
+    return -1;
+}
+
+// sq_contains: linear search comparing each element with `==`. Returns 1 when
+// a match is found, otherwise 0.
+// NOTE(review): errors from fetching or comparing an element are cleared and
+// that element is skipped, so this never returns -1 - confirm that is intended.
+static int pyjslist_contains(PyObject* self, PyObject* value)
+{
+    Py_ssize_t len = pyjslist_length(self);
+
+    for (Py_ssize_t i = 0; i < len; i++) {
+        PyObject* item = pyjslist_item(self, i);
+        if (!item) {
+            PyErr_Clear();
+            continue;
+        }
+
+        int cmp = PyObject_RichCompareBool(item, value, Py_EQ);
+        Py_DECREF(item);
+
+        if (cmp < 0) {
+            PyErr_Clear();
+            continue;
+        }
+        if (cmp) {
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+// ============================================================================
+// PyJSListObject method implementations
+// ============================================================================
+
+// list.append(value): converts `value` to JS and stores it at index `length`
+// of the wrapped array. Returns None, or nullptr with RuntimeError/TypeError
+// set.
+static PyObject* pyjslist_append(PyObject* self, PyObject* value)
+{
+    PyJSListObject* wrapper = reinterpret_cast<PyJSListObject*>(self);
+    JSGlobalObject* globalObject = wrapper->globalObject;
+
+    if (!globalObject) {
+        PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available");
+        return nullptr;
+    }
+
+    VM& vm = globalObject->vm();
+    auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm);
+
+    JSObject* jsObj = wrapper->jsValue.getObject();
+    if (!jsObj) {
+        PyErr_SetString(PyExc_TypeError, "JavaScript value is not an object");
+        return nullptr;
+    }
+
+    JSArray* jsArray = jsDynamicCast<JSArray*>(jsObj);
+    if (!jsArray) {
+        PyErr_SetString(PyExc_TypeError, "JavaScript value is not an array");
+        return nullptr;
+    }
+
+    unsigned length = jsArray->length();
+    JSValue jsVal = Python::toJS(globalObject, value);
+    jsArray->putDirectIndex(globalObject, length, jsVal);
+
+    if (scope.exception()) {
+        scope.clearException();
+        PyErr_SetString(PyExc_RuntimeError, "Error appending to array");
+        return nullptr;
+    }
+
+    Py_RETURN_NONE;
+}
+
+// list.pop([index]): removes the element at `index` (default: last) with
+// Array.prototype.splice and returns it converted to Python. Sets IndexError
+// for an empty array or an out-of-range index.
+static PyObject* pyjslist_pop(PyObject* self, PyObject* args)
+{
+    PyJSListObject* wrapper = reinterpret_cast<PyJSListObject*>(self);
+    JSGlobalObject* globalObject = wrapper->globalObject;
+
+    Py_ssize_t index = -1;
+    if (!PyArg_ParseTuple(args, "|n", &index)) {
+        return nullptr;
+    }
+
+    if (!globalObject) {
+        PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available");
+        return nullptr;
+    }
+
+    VM& vm = globalObject->vm();
+    auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm);
+
+    JSObject* jsObj = wrapper->jsValue.getObject();
+    if (!jsObj) {
+        PyErr_SetString(PyExc_TypeError, "JavaScript value is not an object");
+        return nullptr;
+    }
+
+    JSArray* jsArray = jsDynamicCast<JSArray*>(jsObj);
+    if (!jsArray) {
+        PyErr_SetString(PyExc_TypeError, "JavaScript value is not an array");
+        return nullptr;
+    }
+
+    unsigned length = jsArray->length();
+    if (length == 0) {
+        PyErr_SetString(PyExc_IndexError, "pop from empty list");
+        return nullptr;
+    }
+
+    // Handle negative index
+    if (index < 0) {
+        index = static_cast<Py_ssize_t>(length) + index;
+    }
+
+    if (index < 0 || static_cast<size_t>(index) >= length) {
+        PyErr_SetString(PyExc_IndexError, "pop index out of range");
+        return nullptr;
+    }
+
+    // Get the item to return
+    JSValue result = jsArray->get(globalObject, static_cast<unsigned>(index));
+    if (scope.exception()) {
+        scope.clearException();
+        PyErr_SetString(PyExc_RuntimeError, "Error getting item");
+        return nullptr;
+    }
+
+    // Use Array.prototype.splice to remove the item
+    JSValue spliceMethod = jsArray->get(globalObject, Identifier::fromString(vm, "splice"_s));
+    if (scope.exception() || !spliceMethod.isObject()) {
+        scope.clearException();
+        PyErr_SetString(PyExc_RuntimeError, "Cannot access splice method");
+        return nullptr;
+    }
+
+    auto callData = JSC::getCallData(spliceMethod);
+    if (callData.type == CallData::Type::None) {
+        PyErr_SetString(PyExc_RuntimeError, "splice is not callable");
+        return nullptr;
+    }
+
+
MarkedArgumentBuffer spliceArgs;
+    spliceArgs.append(jsNumber(index));
+    spliceArgs.append(jsNumber(1));
+
+    JSC::profiledCall(globalObject, ProfilingReason::API, spliceMethod, callData, jsArray, spliceArgs);
+
+    if (scope.exception()) {
+        scope.clearException();
+        PyErr_SetString(PyExc_RuntimeError, "Error removing item");
+        return nullptr;
+    }
+
+    return Python::fromJS(globalObject, result);
+}
+
+// list.insert(index, value): inserts `value` before `index` with
+// Array.prototype.splice. The index is clamped to [0, length], so
+// out-of-range positions insert at the nearest end. Returns None, or nullptr
+// with a Python exception set.
+static PyObject* pyjslist_insert(PyObject* self, PyObject* args)
+{
+    PyJSListObject* wrapper = reinterpret_cast<PyJSListObject*>(self);
+    JSGlobalObject* globalObject = wrapper->globalObject;
+
+    Py_ssize_t index;
+    PyObject* value;
+    if (!PyArg_ParseTuple(args, "nO", &index, &value)) {
+        return nullptr;
+    }
+
+    if (!globalObject) {
+        PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available");
+        return nullptr;
+    }
+
+    VM& vm = globalObject->vm();
+    auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm);
+
+    JSObject* jsObj = wrapper->jsValue.getObject();
+    if (!jsObj) {
+        PyErr_SetString(PyExc_TypeError, "JavaScript value is not an object");
+        return nullptr;
+    }
+
+    JSArray* jsArray = jsDynamicCast<JSArray*>(jsObj);
+    if (!jsArray) {
+        PyErr_SetString(PyExc_TypeError, "JavaScript value is not an array");
+        return nullptr;
+    }
+
+    unsigned length = jsArray->length();
+
+    // Handle negative index
+    if (index < 0) {
+        index = static_cast<Py_ssize_t>(length) + index;
+        if (index < 0) {
+            index = 0;
+        }
+    } else if (static_cast<size_t>(index) > length) {
+        index = static_cast<Py_ssize_t>(length);
+    }
+
+    // Use Array.prototype.splice to insert the item
+    JSValue spliceMethod = jsArray->get(globalObject, Identifier::fromString(vm, "splice"_s));
+    if (scope.exception() || !spliceMethod.isObject()) {
+        scope.clearException();
+        PyErr_SetString(PyExc_RuntimeError, "Cannot access splice method");
+        return nullptr;
+    }
+
+    auto callData = JSC::getCallData(spliceMethod);
+    if (callData.type == CallData::Type::None) {
+        PyErr_SetString(PyExc_RuntimeError, "splice is not callable");
+        return nullptr;
+    }
+
+    JSValue jsVal =
Python::toJS(globalObject, value);
+
+    // splice(index, 0, value): delete nothing, insert one element.
+    MarkedArgumentBuffer spliceArgs;
+    spliceArgs.append(jsNumber(index));
+    spliceArgs.append(jsNumber(0));
+    spliceArgs.append(jsVal);
+
+    JSC::profiledCall(globalObject, ProfilingReason::API, spliceMethod, callData, jsArray, spliceArgs);
+
+    if (scope.exception()) {
+        scope.clearException();
+        PyErr_SetString(PyExc_RuntimeError, "Error inserting item");
+        return nullptr;
+    }
+
+    Py_RETURN_NONE;
+}
+
+// list.extend(iterable): appends every item produced by `iterable` to the
+// wrapped JS array, one at a time. Items appended before a failure stay in
+// the array. Returns None, or nullptr with a Python exception set (either
+// the iterable's own error or RuntimeError for a JS failure).
+static PyObject* pyjslist_extend(PyObject* self, PyObject* iterable)
+{
+    PyJSListObject* wrapper = reinterpret_cast<PyJSListObject*>(self);
+    JSGlobalObject* globalObject = wrapper->globalObject;
+
+    if (!globalObject) {
+        PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available");
+        return nullptr;
+    }
+
+    VM& vm = globalObject->vm();
+    auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm);
+
+    JSObject* jsObj = wrapper->jsValue.getObject();
+    if (!jsObj) {
+        PyErr_SetString(PyExc_TypeError, "JavaScript value is not an object");
+        return nullptr;
+    }
+
+    JSArray* jsArray = jsDynamicCast<JSArray*>(jsObj);
+    if (!jsArray) {
+        PyErr_SetString(PyExc_TypeError, "JavaScript value is not an array");
+        return nullptr;
+    }
+
+    // Iterate over the Python iterable and append each item
+    PyObject* iter = PyObject_GetIter(iterable);
+    if (!iter) {
+        return nullptr;
+    }
+
+    PyObject* item;
+    while ((item = PyIter_Next(iter))) {
+        // Re-read the length every round: it grows with each append.
+        unsigned length = jsArray->length();
+        JSValue jsVal = Python::toJS(globalObject, item);
+        Py_DECREF(item);
+
+        jsArray->putDirectIndex(globalObject, length, jsVal);
+
+        if (scope.exception()) {
+            scope.clearException();
+            Py_DECREF(iter);
+            PyErr_SetString(PyExc_RuntimeError, "Error extending array");
+            return nullptr;
+        }
+    }
+
+    Py_DECREF(iter);
+
+    // PyIter_Next returns null both on exhaustion and on error; tell them apart.
+    if (PyErr_Occurred()) {
+        return nullptr;
+    }
+
+    Py_RETURN_NONE;
+}
+
+// list.clear(): truncates the wrapped JS array by setting its length to 0.
+// `args` is unused. Returns None, or nullptr with a Python exception set.
+static PyObject* pyjslist_clear(PyObject* self, PyObject* args)
+{
+    PyJSListObject* wrapper = reinterpret_cast<PyJSListObject*>(self);
+    JSGlobalObject* globalObject = wrapper->globalObject;
+
+    if (!globalObject) {
+
PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available");
+        return nullptr;
+    }
+
+    VM& vm = globalObject->vm();
+    auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm);
+
+    JSObject* jsObj = wrapper->jsValue.getObject();
+    if (!jsObj) {
+        PyErr_SetString(PyExc_TypeError, "JavaScript value is not an object");
+        return nullptr;
+    }
+
+    JSArray* jsArray = jsDynamicCast<JSArray*>(jsObj);
+    if (!jsArray) {
+        PyErr_SetString(PyExc_TypeError, "JavaScript value is not an array");
+        return nullptr;
+    }
+
+    // Set length to 0 to clear the array
+    jsArray->setLength(globalObject, 0, true);
+
+    if (scope.exception()) {
+        scope.clearException();
+        PyErr_SetString(PyExc_RuntimeError, "Error clearing array");
+        return nullptr;
+    }
+
+    Py_RETURN_NONE;
+}
+
+// list.reverse(): reverses the wrapped JS array in place by calling its
+// `reverse` method. `args` is unused. Returns None, or nullptr with a Python
+// exception set.
+static PyObject* pyjslist_reverse(PyObject* self, PyObject* args)
+{
+    PyJSListObject* wrapper = reinterpret_cast<PyJSListObject*>(self);
+    JSGlobalObject* globalObject = wrapper->globalObject;
+
+    if (!globalObject) {
+        PyErr_SetString(PyExc_RuntimeError, "JavaScript global not available");
+        return nullptr;
+    }
+
+    VM& vm = globalObject->vm();
+    auto scope = DECLARE_TOP_EXCEPTION_SCOPE(vm);
+
+    JSObject* jsObj = wrapper->jsValue.getObject();
+    if (!jsObj) {
+        PyErr_SetString(PyExc_TypeError, "JavaScript value is not an object");
+        return nullptr;
+    }
+
+    JSArray* jsArray = jsDynamicCast<JSArray*>(jsObj);
+    if (!jsArray) {
+        PyErr_SetString(PyExc_TypeError, "JavaScript value is not an array");
+        return nullptr;
+    }
+
+    // Use Array.prototype.reverse
+    JSValue reverseMethod = jsArray->get(globalObject, Identifier::fromString(vm, "reverse"_s));
+    if (scope.exception() || !reverseMethod.isObject()) {
+        scope.clearException();
+        PyErr_SetString(PyExc_RuntimeError, "Cannot access reverse method");
+        return nullptr;
+    }
+
+    auto callData = JSC::getCallData(reverseMethod);
+    if (callData.type == CallData::Type::None) {
+        PyErr_SetString(PyExc_RuntimeError, "reverse is not callable");
+        return nullptr;
+    }
+
+    MarkedArgumentBuffer noArgs;
+
JSC::profiledCall(globalObject, ProfilingReason::API, reverseMethod, callData, jsArray, noArgs);
+
+    if (scope.exception()) {
+        scope.clearException();
+        PyErr_SetString(PyExc_RuntimeError, "Error reversing array");
+        return nullptr;
+    }
+
+    Py_RETURN_NONE;
+}
+
+// Iterator for list
+struct PyJSListIterator {
+    PyObject_HEAD
+    PyJSListObject* list; // Strong reference to the list being iterated
+    Py_ssize_t index; // Next position to yield
+    Py_ssize_t length; // Array length captured when the iterator was created
+};
+
+// tp_dealloc: drops the reference held on the list and frees the iterator.
+static void pyjslistiter_dealloc(PyObject* self)
+{
+    PyJSListIterator* iter = reinterpret_cast<PyJSListIterator*>(self);
+    Py_XDECREF(iter->list);
+    PyObject_Del(self);
+}
+
+// tp_iternext: returns a new reference to the next element, or nullptr with
+// no exception set when iteration is finished. Iteration ends at the length
+// captured by pyjslist_iter or at the array's current length, whichever is
+// smaller.
+static PyObject* pyjslistiter_next(PyObject* self)
+{
+    PyJSListIterator* iter = reinterpret_cast<PyJSListIterator*>(self);
+    PyObject* list = reinterpret_cast<PyObject*>(iter->list);
+
+    // Checking the live length as well means an array that shrank during
+    // iteration ends the loop cleanly instead of making pyjslist_item raise
+    // IndexError out of a plain `for` loop.
+    if (iter->index >= iter->length || iter->index >= pyjslist_length(list)) {
+        return nullptr; // StopIteration
+    }
+
+    PyObject* item = pyjslist_item(list, iter->index);
+    iter->index++;
+    return item;
+}
+
+// Type object for the array iterator. Slots are filled positionally in
+// PyTypeObject declaration order (CPython 3.13); each is labelled below.
+static PyTypeObject PyJSListIterator_Type = {
+    PyVarObject_HEAD_INIT(NULL, 0) "bun.JSArrayIterator", // tp_name
+    sizeof(PyJSListIterator), // tp_basicsize
+    0, // tp_itemsize
+    pyjslistiter_dealloc, // tp_dealloc
+    0, // tp_vectorcall_offset
+    0, // tp_getattr
+    0, // tp_setattr
+    0, // tp_as_async
+    0, // tp_repr
+    0, // tp_as_number
+    0, // tp_as_sequence
+    0, // tp_as_mapping
+    0, // tp_hash
+    0, // tp_call
+    0, // tp_str
+    0, // tp_getattro
+    0, // tp_setattro
+    0, // tp_as_buffer
+    Py_TPFLAGS_DEFAULT, // tp_flags
+    "JavaScript array iterator", // tp_doc
+    0, // tp_traverse
+    0, // tp_clear
+    0, // tp_richcompare
+    0, // tp_weaklistoffset
+    PyObject_SelfIter, // tp_iter
+    pyjslistiter_next, // tp_iternext
+};
+
+// tp_iter for the list wrapper: returns a new iterator positioned at index 0
+// that remembers the array length at creation time. Returns nullptr if the
+// type cannot be readied or allocation fails.
+static PyObject* pyjslist_iter(PyObject* self)
+{
+    if (PyType_Ready(&PyJSListIterator_Type) < 0) {
+        return nullptr;
+    }
+
+    PyJSListIterator* iter = PyObject_New(PyJSListIterator, &PyJSListIterator_Type);
+    if (!iter) {
+        return nullptr;
+    }
+
+    iter->list = reinterpret_cast<PyJSListObject*>(self);
+    Py_INCREF(iter->list);
+    iter->index = 0;
+    iter->length = pyjslist_length(self);
+
+    return reinterpret_cast<PyObject*>(iter);
+}
+
+} // namespace Bun
diff --git a/src/bun.js/bindings/PyJSValueObject.h b/src/bun.js/bindings/PyJSValueObject.h
new file mode 100644
index 0000000000..73c1de16a5
--- /dev/null
+++ b/src/bun.js/bindings/PyJSValueObject.h
@@ -0,0 +1,53 @@
+#pragma once
+
+#include "root.h"
+#include "Python.h"
+
+namespace Bun {
+
+using namespace JSC;
+
+// Base wrapper for JS values in Python - used
for functions and other non-container types
+struct PyJSValueObject {
+    PyObject_HEAD
+    JSValue jsValue;
+    JSGlobalObject* globalObject;
+
+    static PyJSValueObject* New();
+    static PyJSValueObject* NewDict(JSGlobalObject* globalObject, JSValue value);
+    static PyJSValueObject* NewList(JSGlobalObject* globalObject, JSValue value);
+    static void initType();
+};
+
+// Dict subclass wrapper - makes isinstance(obj, dict) return True
+// Same jsValue/globalObject fields as PyJSValueObject, but they follow a whole PyDictObject, so the layouts are NOT interchangeable
+struct PyJSDictObject {
+    PyDictObject dict; // Must be first - inherits from dict
+    JSValue jsValue;
+    JSGlobalObject* globalObject;
+};
+
+// List subclass wrapper - makes isinstance(obj, list) return True (fields follow a whole PyListObject)
+struct PyJSListObject {
+    PyListObject list; // Must be first - inherits from list
+    JSValue jsValue;
+    JSGlobalObject* globalObject;
+};
+
+// Bound method wrapper - preserves 'this' context when accessing methods on JS objects
+// When you do `obj.method()` in Python, we need to call method with `this` = obj
+struct PyJSBoundMethod {
+    PyObject_HEAD
+    JSValue function; // The JS function
+    JSValue thisObject; // The object the function was accessed from
+    JSGlobalObject* globalObject;
+
+    static PyJSBoundMethod* New(JSGlobalObject* globalObject, JSValue function, JSValue thisObject);
+    static void initType();
+};
+
+// Try to unwrap a PyObject that wraps a JSValue back to the underlying JSValue
+// Returns empty JSValue if the object is not a PyJSValueObject, PyJSDictObject, or PyJSListObject
+JSValue tryUnwrapJSValue(PyObject* obj);
+
+} // namespace Bun
diff --git a/src/bun.js/bindings/ZigGlobalObject.cpp b/src/bun.js/bindings/ZigGlobalObject.cpp
index 996b007e54..20149a1d0c 100644
--- a/src/bun.js/bindings/ZigGlobalObject.cpp
+++ b/src/bun.js/bindings/ZigGlobalObject.cpp
@@ -302,7 +302,7 @@ extern "C" void JSCInitialize(const char* envp[], size_t envc, void (*onCrash)(c
     JSC::Options::useJITCage() = false;
     JSC::Options::useShadowRealm() = true;
JSC::Options::useV8DateParser() = true; - JSC::Options::useMathSumPreciseMethod() = true; + // JSC::Options::useMathSumPreciseMethod() = true; JSC::Options::evalMode() = evalMode; JSC::Options::heapGrowthSteepnessFactor() = 1.0; JSC::Options::heapGrowthMaxIncrease() = 2.0; diff --git a/src/bun.js/bindings/ZigGlobalObject.h b/src/bun.js/bindings/ZigGlobalObject.h index fc2ad0baec..b987bc2f78 100644 --- a/src/bun.js/bindings/ZigGlobalObject.h +++ b/src/bun.js/bindings/ZigGlobalObject.h @@ -638,7 +638,11 @@ public: V(public, LazyPropertyOfGlobalObject, m_nodeVMDontContextify) \ V(public, LazyPropertyOfGlobalObject, m_nodeVMUseMainContextDefaultLoader) \ V(public, LazyPropertyOfGlobalObject, m_ipcSerializeFunction) \ - V(public, LazyPropertyOfGlobalObject, m_ipcParseHandleFunction) + V(public, LazyPropertyOfGlobalObject, m_ipcParseHandleFunction) \ + \ + /* Python integration */ \ + V(public, WriteBarrier, m_JSPyObjectStructure) \ + V(public, WriteBarrier, m_JSPyArrayStructure) #define DECLARE_GLOBALOBJECT_GC_MEMBER(visibility, T, name) \ visibility: \ diff --git a/src/bun.js/bindings/generated_perf_trace_events.h b/src/bun.js/bindings/generated_perf_trace_events.h index bea83c7545..696499ba7c 100644 --- a/src/bun.js/bindings/generated_perf_trace_events.h +++ b/src/bun.js/bindings/generated_perf_trace_events.h @@ -50,14 +50,15 @@ macro(JSPrinter.printWithSourceMap, 46) \ macro(ModuleResolver.resolve, 47) \ macro(PackageInstaller.install, 48) \ - macro(PackageManifest.Serializer.loadByFile, 49) \ - macro(PackageManifest.Serializer.save, 50) \ - macro(RuntimeTranspilerCache.fromFile, 51) \ - macro(RuntimeTranspilerCache.save, 52) \ - macro(RuntimeTranspilerCache.toFile, 53) \ - macro(StandaloneModuleGraph.serialize, 54) \ - macro(Symbols.followAll, 55) \ - macro(TestCommand.printCodeCoverageLCov, 56) \ - macro(TestCommand.printCodeCoverageLCovAndText, 57) \ - macro(TestCommand.printCodeCoverageText, 58) \ + macro(PackageInstaller.installPythonPackage, 49) \ + 
macro(PackageManifest.Serializer.loadByFile, 50) \ + macro(PackageManifest.Serializer.save, 51) \ + macro(RuntimeTranspilerCache.fromFile, 52) \ + macro(RuntimeTranspilerCache.save, 53) \ + macro(RuntimeTranspilerCache.toFile, 54) \ + macro(StandaloneModuleGraph.serialize, 55) \ + macro(Symbols.followAll, 56) \ + macro(TestCommand.printCodeCoverageLCov, 57) \ + macro(TestCommand.printCodeCoverageLCovAndText, 58) \ + macro(TestCommand.printCodeCoverageText, 59) \ // end diff --git a/src/bun.js/bindings/webcore/DOMClientIsoSubspaces.h b/src/bun.js/bindings/webcore/DOMClientIsoSubspaces.h index 316f0848a7..c061fb320a 100644 --- a/src/bun.js/bindings/webcore/DOMClientIsoSubspaces.h +++ b/src/bun.js/bindings/webcore/DOMClientIsoSubspaces.h @@ -954,5 +954,6 @@ public: std::unique_ptr m_clientSubspaceForJSConnectionsList; std::unique_ptr m_clientSubspaceForJSHTTPParser; + std::unique_ptr m_clientSubspaceForPyObject; }; } // namespace WebCore diff --git a/src/bun.js/bindings/webcore/DOMIsoSubspaces.h b/src/bun.js/bindings/webcore/DOMIsoSubspaces.h index b44973cb53..cfd643973f 100644 --- a/src/bun.js/bindings/webcore/DOMIsoSubspaces.h +++ b/src/bun.js/bindings/webcore/DOMIsoSubspaces.h @@ -957,6 +957,7 @@ public: std::unique_ptr m_subspaceForJSConnectionsList; std::unique_ptr m_subspaceForJSHTTPParser; + std::unique_ptr m_subspaceForPyObject; }; } // namespace WebCore diff --git a/src/bundler/LinkerContext.zig b/src/bundler/LinkerContext.zig index edfd1e62d5..e07a6ae28f 100644 --- a/src/bundler/LinkerContext.zig +++ b/src/bundler/LinkerContext.zig @@ -510,7 +510,7 @@ pub const LinkerContext = struct { const loader = loaders[record.source_index.get()]; switch (loader) { - .jsx, .js, .ts, .tsx, .napi, .sqlite, .json, .jsonc, .json5, .yaml, .html, .sqlite_embedded, .md => { + .jsx, .js, .ts, .tsx, .napi, .sqlite, .json, .jsonc, .json5, .yaml, .html, .sqlite_embedded, .md, .py => { log.addErrorFmt( source, record.range.loc, diff --git a/src/bundler/ParseTask.zig 
b/src/bundler/ParseTask.zig index bf87206c27..2c421f0201 100644 --- a/src/bundler/ParseTask.zig +++ b/src/bundler/ParseTask.zig @@ -606,7 +606,7 @@ fn getAST( return ast; }, // TODO: - .dataurl, .base64, .bunsh => { + .dataurl, .base64, .bunsh, .py => { return try getEmptyAST(log, transpiler, opts, allocator, source, E.String); }, .file, .wasm => { diff --git a/src/codegen/bundle-modules.ts b/src/codegen/bundle-modules.ts index c282da4560..469d3996ca 100644 --- a/src/codegen/bundle-modules.ts +++ b/src/codegen/bundle-modules.ts @@ -424,6 +424,10 @@ pub const ResolvedSourceTag = enum(u32) { export_default_object = 9, /// Signal upwards that the matching value in 'require.extensions' should be used. common_js_custom_extension = 10, + /// Python module - execute via embedded Python interpreter + python = 11, + /// Python builtin module - import a module from Python's standard library + python_builtin = 12, // Built in modules are loaded through InternalModuleRegistry by numerical ID. // In this enum are represented as \`(1 << 9) & id\` @@ -454,6 +458,8 @@ writeIfNotChanged( ExportsObject = 8, ExportDefaultObject = 9, CommonJSCustomExtension = 10, + Python = 11, + PythonBuiltin = 12, // Built in modules are loaded through InternalModuleRegistry by numerical ID. 
// In this enum are represented as \`(1 << 9) & id\`
     InternalModuleRegistryFlag = 1 << 9,
diff --git a/src/generated_perf_trace_events.zig b/src/generated_perf_trace_events.zig
index 3fd6b53a57..36604be73b 100644
--- a/src/generated_perf_trace_events.zig
+++ b/src/generated_perf_trace_events.zig
@@ -49,6 +49,7 @@ pub const PerfEvent = enum(i32) {
     @"JSPrinter.printWithSourceMap",
     @"ModuleResolver.resolve",
     @"PackageInstaller.install",
+    @"PackageInstaller.installPythonPackage",
     @"PackageManifest.Serializer.loadByFile",
     @"PackageManifest.Serializer.save",
     @"RuntimeTranspilerCache.fromFile",
diff --git a/src/install/NetworkTask.zig b/src/install/NetworkTask.zig
index e7c27e838c..36aea1b5c0 100644
--- a/src/install/NetworkTask.zig
+++ b/src/install/NetworkTask.zig
@@ -17,6 +17,9 @@ callback: union(Task.Tag) {
     git_clone: void,
     git_checkout: void,
     local_tarball: void,
+    pypi_manifest: struct {
+        name: strings.StringOrTinyString,
+    },
 },
 /// Key in patchedDependencies in package.json
 apply_patch_task: ?*PatchTask = null,
@@ -244,6 +247,57 @@ pub fn getCompletionCallback(this: *NetworkTask) HTTP.HTTPClientResult.Callback
     return HTTP.HTTPClientResult.Callback.New(*NetworkTask, notify).init(this);
 }
 
+/// Configure the network task to fetch a PyPI manifest.
+///
+/// Requests `https://pypi.org/pypi/{name}/json`, or
+/// `https://pypi.org/pypi/{name}/{version}/json` when `version` is given,
+/// sending `Accept: application/json` with redirects followed. Sets
+/// `this.url_buf`, `this.response_buffer`, `this.allocator`,
+/// `this.unsafe_http_client` and `this.callback` (`.pypi_manifest` with `name`).
+///
+/// NOTE(review): `name` and `version` are placed in the URL verbatim (no
+/// percent-encoding) - confirm callers only pass validated values.
+pub fn forPyPIManifest(
+    this: *NetworkTask,
+    name: string,
+    version: ?string,
+    allocator: std.mem.Allocator,
+) ForManifestError!void {
+    // Let the standard library size and fill the URL buffer instead of
+    // tracking offsets by hand.
+    this.url_buf = if (version) |v|
+        try std.fmt.allocPrint(allocator, "https://pypi.org/pypi/{s}/{s}/json", .{ name, v })
+    else
+        try std.fmt.allocPrint(allocator, "https://pypi.org/pypi/{s}/json", .{name});
+
+    // Simple headers - just Accept: application/json
+    var header_builder = HeaderBuilder{};
+    header_builder.count("Accept", "application/json");
+    try header_builder.allocate(allocator);
+    header_builder.append("Accept", "application/json");
+
+    this.response_buffer = try MutableString.init(allocator, 0);
+    this.allocator = allocator;
+
+    const url = URL.parse(this.url_buf);
+    this.unsafe_http_client = AsyncHTTP.init(allocator, .GET, url, header_builder.entries, header_builder.content.ptr.?[0..header_builder.content.len], &this.response_buffer, "", this.getCompletionCallback(), HTTP.FetchRedirect.follow, .{
+        .http_proxy = this.package_manager.httpProxy(url),
+    });
+    this.unsafe_http_client.client.flags.reject_unauthorized = this.package_manager.tlsRejectUnauthorized();
+
+    this.callback = .{
+        .pypi_manifest = .{
+            .name = try strings.StringOrTinyString.initAppendIfNeeded(name, *FileSystem.FilenameStore, FileSystem.FilenameStore.instance),
+        },
+    };
+
+    // In verbose installs, set both verbosity fields of the HTTP client to `.headers`.
+    if (PackageManager.verbose_install) {
+        this.unsafe_http_client.verbose = .headers;
+        this.unsafe_http_client.client.verbose = .headers;
+    }
+}
+
 pub fn schedule(this: *NetworkTask, batch: *ThreadPool.Batch) void {
     this.unsafe_http_client.schedule(this.allocator, batch);
 }
diff --git a/src/install/PackageInstall.zig b/src/install/PackageInstall.zig
index 686ca0cee3..0fdad180a9 100644
--- a/src/install/PackageInstall.zig
+++ b/src/install/PackageInstall.zig
@@ -1463,6 +1463,157 @@ pub const PackageInstall = struct {
         // TODO: linux io_uring
         return this.installWithCopyfile(destination_dir);
     }
+
+    /// Install a Python package from a wheel cache to site-packages.
+    /// Unlike npm packages, wheel contents (package dirs + dist-info) are copied directly
+    /// to site-packages, not wrapped in a subdirectory.
+    pub fn installPythonPackage(this: *@This(), site_packages_dir: std.fs.Dir, method_: Method) Result {
+        const tracer = bun.perf.trace("PackageInstaller.installPythonPackage");
+        defer tracer.end();
+
+        // Open the cache directory containing the extracted wheel
+        var cached_wheel_dir = bun.openDir(this.cache_dir, this.cache_dir_subpath) catch |err| {
+            return Result.fail(err, .opening_cache_dir, @errorReturnTrace());
+        };
+        defer cached_wheel_dir.close();
+
+        // Save the fields mutated below; the defer restores them on every exit path
+        const original_cache_dir = this.cache_dir;
+        const original_cache_subpath = this.cache_dir_subpath;
+        const original_dest_subpath = this.destination_dir_subpath;
+        defer {
+            this.cache_dir = original_cache_dir;
+            this.cache_dir_subpath = original_cache_subpath;
+            this.destination_dir_subpath = original_dest_subpath;
+        }
+
+        // Set cache_dir to the wheel directory
+        this.cache_dir = cached_wheel_dir;
+
+        // Iterate through all entries in the wheel cache and copy each entry
+        var iter = cached_wheel_dir.iterate();
+        while (iter.next() catch |err| {
+            return Result.fail(err, .opening_cache_dir, @errorReturnTrace());
+        }) |entry| {
+            if (entry.kind == .directory) {
+                // Build null-terminated subdir name for both source and dest (names that do not fit the buffer are skipped)
+                if (entry.name.len >= this.destination_dir_subpath_buf.len) continue;
+                @memcpy(this.destination_dir_subpath_buf[0..entry.name.len], entry.name);
+                this.destination_dir_subpath_buf[entry.name.len] = 0;
+                const subdir_name_z: [:0]u8 = this.destination_dir_subpath_buf[0..entry.name.len :0];
+
+                // Set paths to point to this subdirectory
+                this.cache_dir_subpath = subdir_name_z;
+                this.destination_dir_subpath = subdir_name_z;
+
+                // Use the existing install method which handles clonefile/hardlink/copy fallback
+                const result = this.install(false, site_packages_dir, method_, .pypi);
+                if (result != .success) {
+                    return result;
+                }
+            } else if (entry.kind == .file) {
+                // Copy individual files at the wheel root (e.g., typing_extensions.py, six.py)
+                // Build null-terminated filename (names that do not fit the buffer are skipped)
+                if (entry.name.len >= this.destination_dir_subpath_buf.len) continue;
+                @memcpy(this.destination_dir_subpath_buf[0..entry.name.len], entry.name);
+                this.destination_dir_subpath_buf[entry.name.len] = 0;
+                const filename_z: [:0]const u8 = this.destination_dir_subpath_buf[0..entry.name.len :0];
+
+                // Copy the file from cache to site-packages; a file that already exists is left untouched
+                if (comptime Environment.isMac) {
+                    // Try clonefile first, then fall back to fcopyfile
+                    switch (bun.c.clonefileat(
+                        cached_wheel_dir.fd,
+                        filename_z,
+                        site_packages_dir.fd,
+                        filename_z,
+                        0,
+                    )) {
+                        0 => continue,
+                        else => |errno| switch (std.posix.errno(errno)) {
+                            .EXIST => continue,
+                            else => {
+                                // Fall back to fcopyfile
+                                var in_file = bun.sys.openat(.fromStdDir(cached_wheel_dir), filename_z, bun.O.RDONLY, 0).unwrap() catch |open_err| {
+                                    return Result.fail(open_err, .copyfile, @errorReturnTrace());
+                                };
+                                defer in_file.close();
+
+                                var out_file = site_packages_dir.createFile(entry.name, .{}) catch |create_err| {
+                                    return Result.fail(create_err, .copyfile, @errorReturnTrace());
+                                };
+                                defer out_file.close();
+
+                                switch (bun.sys.fcopyfile(in_file, .fromStdFile(out_file), std.posix.system.COPYFILE{ .DATA = true })) {
+                                    .result => continue,
+                                    .err => |copy_err| switch (copy_err.getErrno()) {
+                                        .EXIST => continue,
+                                        else => return Result.fail(copy_err.toZigErr(), .copyfile, @errorReturnTrace()),
+                                    },
+                                }
+                            },
+                        },
+                    }
+                } else if (comptime Environment.isLinux) {
+                    // Try hardlink first, then fall back to copy
+                    switch (bun.sys.linkat(
+                        .fromStdDir(cached_wheel_dir),
+                        filename_z,
+                        .fromStdDir(site_packages_dir),
+                        filename_z,
+                    )) {
+                        .result => continue,
+                        .err => |err| switch (err.getErrno()) {
+                            .EXIST => continue,
+                            else => {
+                                // Fall back to copy
+                                var in_file = bun.sys.openat(.fromStdDir(cached_wheel_dir), filename_z, bun.O.RDONLY, 0).unwrap() catch |open_err| {
+                                    return Result.fail(open_err, .copyfile, @errorReturnTrace());
+                                };
+                                defer in_file.close();
+
+                                var out_file = site_packages_dir.createFile(entry.name, .{}) catch |create_err| {
+                                    return Result.fail(create_err, .copyfile, @errorReturnTrace());
+                                };
+                                defer out_file.close();
+
+                                var copy_state: bun.CopyFileState = .{};
+                                bun.copyFileWithState(in_file, .fromStdFile(out_file), &copy_state).unwrap() catch |copy_err| {
+                                    return Result.fail(copy_err, .copyfile, @errorReturnTrace());
+                                };
+                            },
+                        },
+                    }
+                } else if (comptime Environment.isWindows) {
+                    // Windows: CopyFileExW. NOTE(review): src and dst are both built from filename_z alone (no cache / site-packages dir) - looks like a self-copy; confirm
+                    var src_buf: bun.WPathBuffer = undefined;
+                    var dst_buf: bun.WPathBuffer = undefined;
+
+                    const src_path = bun.strings.toWPathNormalized(&src_buf, filename_z);
+                    const dst_path = bun.strings.toWPathNormalized(&dst_buf, filename_z);
+
+                    src_buf[src_path.len] = 0;
+                    dst_buf[dst_path.len] = 0;
+
+                    if (bun.windows.CopyFileExW(
+                        src_buf[0..src_path.len :0].ptr,
+                        dst_buf[0..dst_path.len :0].ptr,
+                        null,
+                        null,
+                        null,
+                        0,
+                    ) == 0) {
+                        const win_err = bun.windows.Win32Error.get();
+                        if (win_err != .ERROR_FILE_EXISTS) {
+                            return Result.fail(win_err.toSystemErrno().?.toZigErr(), .copyfile, @errorReturnTrace());
+                        }
+                    }
+                }
+            }
+        }
+
+        return .success;
+    }
 };
 
 const string = []const u8;
diff --git a/src/install/PackageInstaller.zig b/src/install/PackageInstaller.zig
index 7fec486868..b796cab4e7 100644
--- a/src/install/PackageInstaller.zig
+++ b/src/install/PackageInstaller.zig
@@ -10,6 +10,8 @@ pub const PackageInstaller = struct {
     skip_delete: bool,
     force_install: bool,
     root_node_modules_folder: std.fs.Dir,
+    /// .venv/lib/python{version}/site-packages/ directory for Python packages
+    site_packages_folder: ?std.fs.Dir,
     summary: *PackageInstall.Summary,
     options: *const PackageManager.Options,
     metas: []const Lockfile.Package.Meta,
@@ -942,6 +944,10 @@ pub const PackageInstaller = struct {
                     installer.cache_dir = directory;
                 }
             },
+            .pypi => {
+                installer.cache_dir_subpath =
this.manager.cachedTarballFolderName(resolution.value.pypi.url, patch_contents_hash); + installer.cache_dir = this.manager.getCacheDirectory(); + }, else => { if (comptime Environment.allow_assert) { @panic("Internal assertion failure: unexpected resolution tag"); @@ -1110,6 +1116,13 @@ pub const PackageInstaller = struct { const install_result: PackageInstall.Result = switch (resolution.tag) { .symlink, .workspace => installer.installFromLink(this.skip_delete, destination_dir), + .pypi => result: { + // Python packages are installed to .venv/lib/python{version}/site-packages/ + const site_packages = this.site_packages_folder orelse { + break :result .fail(error.FileNotFound, .opening_cache_dir, null); + }; + break :result installer.installPythonPackage(site_packages, installer.getInstallMethod()); + }, else => result: { if (resolution.tag == .root or (resolution.tag == .folder and !this.lockfile.isWorkspaceTreeId(this.current_tree_id))) { // This is a transitive folder dependency. It is installed with a single symlink to the target folder/file, @@ -1530,6 +1543,7 @@ const PackageInstall = install.PackageInstall; const PackageNameHash = install.PackageNameHash; const PatchTask = install.PatchTask; const PostinstallOptimizer = install.PostinstallOptimizer; +const pypi = install.PyPI; const Resolution = install.Resolution; const Task = install.Task; const TaskCallbackContext = install.TaskCallbackContext; diff --git a/src/install/PackageManager.zig b/src/install/PackageManager.zig index 935c3730d7..38ae4b79de 100644 --- a/src/install/PackageManager.zig +++ b/src/install/PackageManager.zig @@ -50,6 +50,7 @@ task_batch: ThreadPool.Batch = .{}, task_queue: TaskDependencyQueue = .{}, manifests: PackageManifestMap = .{}, +pypi_manifests: PyPIManifestMap = .{}, folders: FolderResolution.Map = .{}, git_repositories: RepositoryMap = .{}, @@ -1196,6 +1197,7 @@ pub const enqueueGitForCheckout = enqueue.enqueueGitForCheckout; pub const enqueueNetworkTask = 
enqueue.enqueueNetworkTask; pub const enqueuePackageForDownload = enqueue.enqueuePackageForDownload; pub const enqueueParseNPMPackage = enqueue.enqueueParseNPMPackage; +pub const enqueueParsePyPIPackage = enqueue.enqueueParsePyPIPackage; pub const enqueuePatchTask = enqueue.enqueuePatchTask; pub const enqueuePatchTaskPre = enqueue.enqueuePatchTaskPre; pub const enqueueTarballForDownload = enqueue.enqueueTarballForDownload; @@ -1313,6 +1315,8 @@ const PackageID = bun.install.PackageID; const PackageManager = bun.install.PackageManager; const PackageManifestMap = bun.install.PackageManifestMap; const PackageNameAndVersionHash = bun.install.PackageNameAndVersionHash; +const PyPI = bun.install.PyPI; +const PyPIManifestMap = std.HashMapUnmanaged(PackageNameHash, PyPI.PackageManifest, IdentityContext(PackageNameHash), 80); const PackageNameHash = bun.install.PackageNameHash; const PatchTask = bun.install.PatchTask; const PostinstallOptimizer = bun.install.PostinstallOptimizer; diff --git a/src/install/PackageManager/PackageJSONEditor.zig b/src/install/PackageManager/PackageJSONEditor.zig index cf03e110d1..6cd1c94ed1 100644 --- a/src/install/PackageManager/PackageJSONEditor.zig +++ b/src/install/PackageManager/PackageJSONEditor.zig @@ -407,7 +407,7 @@ pub fn edit( var i: usize = 0; loop: while (i < updates.len) { var request = &updates.*[i]; - inline for ([_]string{ "dependencies", "devDependencies", "optionalDependencies", "peerDependencies" }) |list| { + inline for ([_]string{ "dependencies", "devDependencies", "optionalDependencies", "peerDependencies", "pythonDependencies" }) |list| { if (current_package_json.asProperty(list)) |query| { if (query.expr.data == .e_object) { const name = request.getName(); @@ -421,7 +421,7 @@ pub fn edit( const version_literal = try value.expr.asStringCloned(allocator) orelse break :add_packages_to_update; var tag = Dependency.Version.Tag.infer(version_literal); - if (tag != .npm and tag != .dist_tag) break :add_packages_to_update; + if 
(tag != .npm and tag != .dist_tag and tag != .pypi) break :add_packages_to_update; const entry = bun.handleOom(manager.updating_packages.getOrPut(allocator, name)); diff --git a/src/install/PackageManager/PackageManagerEnqueue.zig b/src/install/PackageManager/PackageManagerEnqueue.zig index 9df5508e74..8a43148e91 100644 --- a/src/install/PackageManager/PackageManagerEnqueue.zig +++ b/src/install/PackageManager/PackageManagerEnqueue.zig @@ -230,6 +230,29 @@ pub fn enqueueParseNPMPackage( return &task.threadpool_task; } +pub fn enqueueParsePyPIPackage( + this: *PackageManager, + task_id: Task.Id, + name: strings.StringOrTinyString, + network_task: *NetworkTask, +) *ThreadPool.Task { + var task = this.preallocated_resolve_tasks.get(); + task.* = Task{ + .package_manager = this, + .log = logger.Log.init(this.allocator), + .tag = Task.Tag.pypi_manifest, + .request = .{ + .pypi_manifest = .{ + .network = network_task, + .name = name, + }, + }, + .id = task_id, + .data = undefined, + }; + return &task.threadpool_task; +} + pub fn enqueuePackageForDownload( this: *PackageManager, name: []const u8, @@ -447,7 +470,7 @@ pub fn enqueueDependencyWithMainAndSuccessFn( var name = dependency.realname(); var name_hash = switch (dependency.version.tag) { - .dist_tag, .git, .github, .npm, .tarball, .workspace => String.Builder.stringHash(this.lockfile.str(&name)), + .dist_tag, .git, .github, .npm, .tarball, .workspace, .pypi => String.Builder.stringHash(this.lockfile.str(&name)), else => dependency.name_hash, }; @@ -1154,6 +1177,103 @@ pub fn enqueueDependencyWithMainAndSuccessFn( }, } }, + .pypi => { + // PyPI package - fetch manifest from PyPI JSON API + const name_str = this.lockfile.str(&name); + const task_id = Task.Id.forPyPIManifest(name_str); + + if (comptime Environment.allow_assert) bun.assert(task_id.get() != 0); + + if (comptime Environment.allow_assert) + debug( + "enqueueDependency({d}, {s}, {s}, {s}) = pypi task {d}", + .{ + id, + @tagName(version.tag), + 
this.lockfile.str(&name), + this.lockfile.str(&version.literal), + task_id, + }, + ); + + // Check if we already have the manifest - if so, resolve immediately + if (this.pypi_manifests.contains(name_hash)) { + // Manifest already downloaded, resolve the package now + const resolve_result = try getOrPutResolvedPackage( + this, + name_hash, + name, + dependency, + version, + dependency.behavior, + id, + resolution, + install_peer, + successFn, + ); + + if (resolve_result) |result| { + // Queue transitive dependencies (just like npm does) + if (result.is_first_time and result.package.dependencies.len > 0) { + try this.lockfile.scratch.dependency_list_queue.writeItem(result.package.dependencies); + } + if (result.task) |task| { + switch (task) { + .network_task => |network_task| this.enqueueNetworkTask(network_task), + .patch_task => |patch_task| this.enqueuePatchTask(patch_task), + } + } + } + return; + } + + if (!this.hasCreatedNetworkTask(task_id, dependency.behavior.isRequired())) { + if (PackageManager.verbose_install) { + Output.prettyErrorln("Enqueue PyPI package manifest for download: {s}", .{name_str}); + } + + var network_task = this.getNetworkTask(); + network_task.* = .{ + .package_manager = this, + .callback = undefined, + .task_id = task_id, + .allocator = this.allocator, + }; + + // Get version string from literal if specified (e.g., "2.3.5" from "pypi:numpy@2.3.5") + const version_str: ?[]const u8 = if (version.literal.len() > 0) + this.lockfile.str(&version.literal) + else + null; + + network_task.forPyPIManifest( + name_str, + version_str, + this.allocator, + ) catch |err| { + if (dependency.behavior.isRequired()) { + this.log.addErrorFmt( + null, + logger.Loc.Empty, + this.allocator, + "Failed to create PyPI manifest request for {s}: {s}", + .{ name_str, @errorName(err) }, + ) catch unreachable; + } + return; + }; + + this.enqueueNetworkTask(network_task); + } + + var manifest_entry_parse = this.task_queue.getOrPutContext(this.allocator, task_id, 
.{}) catch return; + if (!manifest_entry_parse.found_existing) { + manifest_entry_parse.value_ptr.* = TaskCallbackList{}; + } + + const callback_tag = comptime if (successFn == assignRootResolution) "root_dependency" else "dependency"; + manifest_entry_parse.value_ptr.append(this.allocator, @unionInit(TaskCallbackContext, callback_tag, id)) catch return; + }, else => {}, } } @@ -1841,6 +1961,210 @@ fn getOrPutResolvedPackage( } }, + .pypi => { + // Look up the PyPI manifest + const name_str = this.lockfile.str(&name); + debug("getOrPutResolvedPackage .pypi: {s}", .{name_str}); + const manifest = this.pypi_manifests.getPtr(name_hash) orelse { + debug(" no manifest found for pypi package {s}", .{name_str}); + return null; + }; + + // Find the best wheel for the current platform + const best_wheel = manifest.findBestWheel(PyPI.PlatformTarget.current()) orelse { + if (behavior.isPeer()) { + return null; + } + // No compatible wheel found + this.log.addErrorFmt( + null, + logger.Loc.Empty, + this.allocator, + "No compatible wheel found for PyPI package {s}", + .{name_str}, + ) catch unreachable; + return null; + }; + + const wheel_url = best_wheel.url.slice(manifest.string_buf); + const version_str = manifest.latestVersion(); + debug(" found wheel for {s}: {s} (version {s})", .{ name_str, wheel_url, version_str }); + + // Parse the version string into a Semver.Version + // First normalize Python version (PEP 440) to strip .postN, .devN suffixes + var normalized_version_buf: [64]u8 = undefined; + const normalized_version = PyPI.DependencySpecifier.normalizeVersion(version_str, &normalized_version_buf); + const sliced_version = Semver.SlicedString.init(normalized_version, normalized_version); + const parsed_version = Semver.Version.parse(sliced_version); + if (!parsed_version.valid) { + this.log.addErrorFmt( + null, + logger.Loc.Empty, + this.allocator, + "Invalid version {s} for PyPI package {s}", + .{ version_str, name_str }, + ) catch unreachable; + return null; + } + 
const resolved_version = parsed_version.version.min(); + + // Check if there's already a package with this name and version for PyPI + if (this.lockfile.package_index.get(name_hash)) |index| { + switch (index) { + .id => |existing_id| { + const existing_resolution = this.lockfile.packages.items(.resolution)[existing_id]; + if (existing_resolution.tag == .pypi and existing_resolution.value.pypi.version.eql(resolved_version)) { + successFn(this, dependency_id, existing_id); + return .{ + .package = this.lockfile.packages.get(existing_id), + .is_first_time = false, + }; + } + }, + .ids => |list| { + for (list.items) |existing_id| { + const existing_resolution = this.lockfile.packages.items(.resolution)[existing_id]; + if (existing_resolution.tag == .pypi and existing_resolution.value.pypi.version.eql(resolved_version)) { + successFn(this, dependency_id, existing_id); + return .{ + .package = this.lockfile.packages.get(existing_id), + .is_first_time = false, + }; + } + } + }, + } + } + + if (behavior.isPeer() and !install_peer) { + return null; + } + + // Create a new package entry + var package = Lockfile.Package{}; + + // Build strings for the package + // Use manifest.name() which points to manifest's own buffer, not lockfile's buffer. + // This is important because string_builder.allocate() may resize lockfile's buffer. 
+ const manifest_name = manifest.name(); + var string_builder = this.lockfile.stringBuilder(); + string_builder.count(manifest_name); + resolved_version.count(manifest.string_buf, *Lockfile.StringBuilder, &string_builder); + string_builder.count(wheel_url); + + // Count transitive dependencies from requires_dist + var dep_iter = manifest.iterDependencies(PyPI.PlatformTarget.current()); + const total_dependencies_count: u32 = @intCast(dep_iter.count()); + + if (PackageManager.verbose_install) { + Output.prettyErrorln("PyPI package {s} has {d} transitive dependencies", .{ manifest_name, total_dependencies_count }); + } + + // Count strings for each dependency name (normalized) + dep_iter = manifest.iterDependencies(PyPI.PlatformTarget.current()); + while (dep_iter.next()) |spec| { + var dep_name_buf: [256]u8 = undefined; + const normalized = PyPI.DependencySpecifier.normalizeName(spec.name, &dep_name_buf); + string_builder.count(normalized); + } + + try string_builder.allocate(); + defer string_builder.clamp(); + + const name_string = string_builder.append(ExternalString, manifest_name); + package.name = name_string.value; + package.name_hash = name_string.hash; + + // Store version and URL in resolution + package.resolution = Resolution.init(.{ + .pypi = .{ + .version = resolved_version.append(manifest.string_buf, *Lockfile.StringBuilder, &string_builder), + .url = string_builder.append(String, wheel_url), + }, + }); + + // PyPI packages don't have install scripts by default + package.scripts.filled = true; + package.meta.setHasInstallScript(false); + + // Parse and store transitive dependencies from requires_dist + if (total_dependencies_count > 0) { + var dependencies_list = &this.lockfile.buffers.dependencies; + var resolutions_list = &this.lockfile.buffers.resolutions; + + try dependencies_list.ensureUnusedCapacity(this.allocator, total_dependencies_count); + try resolutions_list.ensureUnusedCapacity(this.allocator, total_dependencies_count); + + 
package.dependencies.off = @intCast(dependencies_list.items.len); + package.dependencies.len = total_dependencies_count; + package.resolutions.off = package.dependencies.off; + package.resolutions.len = package.dependencies.len; + + dep_iter = manifest.iterDependencies(PyPI.PlatformTarget.current()); + while (dep_iter.next()) |spec| { + var dep_name_buf: [256]u8 = undefined; + const normalized = PyPI.DependencySpecifier.normalizeName(spec.name, &dep_name_buf); + const dep_name = string_builder.append(ExternalString, normalized); + + if (PackageManager.verbose_install) { + Output.prettyErrorln(" Adding PyPI dependency: {s}", .{normalized}); + } + + dependencies_list.appendAssumeCapacity(.{ + .name = dep_name.value, + .name_hash = dep_name.hash, + .behavior = .{ .python = true, .prod = true }, + .version = .{ .tag = .pypi, .value = .{ .pypi = .{ .name = dep_name.value } } }, + }); + resolutions_list.appendAssumeCapacity(invalid_package_id); + } + } + + // Append the package to the lockfile + package = try this.lockfile.appendPackage(package); + + if (comptime Environment.allow_assert) bun.assert(package.meta.id != invalid_package_id); + debug(" created pypi package {s} with id {d}", .{ manifest_name, package.meta.id }); + defer successFn(this, dependency_id, package.meta.id); + + // Check if wheel is already cached + var name_and_version_hash: ?u64 = null; + var patchfile_hash: ?u64 = null; + return switch (this.determinePreinstallState( + package, + this.lockfile, + &name_and_version_hash, + &patchfile_hash, + )) { + .done => .{ .package = package, .is_first_time = true }, + .extract => extract: { + // Skip wheel download when prefetch_resolved_tarballs is disabled + if (!this.options.do.prefetch_resolved_tarballs) { + break :extract .{ .package = package, .is_first_time = true }; + } + + const task_id = Task.Id.forTarball(wheel_url); + + break :extract .{ + .package = package, + .is_first_time = true, + .task = .{ + .network_task = try 
this.generateNetworkTaskForTarball( + task_id, + wheel_url, + dependency.behavior.isRequired(), + dependency_id, + package, + null, + .no_authorization, + ) orelse unreachable, + }, + }; + }, + else => .{ .package = package, .is_first_time = true }, + }; + }, + else => return null, } } @@ -1876,6 +2200,7 @@ const strings = bun.strings; const Semver = bun.Semver; const String = Semver.String; +const ExternalString = Semver.ExternalString; const Fs = bun.fs; const FileSystem = Fs.FileSystem; @@ -1887,6 +2212,7 @@ const ExtractTarball = bun.install.ExtractTarball; const Features = bun.install.Features; const FolderResolution = bun.install.FolderResolution; const Npm = bun.install.Npm; +const PyPI = bun.install.PyPI; const PackageID = bun.install.PackageID; const PackageNameHash = bun.install.PackageNameHash; const PatchTask = bun.install.PatchTask; diff --git a/src/install/PackageManager/PackageManagerLifecycle.zig b/src/install/PackageManager/PackageManagerLifecycle.zig index 7dd406bc8a..bc2e522c0d 100644 --- a/src/install/PackageManager/PackageManagerLifecycle.zig +++ b/src/install/PackageManager/PackageManagerLifecycle.zig @@ -116,6 +116,7 @@ pub fn determinePreinstallState( .npm => manager.cachedNPMPackageFolderName(lockfile.str(&pkg.name), pkg.resolution.value.npm.version, patch_hash), .local_tarball => manager.cachedTarballFolderName(pkg.resolution.value.local_tarball, patch_hash), .remote_tarball => manager.cachedTarballFolderName(pkg.resolution.value.remote_tarball, patch_hash), + .pypi => manager.cachedTarballFolderName(pkg.resolution.value.pypi.url, patch_hash), else => "", }; diff --git a/src/install/PackageManager/PackageManagerOptions.zig b/src/install/PackageManager/PackageManagerOptions.zig index 88fd29712b..e651c1bf94 100644 --- a/src/install/PackageManager/PackageManagerOptions.zig +++ b/src/install/PackageManager/PackageManagerOptions.zig @@ -22,11 +22,13 @@ dry_run: bool = false, link_workspace_packages: bool = true, remote_package_features: 
Features = .{ .optional_dependencies = true, + .python_dependencies = true, }, local_package_features: Features = .{ .optional_dependencies = true, .dev_dependencies = true, .workspaces = true, + .python_dependencies = true, }, patch_features: union(enum) { nothing: struct {}, diff --git a/src/install/PackageManager/UpdateRequest.zig b/src/install/PackageManager/UpdateRequest.zig index ab843d9540..41f5a5ac80 100644 --- a/src/install/PackageManager/UpdateRequest.zig +++ b/src/install/PackageManager/UpdateRequest.zig @@ -141,7 +141,22 @@ fn parseWithError( var value = input; var alias: ?string = null; - if (!Dependency.isTarball(input) and strings.isNPMPackageName(input)) { + var is_pypi = false; + + // Check for pypi: prefix first - extract name and version + if (strings.hasPrefixComptime(input, "pypi:")) { + is_pypi = true; + const after_prefix = input["pypi:".len..]; + // Find @ separator between package name and version + if (strings.indexOfChar(after_prefix, '@')) |at| { + alias = after_prefix[0..at]; // "cowsay" + value = after_prefix[at + 1 ..]; // "^6.1" + } else { + // No version specified, just package name + alias = after_prefix; + value = input[input.len..]; // Empty slice at end of input (must be within input for SlicedString) + } + } else if (!Dependency.isTarball(input) and strings.isNPMPackageName(input)) { alias = input; value = input[input.len..]; } else if (input.len > 1) { @@ -160,7 +175,7 @@ fn parseWithError( if (alias) |name| String.init(input, name) else placeholder, if (alias) |name| String.Builder.stringHash(name) else null, value, - null, + if (is_pypi) .pypi else null, &SlicedString.init(input, value), log, pm, @@ -214,7 +229,14 @@ fn parseWithError( .version = version, .version_buf = input, }; - if (alias) |name| { + if (version.tag == .pypi) { + // For pypi packages, get the name from the parsed version + const pypi_name = version.value.pypi.name.slice(input); + // Set is_aliased so getName() returns the name, not the literal + 
request.is_aliased = true; + request.name = allocator.dupe(u8, pypi_name) catch unreachable; + request.name_hash = String.Builder.stringHash(pypi_name); + } else if (alias) |name| { request.is_aliased = true; request.name = allocator.dupe(u8, name) catch unreachable; request.name_hash = String.Builder.stringHash(name); diff --git a/src/install/PackageManager/runTasks.zig b/src/install/PackageManager/runTasks.zig index 051d941137..e566495f08 100644 --- a/src/install/PackageManager/runTasks.zig +++ b/src/install/PackageManager/runTasks.zig @@ -500,6 +500,104 @@ pub fn runTasks( manager.task_batch.push(ThreadPool.Batch.from(manager.enqueueExtractNPMPackage(extract, task))); }, + .pypi_manifest => |*manifest_req| { + const name = manifest_req.name; + if (log_level.showProgress()) { + if (!has_updated_this_run) { + manager.setNodeName(manager.downloads_node.?, name.slice(), ProgressStrings.download_emoji, true); + has_updated_this_run = true; + } + } + + if (!has_network_error and task.response.metadata == null) { + has_network_error = true; + const min = manager.options.min_simultaneous_requests; + const max = AsyncHTTP.max_simultaneous_requests.load(.monotonic); + if (max > min) { + AsyncHTTP.max_simultaneous_requests.store(@max(min, max / 2), .monotonic); + } + } + + // Handle retry-able errors. + if (task.response.metadata == null or task.response.metadata.?.response.status_code > 499) { + const err = task.response.fail orelse error.HTTPError; + + if (task.retried < manager.options.max_retry_count) { + task.retried += 1; + manager.enqueueNetworkTask(task); + + if (manager.options.log_level.isVerbose()) { + manager.log.addWarningFmt( + null, + logger.Loc.Empty, + manager.allocator, + "{s} downloading PyPI package manifest {s}. 
Retry {d}/{d}...", + .{ bun.span(@errorName(err)), name.slice(), task.retried, manager.options.max_retry_count }, + ) catch unreachable; + } + + continue; + } + } + + const metadata = task.response.metadata orelse { + // Handle non-retry-able errors. + const err = task.response.fail orelse error.HTTPError; + + if (manager.isNetworkTaskRequired(task.task_id)) { + manager.log.addErrorFmt( + null, + logger.Loc.Empty, + manager.allocator, + "{s} downloading PyPI package manifest {s}", + .{ @errorName(err), name.slice() }, + ) catch |e| bun.handleOom(e); + } else { + manager.log.addWarningFmt( + null, + logger.Loc.Empty, + manager.allocator, + "{s} downloading PyPI package manifest {s}", + .{ @errorName(err), name.slice() }, + ) catch |e| bun.handleOom(e); + } + + continue; + }; + const response = metadata.response; + + if (response.status_code > 399) { + if (manager.isNetworkTaskRequired(task.task_id)) { + manager.log.addErrorFmt( + null, + logger.Loc.Empty, + manager.allocator, + "GET {s} - {d}", + .{ metadata.url, response.status_code }, + ) catch |err| bun.handleOom(err); + } else { + manager.log.addWarningFmt( + null, + logger.Loc.Empty, + manager.allocator, + "GET {s} - {d}", + .{ metadata.url, response.status_code }, + ) catch |err| bun.handleOom(err); + } + + continue; + } + + if (log_level.isVerbose()) { + Output.prettyError(" ", .{}); + Output.printElapsed(@as(f64, @floatFromInt(task.unsafe_http_client.elapsed)) / std.time.ns_per_ms); + Output.prettyError("\nDownloaded {s} PyPI manifest\n", .{name.slice()}); + Output.flush(); + } + + // Enqueue parsing task + manager.task_batch.push(ThreadPool.Batch.from(manager.enqueueParsePyPIPackage(task.task_id, name, task))); + }, else => unreachable, } } @@ -886,6 +984,62 @@ pub fn runTasks( } } }, + .pypi_manifest => { + defer manager.preallocated_network_tasks.put(task.request.pypi_manifest.network); + + if (task.status == .fail) { + const name = task.request.pypi_manifest.name; + const err = task.err orelse 
error.Failed; + + manager.log.addErrorFmt( + null, + logger.Loc.Empty, + manager.allocator, + "{s} parsing PyPI package manifest for {s}", + .{ + @errorName(err), + name.slice(), + }, + ) catch |e| bun.handleOom(e); + + continue; + } + + const manifest = &task.data.pypi_manifest; + const name = task.request.pypi_manifest.name.slice(); + const name_hash = String.Builder.stringHash(name); + + if (log_level.isVerbose()) { + Output.prettyErrorln("PyPI manifest parsed for {s}, version {s}", .{ + manifest.name(), + manifest.latestVersion(), + }); + } + + // Store the PyPI manifest for later resolution + try manager.pypi_manifests.put(bun.default_allocator, name_hash, manifest.*); + + if (@hasField(@TypeOf(callbacks), "manifests_only") and callbacks.manifests_only) { + continue; + } + + const dependency_list_entry = manager.task_queue.getEntry(task.id).?; + const dependency_list = dependency_list_entry.value_ptr.*; + dependency_list_entry.value_ptr.* = .{}; + + if (log_level.isVerbose()) { + Output.prettyErrorln("Processing {d} PyPI dependencies", .{dependency_list.items.len}); + } + + try manager.processDependencyList(dependency_list, Ctx, extract_ctx, callbacks, install_peer); + + if (log_level.showProgress()) { + if (!has_updated_this_run) { + manager.setNodeName(manager.downloads_node.?, name, ProgressStrings.download_emoji, true); + has_updated_this_run = true; + } + } + }, } } } @@ -1103,6 +1257,9 @@ const FileSystem = Fs.FileSystem; const HTTP = bun.http; const AsyncHTTP = HTTP.AsyncHTTP; +const Semver = bun.Semver; +const String = Semver.String; + const DependencyID = bun.install.DependencyID; const Features = bun.install.Features; const NetworkTask = bun.install.NetworkTask; diff --git a/src/install/PackageManager/updatePackageJSONAndInstall.zig b/src/install/PackageManager/updatePackageJSONAndInstall.zig index 2e504d6815..96d804681e 100644 --- a/src/install/PackageManager/updatePackageJSONAndInstall.zig +++ 
b/src/install/PackageManager/updatePackageJSONAndInstall.zig @@ -137,7 +137,7 @@ fn updatePackageJSONAndInstallWithManagerWithUpdates( // if we're removing, they don't have to specify where it is installed in the dependencies list // they can even put it multiple times and we will just remove all of them for (updates.*) |request| { - inline for ([_]string{ "dependencies", "devDependencies", "optionalDependencies", "peerDependencies" }) |list| { + inline for ([_]string{ "dependencies", "devDependencies", "optionalDependencies", "peerDependencies", "pythonDependencies" }) |list| { if (current_package_json.root.asProperty(list)) |query| { if (query.expr.data == .e_object) { var dependencies = query.expr.data.e_object.properties.slice(); @@ -186,16 +186,47 @@ fn updatePackageJSONAndInstallWithManagerWithUpdates( // update will not exceed the current dependency range if it exists if (updates.len != 0) { - try PackageJSONEditor.edit( - manager, - updates, - &current_package_json.root, - dependency_list, - .{ - .exact_versions = manager.options.enable.exact_versions, - .before_install = true, - }, - ); + // Separate pypi packages from npm packages + var npm_updates = UpdateRequest.Array{}; + var pypi_updates = UpdateRequest.Array{}; + + for (updates.*) |update| { + if (update.version.tag == .pypi) { + pypi_updates.append(manager.allocator, update) catch bun.outOfMemory(); + } else { + npm_updates.append(manager.allocator, update) catch bun.outOfMemory(); + } + } + + // Edit npm packages in the appropriate dependency list + if (npm_updates.items.len > 0) { + var npm_slice = npm_updates.items; + try PackageJSONEditor.edit( + manager, + &npm_slice, + &current_package_json.root, + dependency_list, + .{ + .exact_versions = manager.options.enable.exact_versions, + .before_install = true, + }, + ); + } + + // Edit pypi packages in pythonDependencies + if (pypi_updates.items.len > 0) { + var pypi_slice = pypi_updates.items; + try PackageJSONEditor.edit( + manager, + &pypi_slice, + 
&current_package_json.root, + "pythonDependencies", + .{ + .exact_versions = manager.options.enable.exact_versions, + .before_install = true, + }, + ); + } } else if (subcommand == .update) { try PackageJSONEditor.editUpdateNoArgs( manager, diff --git a/src/install/PackageManagerTask.zig b/src/install/PackageManagerTask.zig index ffe72f4b94..d03264cec1 100644 --- a/src/install/PackageManagerTask.zig +++ b/src/install/PackageManagerTask.zig @@ -66,6 +66,13 @@ pub const Id = enum(u64) { hasher.update(resolved); return @enumFromInt(@as(u64, 5 << 61) | @as(u64, @as(u61, @truncate(hasher.final())))); } + + pub fn forPyPIManifest(name: string) Id { + var hasher = bun.Wyhash11.init(0); + hasher.update("pypi-manifest:"); + hasher.update(name); + return @enumFromInt(hasher.final()); + } }; pub fn callback(task: *ThreadPool.Task) void { @@ -288,6 +295,53 @@ pub fn callback(task: *ThreadPool.Task) void { this.data = .{ .extract = result }; this.status = Status.success; }, + .pypi_manifest => { + const allocator = bun.default_allocator; + var manifest_req = &this.request.pypi_manifest; + + const body = &manifest_req.network.response_buffer; + defer body.deinit(); + + // Check for HTTP errors + if (manifest_req.network.response.metadata) |metadata| { + const status = metadata.response.status_code; + if (status >= 400) { + this.log.addErrorFmt(null, logger.Loc.Empty, allocator, "PyPI error: {d} - GET {s}", .{ + status, + manifest_req.name.slice(), + }) catch unreachable; + this.status = Status.fail; + this.data = .{ .pypi_manifest = .{} }; + return; + } + } + + // Parse the PyPI JSON response + const package_manifest = PyPI.PackageManifest.parse( + allocator, + &this.log, + body.slice(), + manifest_req.name.slice(), + ) catch |err| { + bun.handleErrorReturnTrace(err, @errorReturnTrace()); + + this.err = err; + this.status = Status.fail; + this.data = .{ .pypi_manifest = .{} }; + return; + }; + + if (package_manifest) |result| { + this.status = Status.success; + this.data = .{ 
.pypi_manifest = result }; + } else { + this.log.addErrorFmt(null, logger.Loc.Empty, allocator, "Failed to parse PyPI manifest for {s}", .{ + manifest_req.name.slice(), + }) catch unreachable; + this.status = Status.fail; + this.data = .{ .pypi_manifest = .{} }; + } + }, } } @@ -312,6 +366,7 @@ pub const Tag = enum(u3) { git_clone = 2, git_checkout = 3, local_tarball = 4, + pypi_manifest = 5, }; pub const Status = enum { @@ -325,6 +380,7 @@ pub const Data = union { extract: ExtractData, git_clone: bun.FileDescriptor, git_checkout: ExtractData, + pypi_manifest: PyPI.PackageManifest, }; pub const Request = union { @@ -357,6 +413,10 @@ pub const Request = union { local_tarball: struct { tarball: ExtractTarball, }, + pypi_manifest: struct { + name: strings.StringOrTinyString, + network: *NetworkTask, + }, }; const string = []const u8; @@ -369,6 +429,7 @@ const ExtractData = install.ExtractData; const ExtractTarball = install.ExtractTarball; const NetworkTask = install.NetworkTask; const Npm = install.Npm; +const PyPI = install.PyPI; const PackageID = install.PackageID; const PackageManager = install.PackageManager; const PatchTask = install.PatchTask; diff --git a/src/install/dependency.zig b/src/install/dependency.zig index c415dca88d..fc5ba4e721 100644 --- a/src/install/dependency.zig +++ b/src/install/dependency.zig @@ -110,6 +110,7 @@ pub inline fn realname(this: *const Dependency) String { .github => this.version.value.github.package_name, .npm => this.version.value.npm.name, .tarball => this.version.value.tarball.package_name, + .pypi => this.version.value.pypi.name, else => this.name, }; } @@ -329,6 +330,9 @@ pub const Version = struct { }, } }, + .pypi => { + object.put(globalThis, "name", try dep.value.pypi.name.toJS(buf, globalThis)); + }, else => { return globalThis.throwTODO("Unsupported dependency type"); }, @@ -428,6 +432,7 @@ pub const Version = struct { .tarball => lhs.value.tarball.eql(rhs.value.tarball, lhs_buf, rhs_buf), .symlink => 
lhs.value.symlink.eql(rhs.value.symlink, lhs_buf, rhs_buf), .workspace => lhs.value.workspace.eql(rhs.value.workspace, lhs_buf, rhs_buf), + .pypi => lhs.value.pypi.eql(rhs.value.pypi, lhs_buf, rhs_buf), else => true, }; } @@ -463,6 +468,9 @@ pub const Version = struct { catalog = 9, + /// PyPI package (Python Package Index) + pypi = 10, + pub const map = bun.ComptimeStringMap(Tag, .{ .{ "npm", .npm }, .{ "dist_tag", .dist_tag }, @@ -473,6 +481,7 @@ pub const Version = struct { .{ "git", .git }, .{ "github", .github }, .{ "catalog", .catalog }, + .{ "pypi", .pypi }, }); pub const fromJS = map.fromJS; @@ -741,6 +750,8 @@ pub const Version = struct { // TODO(dylan-conway): apply .patch files on packages. In the future this could // return `Tag.git` or `Tag.npm`. if (strings.hasPrefixComptime(dependency, "patch:")) return .npm; + // pypi:package@version - Python package from PyPI + if (strings.hasPrefixComptime(dependency, "pypi:")) return .pypi; }, else => {}, } @@ -818,6 +829,16 @@ pub const Version = struct { } }; + /// PyPI package information (Python Package Index) + pub const PypiInfo = struct { + /// Package name (normalized for PyPI lookups) + name: String, + + fn eql(this: PypiInfo, that: PypiInfo, this_buf: []const u8, that_buf: []const u8) bool { + return this.name.eql(that.name, this_buf, that_buf); + } + }; + pub const Value = union { uninitialized: void, @@ -836,6 +857,9 @@ pub const Version = struct { // dep version without 'catalog:' protocol // empty string == default catalog catalog: String, + + /// PyPI package + pypi: PypiInfo, }; }; @@ -1250,6 +1274,15 @@ pub fn parseWithTag( .literal = sliced.value(), }; }, + .pypi => { + // For PyPI dependencies, the name comes from the alias (dependency key) + // and the version specifier is in the dependency string + return .{ + .value = .{ .pypi = .{ .name = alias } }, + .tag = .pypi, + .literal = sliced.value(), + }; + }, } } @@ -1311,7 +1344,8 @@ pub fn fromJS(globalThis: *jsc.JSGlobalObject, callframe: 
*jsc.CallFrame) bun.JS } pub const Behavior = packed struct(u8) { - _unused_1: u1 = 0, + /// Python dependency from pythonDependencies + python: bool = false, prod: bool = false, optional: bool = false, dev: bool = false, @@ -1349,6 +1383,10 @@ pub const Behavior = packed struct(u8) { return this.bundled; } + pub inline fn isPython(this: Behavior) bool { + return this.python; + } + pub inline fn eq(lhs: Behavior, rhs: Behavior) bool { return @as(u8, @bitCast(lhs)) == @as(u8, @bitCast(rhs)); } @@ -1422,7 +1460,8 @@ pub const Behavior = packed struct(u8) { (features.optional_dependencies and this.isOptional()) or (features.dev_dependencies and this.isDev()) or (features.peer_dependencies and this.isPeer()) or - (features.workspaces and this.isWorkspace()); + (features.workspaces and this.isWorkspace()) or + (features.python_dependencies and this.isPython()); } comptime { diff --git a/src/install/extract_tarball.zig b/src/install/extract_tarball.zig index acd9ac8dbd..321530e1ba 100644 --- a/src/install/extract_tarball.zig +++ b/src/install/extract_tarball.zig @@ -167,62 +167,79 @@ fn extract(this: *const ExtractTarball, log: *logger.Log, tgz_bytes: []const u8) zlib_pool.data.reset(); defer Npm.Registry.BodyPool.release(zlib_pool); - var esimated_output_size: usize = 0; - const time_started_for_verbose_logs: u64 = if (PackageManager.verbose_install) bun.getRoughTickCount(.allow_mocked_time).ns() else 0; - { - // Last 4 bytes of a gzip-compressed file are the uncompressed size. - if (tgz_bytes.len > 16) { - // If the file claims to be larger than 16 bytes and smaller than 64 MB, we'll preallocate the buffer. - // If it's larger than that, we'll do it incrementally. We want to avoid OOMing. - const last_4_bytes: u32 = @bitCast(tgz_bytes[tgz_bytes.len - 4 ..][0..4].*); - if (last_4_bytes > 16 and last_4_bytes < 64 * 1024 * 1024) { - // It's okay if this fails. We will just allocate as we go and that will error if we run out of memory. 
- esimated_output_size = last_4_bytes; - if (zlib_pool.data.list.capacity == 0) { - zlib_pool.data.list.ensureTotalCapacityPrecise(zlib_pool.data.allocator, last_4_bytes) catch {}; - } else { - zlib_pool.data.ensureUnusedCapacity(last_4_bytes) catch {}; + // Wheels (PyPI packages) are ZIP files, not gzipped tarballs + const is_wheel = this.resolution.tag == .pypi; + + // Data to extract from - either decompressed tar data or raw wheel data + var extract_data: []const u8 = undefined; + + if (is_wheel) { + // Wheels are ZIP files - no decompression needed + extract_data = tgz_bytes; + + if (PackageManager.verbose_install) { + Output.prettyErrorln("[{s}] Extract wheel {s} ({f})", .{ name, tmpname, bun.fmt.size(tgz_bytes.len, .{}) }); + } + } else { + var esimated_output_size: usize = 0; + + { + // Last 4 bytes of a gzip-compressed file are the uncompressed size. + if (tgz_bytes.len > 16) { + // If the file claims to be larger than 16 bytes and smaller than 64 MB, we'll preallocate the buffer. + // If it's larger than that, we'll do it incrementally. We want to avoid OOMing. + const last_4_bytes: u32 = @bitCast(tgz_bytes[tgz_bytes.len - 4 ..][0..4].*); + if (last_4_bytes > 16 and last_4_bytes < 64 * 1024 * 1024) { + // It's okay if this fails. We will just allocate as we go and that will error if we run out of memory. 
+ esimated_output_size = last_4_bytes; + if (zlib_pool.data.list.capacity == 0) { + zlib_pool.data.list.ensureTotalCapacityPrecise(zlib_pool.data.allocator, last_4_bytes) catch {}; + } else { + zlib_pool.data.ensureUnusedCapacity(last_4_bytes) catch {}; + } } } } - } - var needs_to_decompress = true; - if (bun.FeatureFlags.isLibdeflateEnabled() and zlib_pool.data.list.capacity > 16 and esimated_output_size > 0) use_libdeflate: { - const decompressor = bun.libdeflate.Decompressor.alloc() orelse break :use_libdeflate; - defer decompressor.deinit(); + var needs_to_decompress = true; + if (bun.FeatureFlags.isLibdeflateEnabled() and zlib_pool.data.list.capacity > 16 and esimated_output_size > 0) use_libdeflate: { + const decompressor = bun.libdeflate.Decompressor.alloc() orelse break :use_libdeflate; + defer decompressor.deinit(); - const result = decompressor.gzip(tgz_bytes, zlib_pool.data.list.allocatedSlice()); + const result = decompressor.gzip(tgz_bytes, zlib_pool.data.list.allocatedSlice()); - if (result.status == .success) { - zlib_pool.data.list.items.len = result.written; - needs_to_decompress = false; + if (result.status == .success) { + zlib_pool.data.list.items.len = result.written; + needs_to_decompress = false; + } + + // If libdeflate fails for any reason, fallback to zlib. } - // If libdeflate fails for any reason, fallback to zlib. 
- } + if (needs_to_decompress) { + zlib_pool.data.list.clearRetainingCapacity(); + var zlib_entry = try Zlib.ZlibReaderArrayList.init(tgz_bytes, &zlib_pool.data.list, default_allocator); + zlib_entry.readAll(true) catch |err| { + log.addErrorFmt( + null, + logger.Loc.Empty, + bun.default_allocator, + "{s} decompressing \"{s}\" to \"{f}\"", + .{ @errorName(err), name, bun.fmt.fmtPath(u8, tmpname, .{}) }, + ) catch unreachable; + return error.InstallFailed; + }; + } - if (needs_to_decompress) { - zlib_pool.data.list.clearRetainingCapacity(); - var zlib_entry = try Zlib.ZlibReaderArrayList.init(tgz_bytes, &zlib_pool.data.list, default_allocator); - zlib_entry.readAll(true) catch |err| { - log.addErrorFmt( - null, - logger.Loc.Empty, - bun.default_allocator, - "{s} decompressing \"{s}\" to \"{f}\"", - .{ @errorName(err), name, bun.fmt.fmtPath(u8, tmpname, .{}) }, - ) catch unreachable; - return error.InstallFailed; - }; - } + if (PackageManager.verbose_install) { + const decompressing_ended_at: u64 = bun.getRoughTickCount(.allow_mocked_time).ns(); + const elapsed = decompressing_ended_at - time_started_for_verbose_logs; + Output.prettyErrorln("[{s}] Extract {s} (decompressed {f} tgz file in {D})", .{ name, tmpname, bun.fmt.size(tgz_bytes.len, .{}), elapsed }); + } - if (PackageManager.verbose_install) { - const decompressing_ended_at: u64 = bun.getRoughTickCount(.allow_mocked_time).ns(); - const elapsed = decompressing_ended_at - time_started_for_verbose_logs; - Output.prettyErrorln("[{s}] Extract {s} (decompressed {f} tgz file in {D})", .{ name, tmpname, bun.fmt.size(tgz_bytes.len, .{}), elapsed }); + extract_data = zlib_pool.data.list.items; } switch (this.resolution.tag) { @@ -240,7 +257,7 @@ fn extract(this: *const ExtractTarball, log: *logger.Log, tgz_bytes: []const u8) switch (PackageManager.verbose_install) { inline else => |verbose_log| _ = try Archiver.extractToDir( - zlib_pool.data.list.items, + extract_data, extract_destination, null, *DirnameReader, @@ 
-264,9 +281,24 @@ fn extract(this: *const ExtractTarball, log: *logger.Log, tgz_bytes: []const u8) }; } }, + .pypi => switch (PackageManager.verbose_install) { + // Wheels are ZIP files with no root directory to skip + inline else => |verbose_log| _ = try Archiver.extractToDir( + extract_data, + extract_destination, + null, + void, + {}, + .{ + .log = verbose_log, + .depth_to_skip = 0, + .format = .zip, + }, + ), + }, else => switch (PackageManager.verbose_install) { inline else => |verbose_log| _ = try Archiver.extractToDir( - zlib_pool.data.list.items, + extract_data, extract_destination, null, void, @@ -292,6 +324,7 @@ fn extract(this: *const ExtractTarball, log: *logger.Log, tgz_bytes: []const u8) .npm => this.package_manager.cachedNPMPackageFolderNamePrint(&folder_name_buf, name, this.resolution.value.npm.version, null), .github => PackageManager.cachedGitHubFolderNamePrint(&folder_name_buf, resolved, null), .local_tarball, .remote_tarball => PackageManager.cachedTarballFolderNamePrint(&folder_name_buf, this.url.slice(), null), + .pypi => PackageManager.cachedTarballFolderNamePrint(&folder_name_buf, this.package_manager.lockfile.str(&this.resolution.value.pypi.url), null), else => unreachable, }; if (folder_name.len == 0 or (folder_name.len == 1 and folder_name[0] == '/')) @panic("Tried to delete root and stopped it"); diff --git a/src/install/hoisted_install.zig b/src/install/hoisted_install.zig index 87e27b42d5..ed65d9b46c 100644 --- a/src/install/hoisted_install.zig +++ b/src/install/hoisted_install.zig @@ -81,6 +81,17 @@ pub fn installHoistedPackages( skip_delete = false; } + // Create .venv/lib/python{version}/site-packages/ for Python packages + const site_packages_folder: ?std.fs.Dir = brk: { + // Create directory structure using version from pypi constants + bun.sys.mkdir(".venv", 0o755).unwrap() catch {}; + bun.sys.mkdir(".venv/lib", 0o755).unwrap() catch {}; + bun.sys.mkdir(pypi.venv_lib_dir, 0o755).unwrap() catch {}; + 
bun.sys.mkdir(pypi.venv_site_packages, 0o755).unwrap() catch {}; + + break :brk bun.openDir(cwd.stdDir(), pypi.venv_site_packages) catch null; + }; + var summary = PackageInstall.Summary{}; { @@ -147,6 +158,7 @@ pub fn installHoistedPackages( .metas = parts.items(.meta), .bins = parts.items(.bin), .root_node_modules_folder = node_modules_folder, + .site_packages_folder = site_packages_folder, .names = parts.items(.name), .pkg_name_hashes = parts.items(.name_hash), .resolutions = parts.items(.resolution), @@ -371,6 +383,7 @@ const Bin = install.Bin; const Lockfile = install.Lockfile; const PackageID = install.PackageID; const PackageInstall = install.PackageInstall; +const pypi = @import("pypi.zig"); const PackageManager = install.PackageManager; const ProgressStrings = PackageManager.ProgressStrings; diff --git a/src/install/install.zig b/src/install/install.zig index 05b460025e..bbb9acb6f1 100644 --- a/src/install/install.zig +++ b/src/install/install.zig @@ -141,6 +141,7 @@ pub const Features = struct { trusted_dependencies: bool = false, workspaces: bool = false, patched_dependencies: bool = false, + python_dependencies: bool = false, check_for_duplicate_dependencies: bool = false, @@ -162,6 +163,7 @@ pub const Features = struct { .trusted_dependencies = true, .patched_dependencies = true, .workspaces = true, + .python_dependencies = true, }; pub const folder = Features{ @@ -243,6 +245,8 @@ pub const PackageManifestError = error{ pub const ExtractTarball = @import("./extract_tarball.zig"); pub const NetworkTask = @import("./NetworkTask.zig"); pub const Npm = @import("./npm.zig"); +pub const Pep440 = @import("./pep440.zig"); +pub const PyPI = @import("./pypi.zig"); pub const PackageManager = @import("./PackageManager.zig"); pub const PackageManifestMap = @import("./PackageManifestMap.zig"); pub const Task = @import("./PackageManagerTask.zig"); diff --git a/src/install/isolated_install.zig b/src/install/isolated_install.zig index 7be3f05fa6..8f84b32641 100644 --- 
a/src/install/isolated_install.zig +++ b/src/install/isolated_install.zig @@ -911,6 +911,7 @@ pub fn installIsolatedPackages( .github, .local_tarball, .remote_tarball, + .pypi, => |pkg_res_tag| { const patch_info = try installer.packagePatchInfo(pkg_name, pkg_name_hash, &pkg_res); @@ -959,6 +960,7 @@ pub fn installIsolatedPackages( .github => manager.cachedGitHubFolderName(&pkg_res.value.github, patch_info.contentsHash()), .local_tarball => manager.cachedTarballFolderName(pkg_res.value.local_tarball, patch_info.contentsHash()), .remote_tarball => manager.cachedTarballFolderName(pkg_res.value.remote_tarball, patch_info.contentsHash()), + .pypi => manager.cachedTarballFolderName(pkg_res.value.pypi.url, patch_info.contentsHash()), else => comptime unreachable, }); @@ -1113,6 +1115,32 @@ pub fn installIsolatedPackages( }, }; }, + .pypi => { + manager.enqueueTarballForDownload( + dep_id, + pkg_id, + pkg_res.value.pypi.url.slice(string_buf), + ctx, + patch_info.nameAndVersionHash(), + ) catch |err| switch (err) { + error.OutOfMemory => bun.outOfMemory(), + error.InvalidURL => { + Output.err(err, "failed to enqueue pypi tarball for download: {s}@{f}", .{ + pkg_name.slice(string_buf), + pkg_res.fmt(string_buf, .auto), + }); + Output.flush(); + if (manager.options.enable.fail_early) { + Global.exit(1); + } + // .monotonic is okay because an error means the task isn't + // running on another thread. 
+ entry_steps[entry_id.get()].store(.done, .monotonic); + installer.onTaskComplete(entry_id, .fail); + continue; + }, + }; + }, else => comptime unreachable, } }, diff --git a/src/install/isolated_install/Installer.zig b/src/install/isolated_install/Installer.zig index 3012987caf..e4394c79e7 100644 --- a/src/install/isolated_install/Installer.zig +++ b/src/install/isolated_install/Installer.zig @@ -156,6 +156,7 @@ pub const Installer = struct { .root, .workspace, .symlink, + .pypi, => {}, _ => {}, @@ -488,6 +489,56 @@ pub const Installer = struct { }; }, + .pypi => { + // Python packages are installed to .venv/lib/python{version}/site-packages/ + const cache_dir_subpath = manager.cachedTarballFolderName(pkg_res.value.pypi.url, null); + + // Create .venv directory structure if it doesn't exist + _ = bun.sys.mkdir(".venv", 0o755); + _ = bun.sys.mkdir(".venv/lib", 0o755); + _ = bun.sys.mkdir(pypi.venv_lib_dir, 0o755); + _ = bun.sys.mkdir(pypi.venv_site_packages, 0o755); + + const cache_dir, const cache_dir_path = manager.getCacheDirectoryAndAbsPath(); + defer cache_dir_path.deinit(); + + // Open the wheel cache directory + const cached_wheel_dir = switch (bun.openDirForIteration(cache_dir, cache_dir_subpath)) { + .result => |fd| fd, + .err => |err| return .failure(.{ .link_package = err }), + }; + defer cached_wheel_dir.close(); + + // Build source path (wheel cache directory) + var src_path: bun.AbsPath(.{ .sep = .auto, .unit = .os }) = .fromLongPath(cache_dir_path.slice()); + defer src_path.deinit(); + src_path.appendJoin(@as([]const u8, cache_dir_subpath)); + + // Destination is site-packages + var dest_path: bun.RelPath(.{ .sep = .auto, .unit = .os }) = .init(); + defer dest_path.deinit(); + dest_path.append(pypi.venv_site_packages); + + // Copy entire wheel contents to site-packages + var hardlinker: Hardlinker = try .init( + cached_wheel_dir, + src_path, + dest_path, + &.{}, + ); + defer hardlinker.deinit(); + + switch (try hardlinker.link()) { + .result => {}, 
+ .err => |err| return .failure(.{ .link_package = err }), + } + + // Python packages don't need symlinks, binaries, or scripts - skip to done + // Set the step to .done since we're skipping directly to it + this.installer.store.entries.items(.step)[this.entry_id.get()].store(.done, .release); + continue :next_step .done; + }, + .folder, .root => { const path = switch (pkg_res.tag) { .folder => pkg_res.value.folder.slice(string_buf), @@ -883,6 +934,7 @@ pub const Installer = struct { .folder, .symlink, .single_file_module, + .pypi, => {}, _ => {}, @@ -1612,6 +1664,7 @@ const PackageInstall = install.PackageInstall; const PackageManager = install.PackageManager; const PackageNameHash = install.PackageNameHash; const PostinstallOptimizer = install.PostinstallOptimizer; +const pypi = @import("../pypi.zig"); const Resolution = install.Resolution; const Store = install.Store; const TruncatedPackageNameHash = install.TruncatedPackageNameHash; diff --git a/src/install/lockfile/Package.zig b/src/install/lockfile/Package.zig index 48c8970699..53f02c55cb 100644 --- a/src/install/lockfile/Package.zig +++ b/src/install/lockfile/Package.zig @@ -55,6 +55,7 @@ pub fn Package(comptime SemverIntType: type) type { pub const optional = DependencyGroup{ .prop = "optionalDependencies", .field = "optional_dependencies", .behavior = .{ .optional = true } }; pub const peer = DependencyGroup{ .prop = "peerDependencies", .field = "peer_dependencies", .behavior = .{ .peer = true } }; pub const workspaces = DependencyGroup{ .prop = "workspaces", .field = "workspaces", .behavior = .{ .workspace = true } }; + pub const python = DependencyGroup{ .prop = "pythonDependencies", .field = "python_dependencies", .behavior = .{ .python = true } }; }; pub inline fn isDisabled(this: *const @This(), cpu: Npm.Architecture, os: Npm.OperatingSystem) bool { @@ -998,9 +999,12 @@ pub fn Package(comptime SemverIntType: type) type { key_loc: logger.Loc, value_loc: logger.Loc, ) !?Dependency { + // For python 
dependencies, always use .pypi tag + const effective_tag: ?Dependency.Version.Tag = comptime if (group.behavior.python) .pypi else tag; + const external_version = brk: { if (comptime Environment.isWindows) { - switch (tag orelse Dependency.Version.Tag.infer(version)) { + switch (effective_tag orelse Dependency.Version.Tag.infer(version)) { .workspace, .folder, .symlink, .tarball => { if (String.canInline(version)) { var copy = string_builder.append(String, version); @@ -1028,7 +1032,7 @@ pub fn Package(comptime SemverIntType: type) type { external_alias.value, external_alias.hash, sliced.slice, - tag, + effective_tag, &sliced, log, pm, @@ -1062,12 +1066,12 @@ pub fn Package(comptime SemverIntType: type) type { var workspace_path: ?String = null; var workspace_version = workspace_ver; - if (comptime tag == null) { + if (comptime effective_tag == null) { workspace_path = lockfile.workspace_paths.get(name_hash); workspace_version = lockfile.workspace_versions.get(name_hash); } - if (comptime tag != null) { + if (comptime effective_tag != null) { bun.assert(dependency_version.tag != .npm and dependency_version.tag != .dist_tag); } @@ -1378,7 +1382,8 @@ pub fn Package(comptime SemverIntType: type) type { @as(usize, @intFromBool(features.dependencies)) + @as(usize, @intFromBool(features.dev_dependencies)) + @as(usize, @intFromBool(features.optional_dependencies)) + - @as(usize, @intFromBool(features.peer_dependencies)) + @as(usize, @intFromBool(features.peer_dependencies)) + + @as(usize, @intFromBool(features.python_dependencies)) ]DependencyGroup = undefined; var out_group_i: usize = 0; @@ -1406,6 +1411,11 @@ pub fn Package(comptime SemverIntType: type) type { out_group_i += 1; } + if (features.python_dependencies) { + out_groups[out_group_i] = DependencyGroup.python; + out_group_i += 1; + } + break :brk out_groups; }; @@ -2145,6 +2155,7 @@ pub fn Package(comptime SemverIntType: type) type { .workspace => .init(.{ .workspace = old.resolution.value.workspace }), 
.remote_tarball => .init(.{ .remote_tarball = old.resolution.value.remote_tarball }), .single_file_module => .init(.{ .single_file_module = old.resolution.value.single_file_module }), + .pypi => .init(.{ .pypi = old.resolution.value.pypi.migrate() }), else => .init(.{ .uninitialized = {} }), }, }; diff --git a/src/install/lockfile/bun.lock.zig b/src/install/lockfile/bun.lock.zig index de72aaf9a7..3d599e3ae4 100644 --- a/src/install/lockfile/bun.lock.zig +++ b/src/install/lockfile/bun.lock.zig @@ -406,7 +406,7 @@ pub const Stringifier = struct { const res = pkg_resolutions[pkg_id]; switch (res.tag) { - .root, .npm, .folder, .local_tarball, .github, .git, .symlink, .workspace, .remote_tarball => {}, + .root, .npm, .folder, .local_tarball, .github, .git, .symlink, .workspace, .remote_tarball, .pypi => {}, .uninitialized => continue, // should not be possible, just being safe .single_file_module => continue, @@ -648,6 +648,35 @@ pub const Stringifier = struct { repo.resolved.fmtJson(buf, .{}), }); }, + .pypi => { + // Format: ["name@version", "wheel_url", { info }] + try writer.print("[\"{f}@{f}\", ", .{ + pkg_name.fmtJson(buf, .{ .quote = false }), + res.value.pypi.version.fmt(buf), + }); + + // Write the wheel URL + try writer.print("\"{s}\", ", .{ + res.value.pypi.url.slice(buf), + }); + + try writePackageInfoObject( + writer, + dep.behavior, + deps_buf, + pkg_deps_sort_buf.items, + &pkg_meta, + &pkg_bin, + buf, + &optional_peers_buf, + extern_strings, + &pkg_map, + relative_path, + &path_buf, + ); + + try writer.writeByte(']'); + }, else => unreachable, } } diff --git a/src/install/lockfile/lockfile_json_stringify_for_debugging.zig b/src/install/lockfile/lockfile_json_stringify_for_debugging.zig index f55ac9bb6c..e22e980136 100644 --- a/src/install/lockfile/lockfile_json_stringify_for_debugging.zig +++ b/src/install/lockfile/lockfile_json_stringify_for_debugging.zig @@ -110,6 +110,15 @@ fn jsonStringifyDependency(this: *const Lockfile, w: anytype, dep_id: 
Dependency try w.objectField("version"); try w.print("\"catalog:{f}\"", .{info.fmtJson(sb, .{ .quote = false })}); }, + .pypi => { + try w.beginObject(); + defer w.endObject() catch {}; + + const info = dep.version.value.pypi; + + try w.objectField("name"); + try w.write(info.name.slice(sb)); + }, } try w.objectField("package_id"); diff --git a/src/install/migration.zig b/src/install/migration.zig index 3ca057ee54..09385a8a03 100644 --- a/src/install/migration.zig +++ b/src/install/migration.zig @@ -954,6 +954,8 @@ pub fn migrateNPMLockfile( }, }); }, + // npm does not support PyPI packages + .pypi => return error.InvalidNPMLockfile, }; }; debug("-> {f}", .{res.fmtForDebug(string_buf.bytes.items)}); diff --git a/src/install/pep440.zig b/src/install/pep440.zig new file mode 100644 index 0000000000..1159d0a51d --- /dev/null +++ b/src/install/pep440.zig @@ -0,0 +1,696 @@ +//! PEP 440 Version Parsing and Comparison +//! +//! Implements version parsing and range matching according to PEP 440: +//! https://peps.python.org/pep-0440/ +//! +//! Version format: [N!]N(.N)*[{a|b|rc}N][.postN][.devN][+local] +//! Examples: 1.0, 2.0.0, 1.0a1, 1.0b2, 1.0rc1, 1.0.post1, 1.0.dev1, 1.0+local +//! +//! 
Specifier operators: ==, !=, <=, >=, <, >, ~=, ===
+
+const Pep440 = @This();
+
+const std = @import("std");
+
+/// PEP 440 Version
+/// Stores the components of a parsed version that take part in ordering.
+/// The local version label (`+local`) is recognised by `parse` but not stored.
+pub const Version = struct {
+    /// Epoch (rarely used, default 0)
+    epoch: u32 = 0,
+    /// Release segments (e.g., [1, 2, 3] for "1.2.3")
+    /// Up to 4 segments are stored inline; `parse` does not store segments beyond the 4th.
+    major: u32 = 0,
+    minor: u32 = 0,
+    micro: u32 = 0,
+    extra: u32 = 0,
+    /// Number of release segments (1-4 for inline, 0 means unset)
+    segment_count: u8 = 0,
+
+    /// Pre-release type
+    pre_type: PreType = .none,
+    /// Pre-release number (e.g., 1 for "a1")
+    pre_num: u32 = 0,
+
+    /// Post-release number (0 means no post-release)
+    post: u32 = 0,
+    /// Whether .post was explicitly specified
+    has_post: bool = false,
+
+    /// Dev release number (0 means no dev release)
+    dev: u32 = 0,
+    /// Whether .dev was explicitly specified
+    has_dev: bool = false,
+
+    /// Pre-release phase. The integer values are the sort keys used by `order`.
+    pub const PreType = enum(u8) {
+        none = 0,
+        dev = 1, // .devN (lowest precedence in pre-releases)
+        alpha = 2, // aN or alphaN
+        beta = 3, // bN or betaN
+        rc = 4, // rcN or cN
+        final = 5, // no pre-release suffix (highest)
+    };
+
+    /// Compare two versions.
+    /// Returns: .lt, .eq, or .gt
+    ///
+    /// Comparison order: epoch, release segments (missing segments count as 0),
+    /// pre-release phase and number, post-release, dev-release.
+    pub fn order(self: Version, other: Version) std.math.Order {
+        // Compare epoch first
+        if (self.epoch != other.epoch) {
+            return std.math.order(self.epoch, other.epoch);
+        }
+
+        // Compare release segments
+        const self_segments = [4]u32{ self.major, self.minor, self.micro, self.extra };
+        const other_segments = [4]u32{ other.major, other.minor, other.micro, other.extra };
+
+        // Clamp to the 4 inline slots so a hand-built Version with a larger
+        // segment_count cannot index out of bounds.
+        const max_segments: usize = @min(@max(self.segment_count, other.segment_count), 4);
+        var i: usize = 0;
+        while (i < max_segments) : (i += 1) {
+            const self_seg = if (i < self.segment_count) self_segments[i] else 0;
+            const other_seg = if (i < other.segment_count) other_segments[i] else 0;
+            if (self_seg != other_seg) {
+                return std.math.order(self_seg, other_seg);
+            }
+        }
+
+        // Compare pre-release phase (final > rc > beta > alpha > dev-only)
+        const self_pre = self.effectivePreType();
+        const other_pre = other.effectivePreType();
+
+        if (@intFromEnum(self_pre) != @intFromEnum(other_pre)) {
+            return std.math.order(@intFromEnum(self_pre), @intFromEnum(other_pre));
+        }
+
+        // Same phase: the pre-release number only means something for a/b/rc.
+        // Dev numbers are compared at the end.
+        switch (self_pre) {
+            .alpha, .beta, .rc => {
+                if (self.pre_num != other.pre_num) {
+                    return std.math.order(self.pre_num, other.pre_num);
+                }
+            },
+            .none, .dev, .final => {},
+        }
+
+        // Compare post-release (a post-release sorts after the release it follows)
+        if (self.has_post != other.has_post) {
+            return if (self.has_post) .gt else .lt;
+        }
+        if (self.has_post and self.post != other.post) {
+            return std.math.order(self.post, other.post);
+        }
+
+        // Compare dev release (has_dev means it's a dev version, which is less than non-dev)
+        if (self.has_dev != other.has_dev) {
+            return if (self.has_dev) .lt else .gt;
+        }
+        if (self.has_dev and self.dev != other.dev) {
+            return std.math.order(self.dev, other.dev);
+        }
+
+        return .eq;
+    }
+
+    /// Phase used as the pre-release sort key.
+    ///
+    /// PEP 440 orders `X.devN < X.aN < X.bN < X.rcN < X`, so a dev release with
+    /// no pre-release and no post-release segment must sort below every alpha.
+    /// A dev release of a post-release (`X.postN.devM`) still sorts after `X`,
+    /// so it keeps the `.final` phase and is ordered by the post/dev steps.
+    fn effectivePreType(self: Version) PreType {
+        if (self.pre_type != .none) return self.pre_type;
+        if (self.has_dev and !self.has_post) return .dev;
+        // If no pre-release suffix, it's a final release
+        return .final;
+    }
+
+    pub fn eql(self: Version, other: Version) bool {
+        return self.order(other) == .eq;
+    }
+
+    /// Parse a PEP 440 version string.
+    ///
+    /// Returns null when there is no leading release number, or when the epoch
+    /// or a release segment does not fit in a u32. Parsing is lenient: a leading
+    /// 'v'/'V' is skipped, release segments after the 4th are not stored, and
+    /// parsing stops at a local label or at the first unrecognised suffix.
+    pub fn parse(input: []const u8) ?Version {
+        var result = Version{};
+        var remaining = input;
+
+        // Skip leading 'v' or 'V' if present (common but not in spec)
+        if (remaining.len > 0 and (remaining[0] == 'v' or remaining[0] == 'V')) {
+            remaining = remaining[1..];
+        }
+
+        // Parse epoch (N!)
+        if (std.mem.indexOfScalar(u8, remaining, '!')) |bang_idx| {
+            result.epoch = std.fmt.parseInt(u32, remaining[0..bang_idx], 10) catch return null;
+            remaining = remaining[bang_idx + 1 ..];
+        }
+
+        // Parse release segments (N.N.N...)
+        var segment_idx: u8 = 0;
+        while (remaining.len > 0 and segment_idx < 4) {
+            // Find end of this segment
+            var seg_end: usize = 0;
+            while (seg_end < remaining.len and std.ascii.isDigit(remaining[seg_end])) {
+                seg_end += 1;
+            }
+
+            if (seg_end == 0) break; // No more digits
+
+            const segment = std.fmt.parseInt(u32, remaining[0..seg_end], 10) catch return null;
+
+            switch (segment_idx) {
+                0 => result.major = segment,
+                1 => result.minor = segment,
+                2 => result.micro = segment,
+                3 => result.extra = segment,
+                else => {},
+            }
+            segment_idx += 1;
+            result.segment_count = segment_idx;
+
+            remaining = remaining[seg_end..];
+
+            // Check for dot separator
+            if (remaining.len > 0 and remaining[0] == '.') {
+                // Peek ahead - if next char is a digit, continue parsing segments
+                if (remaining.len > 1 and std.ascii.isDigit(remaining[1])) {
+                    remaining = remaining[1..];
+                    continue;
+                }
+            }
+            break;
+        }
+
+        if (result.segment_count == 0) return null;
+
+        // Parse pre-release, post-release, dev, local
+        while (remaining.len > 0) {
+            // Skip separator (., -, _)
+            if (isSeparator(remaining[0])) {
+                remaining = remaining[1..];
+                if (remaining.len == 0) break;
+            }
+
+            // Local version (+...)
+            if (remaining[0] == '+') {
+                // We don't store local version for comparison purposes
+                break;
+            }
+
+            // Pre-release: a, alpha, b, beta, c, rc, preview, pre
+            if (parsePreRelease(remaining)) |pre_result| {
+                result.pre_type = pre_result.pre_type;
+                result.pre_num = pre_result.pre_num;
+                remaining = pre_result.remaining;
+                continue;
+            }
+
+            // Post-release: post, rev, r
+            if (parsePostRelease(remaining)) |post_result| {
+                result.has_post = true;
+                result.post = post_result.post;
+                remaining = post_result.remaining;
+                continue;
+            }
+
+            // Dev release: dev
+            if (parseDevRelease(remaining)) |dev_result| {
+                result.has_dev = true;
+                result.dev = dev_result.dev;
+                remaining = dev_result.remaining;
+                continue;
+            }
+
+            // Unknown suffix, stop parsing
+            break;
+        }
+
+        return result;
+    }
+
+    fn isSeparator(c: u8) bool {
+        return c == '.' or c == '-' or c == '_';
+    }
+
+    const NumberResult = struct {
+        num: u32,
+        remaining: []const u8,
+    };
+
+    /// Reads the number that follows a suffix keyword: skips one optional
+    /// separator (., -, _) and then consumes a run of ASCII digits.
+    /// A missing number, or one that does not fit in a u32, yields 0.
+    fn parseSuffixNumber(input: []const u8) NumberResult {
+        var remaining = input;
+        if (remaining.len > 0 and isSeparator(remaining[0])) {
+            remaining = remaining[1..];
+        }
+        var num_end: usize = 0;
+        while (num_end < remaining.len and std.ascii.isDigit(remaining[num_end])) {
+            num_end += 1;
+        }
+        const num = if (num_end > 0)
+            std.fmt.parseInt(u32, remaining[0..num_end], 10) catch 0
+        else
+            0;
+
+        return .{ .num = num, .remaining = remaining[num_end..] };
+    }
+
+    const PreResult = struct {
+        pre_type: PreType,
+        pre_num: u32,
+        remaining: []const u8,
+    };
+
+    /// Matches a pre-release keyword (case-insensitive) at the start of `input`.
+    /// Longer spellings are listed before their one-letter forms so that
+    /// "alpha" is not consumed as "a" followed by garbage.
+    fn parsePreRelease(input: []const u8) ?PreResult {
+        const prefixes = [_]struct { prefix: []const u8, pre_type: PreType }{
+            .{ .prefix = "alpha", .pre_type = .alpha },
+            .{ .prefix = "beta", .pre_type = .beta },
+            .{ .prefix = "preview", .pre_type = .rc },
+            .{ .prefix = "pre", .pre_type = .rc },
+            .{ .prefix = "rc", .pre_type = .rc },
+            .{ .prefix = "a", .pre_type = .alpha },
+            .{ .prefix = "b", .pre_type = .beta },
+            .{ .prefix = "c", .pre_type = .rc },
+        };
+
+        for (prefixes) |p| {
+            if (startsWithIgnoreCase(input, p.prefix)) {
+                const number = parseSuffixNumber(input[p.prefix.len..]);
+                return .{
+                    .pre_type = p.pre_type,
+                    .pre_num = number.num,
+                    .remaining = number.remaining,
+                };
+            }
+        }
+        return null;
+    }
+
+    const PostResult = struct {
+        post: u32,
+        remaining: []const u8,
+    };
+
+    /// Matches a post-release keyword ("post", "rev", "r"; case-insensitive).
+    fn parsePostRelease(input: []const u8) ?PostResult {
+        const prefixes = [_][]const u8{ "post", "rev", "r" };
+
+        for (prefixes) |prefix| {
+            if (startsWithIgnoreCase(input, prefix)) {
+                const number = parseSuffixNumber(input[prefix.len..]);
+                return .{
+                    .post = number.num,
+                    .remaining = number.remaining,
+                };
+            }
+        }
+        return null;
+    }
+
+    const DevResult = struct {
+        dev: u32,
+        remaining: []const u8,
+    };
+
+    /// Matches the dev-release keyword "dev" (case-insensitive).
+    fn parseDevRelease(input: []const u8) ?DevResult {
+        if (startsWithIgnoreCase(input, "dev")) {
+            const number = parseSuffixNumber(input[3..]);
+            return .{
+                .dev = number.num,
+                .remaining = number.remaining,
+            };
+        }
+        return null;
+    }
+
+    fn startsWithIgnoreCase(haystack: []const u8, needle: []const u8) bool {
+        if (haystack.len < needle.len) return false;
+        for (haystack[0..needle.len], needle) |h, n| {
+            if (std.ascii.toLower(h) != std.ascii.toLower(n)) return false;
+        }
+        return true;
+    }
+
+    /// Writes the version in normalized form: `[N!]N(.N)*[{a|b|rc}N][.postN][.devN]`.
+    pub fn format(self: Version, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
+        if (self.epoch != 0) {
+            try writer.print("{d}!", .{self.epoch});
+        }
+
+        try writer.print("{d}", .{self.major});
+        if (self.segment_count >= 2) try writer.print(".{d}", .{self.minor});
+        if (self.segment_count >= 3) try writer.print(".{d}", .{self.micro});
+        if (self.segment_count >= 4) try writer.print(".{d}", .{self.extra});
+
+        switch (self.pre_type) {
+            .alpha => try writer.print("a{d}", .{self.pre_num}),
+            .beta => try writer.print("b{d}", .{self.pre_num}),
+            .rc => try writer.print("rc{d}", .{self.pre_num}),
+            .dev => try writer.print(".dev{d}", .{self.dev}),
+            .none, .final => {},
+        }
+
+        if (self.has_post) {
+            try writer.print(".post{d}", .{self.post});
+        }
+
+        // When pre_type is .dev the dev segment was already written above.
+        if (self.has_dev and self.pre_type != .dev) {
+            try writer.print(".dev{d}", .{self.dev});
+        }
+    }
+};
+
+/// Comparison operator
+pub const Op = enum(u8) {
+    unset = 0,
+    /// == (exact match, or prefix match with .*)
+    eql = 1,
+    /// !=
+    neq = 2,
+    /// <
+    lt = 3,
+    /// <=
+    lte = 4,
+    /// >
+    gt = 5,
+    /// >=
+    gte = 6,
+    /// ~= (compatible release)
+    compat = 7,
+    /// === (arbitrary equality, string match)
+    arbitrary = 8,
+};
+
+/// A single version specifier (e.g., ">=1.0" or "!=1.5.0")
+pub const
Specifier = struct {
+    op: Op = .unset,
+    version: Version = .{},
+    /// For == with wildcard (e.g., ==1.0.*)
+    /// 0 = no wildcard, 1 = major.*, 2 = major.minor.*, etc.
+    wildcard_segments: u8 = 0,
+
+    /// Check if a version satisfies this specifier.
+    /// An unset operator matches everything; `===` is not supported and never matches.
+    pub fn satisfies(self: Specifier, version: Version) bool {
+        if (self.op == .unset) return true;
+
+        const cmp = version.order(self.version);
+
+        return switch (self.op) {
+            .unset => true,
+            .eql => if (self.wildcard_segments > 0)
+                self.wildcardMatch(version)
+            else
+                cmp == .eq,
+            .neq => if (self.wildcard_segments > 0)
+                !self.wildcardMatch(version)
+            else
+                cmp != .eq,
+            .lt => cmp == .lt,
+            .lte => cmp == .lt or cmp == .eq,
+            .gt => cmp == .gt,
+            .gte => cmp == .gt or cmp == .eq,
+            .compat => self.compatibleMatch(version),
+            .arbitrary => false, // Not supported, would need string comparison
+        };
+    }
+
+    /// True when `version` has the same epoch and the same first `segments`
+    /// release segments as this specifier's version. At most the 4 stored
+    /// segments are compared; a segment the candidate does not have is read
+    /// from its field default of 0.
+    fn prefixMatches(self: Specifier, version: Version, segments: usize) bool {
+        if (self.version.epoch != version.epoch) return false;
+
+        const self_segs = [4]u32{ self.version.major, self.version.minor, self.version.micro, self.version.extra };
+        const other_segs = [4]u32{ version.major, version.minor, version.micro, version.extra };
+
+        const limit: usize = @min(segments, self_segs.len);
+        var i: usize = 0;
+        while (i < limit) : (i += 1) {
+            if (self_segs[i] != other_segs[i]) return false;
+        }
+        return true;
+    }
+
+    /// Prefix match for `==X.Y.*` / `!=X.Y.*`: only the epoch and the release
+    /// segments before the wildcard are compared, so pre-, post- and dev-release
+    /// suffixes of the candidate are ignored.
+    fn wildcardMatch(self: Specifier, version: Version) bool {
+        return self.prefixMatches(version, self.wildcard_segments);
+    }
+
+    /// Compatible release match (`~=`).
+    fn compatibleMatch(self: Specifier, version: Version) bool {
+        // ~=X.Y is equivalent to >=X.Y,==X.*       (i.e. >=X.Y,<(X+1).0)
+        // ~=X.Y.Z is equivalent to >=X.Y.Z,==X.Y.* (i.e. >=X.Y.Z,<X.(Y+1).0)
+
+        // Lower bound: the candidate must not be older than the specified version.
+        if (version.order(self.version) == .lt) return false;
+
+        // Single segment version, no upper bound restriction
+        if (self.version.segment_count < 2) return true;
+
+        // Upper bound: everything except the last release segment must match.
+        // Comparing the prefix instead of building an incremented "upper"
+        // version cannot overflow, and it keeps pre-releases of the next
+        // series (e.g. 1.5.0a1 for ~=1.4.5) out of the range.
+        return self.prefixMatches(version, self.version.segment_count - 1);
+    }
+};
+
+/// A version range consisting of multiple specifiers (AND'd together)
+/// e.g., ">=1.0,<2.0,!=1.5.0"
+pub const Range = struct { +
/// Specifiers are AND'd together (all must match) + /// Stored inline for common case (up to 4 specifiers) + specs: [4]Specifier = [_]Specifier{.{}} ** 4, + count: u8 = 0, + + /// Parse a version range string + /// e.g., ">=1.0,<2.0" or "~=1.4.2" or ">=1.0,!=1.5.0" + pub fn parse(input: []const u8) ?Range { + var result = Range{}; + var remaining = std.mem.trim(u8, input, " \t\n\r"); + + while (remaining.len > 0 and result.count < 4) { + // Skip whitespace and commas + while (remaining.len > 0 and (remaining[0] == ',' or remaining[0] == ' ')) { + remaining = remaining[1..]; + } + if (remaining.len == 0) break; + + // Parse operator + var spec = Specifier{}; + + if (std.mem.startsWith(u8, remaining, "===")) { + spec.op = .arbitrary; + remaining = remaining[3..]; + } else if (std.mem.startsWith(u8, remaining, "==")) { + spec.op = .eql; + remaining = remaining[2..]; + } else if (std.mem.startsWith(u8, remaining, "!=")) { + spec.op = .neq; + remaining = remaining[2..]; + } else if (std.mem.startsWith(u8, remaining, "~=")) { + spec.op = .compat; + remaining = remaining[2..]; + } else if (std.mem.startsWith(u8, remaining, "<=")) { + spec.op = .lte; + remaining = remaining[2..]; + } else if (std.mem.startsWith(u8, remaining, ">=")) { + spec.op = .gte; + remaining = remaining[2..]; + } else if (std.mem.startsWith(u8, remaining, "<")) { + spec.op = .lt; + remaining = remaining[1..]; + } else if (std.mem.startsWith(u8, remaining, ">")) { + spec.op = .gt; + remaining = remaining[1..]; + } else { + // No operator means implicit == + spec.op = .eql; + } + + // Skip whitespace after operator + remaining = std.mem.trim(u8, remaining, " \t\n\r"); + + // Find end of version (comma or end of string) + var ver_end: usize = 0; + while (ver_end < remaining.len and remaining[ver_end] != ',') { + ver_end += 1; + } + + var ver_str = std.mem.trim(u8, remaining[0..ver_end], " \t\n\r"); + + // Check for wildcard (e.g., ==1.0.*) + if (ver_str.len > 2 and std.mem.endsWith(u8, ver_str, ".*")) 
{ + // Count segments before .* + var seg_count: u8 = 1; + for (ver_str[0 .. ver_str.len - 2]) |c| { + if (c == '.') seg_count += 1; + } + spec.wildcard_segments = seg_count; + ver_str = ver_str[0 .. ver_str.len - 2]; + } + + // Parse version + if (Version.parse(ver_str)) |v| { + spec.version = v; + } else { + return null; + } + + result.specs[result.count] = spec; + result.count += 1; + + remaining = remaining[ver_end..]; + } + + return if (result.count > 0) result else null; + } + + /// Check if a version satisfies all specifiers in this range + pub fn satisfies(self: Range, version: Version) bool { + if (self.count == 0) return true; + + for (self.specs[0..self.count]) |spec| { + if (!spec.satisfies(version)) return false; + } + return true; + } + + /// Check if this is a "match any" range (empty or *) + pub fn isAny(self: Range) bool { + return self.count == 0; + } +}; + +// ============================================================================ +// Tests +// ============================================================================ + +test "Version.parse basic" { + const v1 = Version.parse("1.0").?; + try std.testing.expectEqual(@as(u32, 1), v1.major); + try std.testing.expectEqual(@as(u32, 0), v1.minor); + try std.testing.expectEqual(@as(u8, 2), v1.segment_count); + + const v2 = Version.parse("1.2.3").?; + try std.testing.expectEqual(@as(u32, 1), v2.major); + try std.testing.expectEqual(@as(u32, 2), v2.minor); + try std.testing.expectEqual(@as(u32, 3), v2.micro); + try std.testing.expectEqual(@as(u8, 3), v2.segment_count); + + const v3 = Version.parse("2.0.0.1").?; + try std.testing.expectEqual(@as(u32, 2), v3.major); + try std.testing.expectEqual(@as(u32, 0), v3.minor); + try std.testing.expectEqual(@as(u32, 0), v3.micro); + try std.testing.expectEqual(@as(u32, 1), v3.extra); + try std.testing.expectEqual(@as(u8, 4), v3.segment_count); +} + +test "Version.parse pre-release" { + const v1 = Version.parse("1.0a1").?; + try 
std.testing.expectEqual(Version.PreType.alpha, v1.pre_type);
+    try std.testing.expectEqual(@as(u32, 1), v1.pre_num);
+
+    const v2 = Version.parse("1.0b2").?;
+    try std.testing.expectEqual(Version.PreType.beta, v2.pre_type);
+    try std.testing.expectEqual(@as(u32, 2), v2.pre_num);
+
+    const v3 = Version.parse("1.0rc1").?;
+    try std.testing.expectEqual(Version.PreType.rc, v3.pre_type);
+    try std.testing.expectEqual(@as(u32, 1), v3.pre_num);
+
+    // Spelled-out tag with dot separators parses to the same pre_type as "a"
+    const v4 = Version.parse("1.0.alpha.2").?;
+    try std.testing.expectEqual(Version.PreType.alpha, v4.pre_type);
+    try std.testing.expectEqual(@as(u32, 2), v4.pre_num);
+}
+
+// Post and dev segments are tracked by a flag plus a number, independently of pre_type.
+test "Version.parse post and dev" {
+    const v1 = Version.parse("1.0.post1").?;
+    try std.testing.expect(v1.has_post);
+    try std.testing.expectEqual(@as(u32, 1), v1.post);
+
+    const v2 = Version.parse("1.0.dev1").?;
+    try std.testing.expect(v2.has_dev);
+    try std.testing.expectEqual(@as(u32, 1), v2.dev);
+
+    // All three suffix kinds on one version
+    const v3 = Version.parse("1.0a1.post2.dev3").?;
+    try std.testing.expectEqual(Version.PreType.alpha, v3.pre_type);
+    try std.testing.expectEqual(@as(u32, 1), v3.pre_num);
+    try std.testing.expect(v3.has_post);
+    try std.testing.expectEqual(@as(u32, 2), v3.post);
+    try std.testing.expect(v3.has_dev);
+    try std.testing.expectEqual(@as(u32, 3), v3.dev);
+}
+
+// "N!" prefix sets the epoch; the release segments follow it.
+test "Version.parse epoch" {
+    const v1 = Version.parse("1!2.0").?;
+    try std.testing.expectEqual(@as(u32, 1), v1.epoch);
+    try std.testing.expectEqual(@as(u32, 2), v1.major);
+    try std.testing.expectEqual(@as(u32, 0), v1.minor);
+}
+
+// Ordering expectations: release segments first, then dev/pre-release < final < post.
+test "Version.order" {
+    const v1 = Version.parse("1.0").?;
+    const v2 = Version.parse("2.0").?;
+    try std.testing.expectEqual(std.math.Order.lt, v1.order(v2));
+    try std.testing.expectEqual(std.math.Order.gt, v2.order(v1));
+    try std.testing.expectEqual(std.math.Order.eq, v1.order(v1));
+
+    // Pre-release < final
+    const v3 = Version.parse("1.0a1").?;
+    const v4 = Version.parse("1.0").?;
+    try std.testing.expectEqual(std.math.Order.lt, v3.order(v4));
+
+    // alpha < beta < rc
+    const va = Version.parse("1.0a1").?;
+    const vb = Version.parse("1.0b1").?;
+    const vrc = Version.parse("1.0rc1").?;
+    try std.testing.expectEqual(std.math.Order.lt, va.order(vb));
+    try std.testing.expectEqual(std.math.Order.lt, vb.order(vrc));
+
+    // dev < final
+    const vdev = Version.parse("1.0.dev1").?;
+    const vfinal = Version.parse("1.0").?;
+    try std.testing.expectEqual(std.math.Order.lt, vdev.order(vfinal));
+
+    // post > final
+    const vpost = Version.parse("1.0.post1").?;
+    try std.testing.expectEqual(std.math.Order.gt, vpost.order(vfinal));
+}
+
+// End-to-end checks: parse a range string, then probe versions on both sides of each bound.
+test "Range.parse and satisfies" {
+    // Simple >= range
+    const r1 = Range.parse(">=1.0").?;
+    try std.testing.expect(r1.satisfies(Version.parse("1.0").?));
+    try std.testing.expect(r1.satisfies(Version.parse("2.0").?));
+    try std.testing.expect(!r1.satisfies(Version.parse("0.9").?));
+
+    // Combined range
+    const r2 = Range.parse(">=1.0,<2.0").?;
+    try std.testing.expect(r2.satisfies(Version.parse("1.0").?));
+    try std.testing.expect(r2.satisfies(Version.parse("1.5").?));
+    try std.testing.expect(!r2.satisfies(Version.parse("2.0").?));
+    try std.testing.expect(!r2.satisfies(Version.parse("0.5").?));
+
+    // Exclusion
+    const r3 = Range.parse(">=1.0,!=1.5.0").?;
+    try std.testing.expect(r3.satisfies(Version.parse("1.0").?));
+    try std.testing.expect(r3.satisfies(Version.parse("1.4").?));
+    try std.testing.expect(!r3.satisfies(Version.parse("1.5.0").?));
+    try std.testing.expect(r3.satisfies(Version.parse("1.6").?));
+
+    // Compatible release
+    const r4 = Range.parse("~=1.4.2").?;
+    try std.testing.expect(r4.satisfies(Version.parse("1.4.2").?));
+    try std.testing.expect(r4.satisfies(Version.parse("1.4.5").?));
+    try std.testing.expect(!r4.satisfies(Version.parse("1.5.0").?));
+    try std.testing.expect(!r4.satisfies(Version.parse("1.4.1").?));
+}
+
+// "==1.0.*" is a prefix match on the first two release segments.
+test "Range wildcard" {
+    const r1 = Range.parse("==1.0.*").?;
+    try std.testing.expect(r1.satisfies(Version.parse("1.0.0").?));
+    try
std.testing.expect(r1.satisfies(Version.parse("1.0.5").?)); + try std.testing.expect(!r1.satisfies(Version.parse("1.1.0").?)); +} diff --git a/src/install/pypi.zig b/src/install/pypi.zig new file mode 100644 index 0000000000..5dd7221e1c --- /dev/null +++ b/src/install/pypi.zig @@ -0,0 +1,994 @@ +//! PyPI (Python Package Index) client and wheel selection +//! +//! This module handles: +//! - Parsing PyPI JSON API responses (https://pypi.org/pypi/{package}/json) +//! - Selecting the best wheel for the current platform +//! - Parsing PEP 440 version specifiers from requires_dist + +const PyPI = @This(); + +/// Python version constants - must match the version Bun is linked against +/// These are used for wheel compatibility checking and venv path construction +pub const python_version_major = 3; +pub const python_version_minor = 13; +pub const python_version_string = std.fmt.comptimePrint("{d}.{d}", .{ python_version_major, python_version_minor }); + +/// Virtual environment paths for Python packages +/// Structure: .venv/lib/python{major}.{minor}/site-packages/ +pub const venv_lib_dir = ".venv/lib/python" ++ python_version_string; +pub const venv_site_packages = venv_lib_dir ++ "/site-packages"; + +const std = @import("std"); +const bun = @import("bun"); +const strings = bun.strings; +const String = bun.Semver.String; +const Allocator = std.mem.Allocator; +const logger = bun.logger; +const JSON = bun.json; +const Environment = bun.Environment; +const OOM = bun.OOM; +const default_allocator = bun.default_allocator; +const initializeStore = @import("./install.zig").initializeMiniStore; + +/// Platform target for wheel compatibility checking +pub const PlatformTarget = struct { + os: Os, + arch: Arch, + /// Python version (e.g., 3.12 = { .major = 3, .minor = 12 }) + python_version: PythonVersion, + + pub const Os = enum { + macos, + linux, + windows, + unknown, + }; + + pub const Arch = enum { + x86_64, + aarch64, + unknown, + }; + + pub const PythonVersion = struct { + 
major: u8 = 3,
+        // Default tracks the module-level constant so a default-initialised
+        // PythonVersion agrees with the interpreter Bun is linked against.
+        minor: u8 = python_version_minor,
+
+        pub fn format(self: PythonVersion, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
+            try writer.print("{d}.{d}", .{ self.major, self.minor });
+        }
+    };
+
+    /// Detect current platform from compile-time target
+    pub fn current() PlatformTarget {
+        return .{
+            .os = comptime if (Environment.isMac)
+                Os.macos
+            else if (Environment.isLinux)
+                Os.linux
+            else if (Environment.isWindows)
+                Os.windows
+            else
+                Os.unknown,
+            .arch = comptime if (Environment.isAarch64)
+                Arch.aarch64
+            else if (Environment.isX64)
+                Arch.x86_64
+            else
+                Arch.unknown,
+            // Use the Python version constants defined at module level
+            .python_version = .{ .major = python_version_major, .minor = python_version_minor },
+        };
+    }
+
+    /// Check if a platform tag is compatible with this target.
+    /// Only the OS family and CPU architecture are checked; the minimum OS /
+    /// libc version encoded in the tag is not compared.
+    /// NOTE(review): manylinux (glibc) and musllinux (musl) tags are both accepted
+    /// on Linux because the target does not record which libc is in use.
+    pub fn isPlatformCompatible(self: PlatformTarget, platform_tag: []const u8) bool {
+        // "any" is always compatible
+        if (strings.eqlComptime(platform_tag, "any")) return true;
+
+        // Check OS-specific tags
+        switch (self.os) {
+            .macos => {
+                // macOS tags: macosx_X_Y_arch, macosx_X_Y_universal, macosx_X_Y_universal2
+                if (strings.hasPrefixComptime(platform_tag, "macosx_")) {
+                    // Check architecture suffix
+                    if (self.arch == .aarch64) {
+                        // Legacy "universal" is a ppc/i386 fat binary and carries no
+                        // arm64 slice; only "universal2" does.
+                        return strings.hasSuffixComptime(platform_tag, "_arm64") or
+                            strings.hasSuffixComptime(platform_tag, "_universal2");
+                    } else if (self.arch == .x86_64) {
+                        return strings.hasSuffixComptime(platform_tag, "_x86_64") or
+                            strings.hasSuffixComptime(platform_tag, "_universal2") or
+                            strings.hasSuffixComptime(platform_tag, "_universal") or
+                            strings.hasSuffixComptime(platform_tag, "_intel");
+                    }
+                }
+            },
+            .linux => {
+                // Linux tags: linux_x86_64, manylinux1_x86_64, manylinux2010_x86_64,
+                // manylinux2014_x86_64, manylinux_2_17_x86_64, musllinux_1_1_x86_64
+                const has_arch_suffix = switch (self.arch) {
+                    .aarch64 => strings.hasSuffixComptime(platform_tag, "_aarch64"),
+                    .x86_64 => strings.hasSuffixComptime(platform_tag, "_x86_64"),
+                    // An undetected architecture must not be treated as x86_64
+                    .unknown => false,
+                };
+                if (has_arch_suffix) {
+                    if (strings.hasPrefixComptime(platform_tag, "linux_") or
+                        strings.hasPrefixComptime(platform_tag, "manylinux") or
+                        strings.hasPrefixComptime(platform_tag, "musllinux"))
+                    {
+                        return true;
+                    }
+                }
+            },
+            .windows => {
+                // Windows tags: win32, win_amd64, win_arm64
+                // "win32" wheels hold 32-bit x86 binaries, which a 64-bit interpreter
+                // cannot load, so they are not accepted for x86_64.
+                if (self.arch == .x86_64) {
+                    return strings.eqlComptime(platform_tag, "win_amd64");
+                } else if (self.arch == .aarch64) {
+                    return strings.eqlComptime(platform_tag, "win_arm64");
+                }
+            },
+            .unknown => {},
+        }
+        return false;
+    }
+
+    /// Check if a Python version tag is compatible.
+    /// Accepts a single tag ("py3", "cp313") or a compressed tag set joined with
+    /// '.' ("py2.py3", "cp312.cp313"); a set is compatible when any member is.
+    pub fn isPythonCompatible(self: PlatformTarget, python_tag: []const u8) bool {
+        var tags = std.mem.splitScalar(u8, python_tag, '.');
+        while (tags.next()) |tag| {
+            if (self.isSinglePythonTagCompatible(tag)) return true;
+        }
+        return false;
+    }
+
+    /// Compatibility of one python tag (no '.' inside).
+    fn isSinglePythonTagCompatible(self: PlatformTarget, tag: []const u8) bool {
+        // "py3" matches any Python 3.x
+        if (strings.eqlComptime(tag, "py3")) return self.python_version.major == 3;
+        if (strings.eqlComptime(tag, "py2")) return self.python_version.major == 2;
+
+        // "cpXY" matches CPython X.Y specifically (compiled extensions require exact match)
+        // "pyXY" matches Python X.Y or higher minor versions
+        const exact = strings.hasPrefixComptime(tag, "cp");
+        if (!exact and !strings.hasPrefixComptime(tag, "py")) return false;
+
+        const version_part = tag[2..];
+        if (version_part.len < 2) return false;
+
+        // First digit is the major version, the rest is the minor (e.g. "313" -> 3, 13)
+        const major = std.fmt.parseInt(u8, version_part[0..1], 10) catch return false;
+        const minor = std.fmt.parseInt(u8, version_part[1..], 10) catch return false;
+        if (self.python_version.major != major) return false;
+
+        return if (exact)
+            self.python_version.minor == minor
+        else
+            self.python_version.minor >= minor;
+    }
+
+    /// Check if an ABI tag
is compatible
+    pub fn isAbiCompatible(self: PlatformTarget, abi_tag: []const u8) bool {
+        // "none" means no ABI dependency (pure Python or uses stable ABI)
+        if (strings.eqlComptime(abi_tag, "none")) return true;
+
+        // "abi3" is the stable ABI, compatible with Python 3.2+
+        if (strings.eqlComptime(abi_tag, "abi3")) return self.python_version.major == 3 and self.python_version.minor >= 2;
+
+        // "cpXY" or "cpXYm" matches specific CPython ABI
+        if (strings.hasPrefixComptime(abi_tag, "cp")) {
+            var version_part = abi_tag[2..];
+            // Remove trailing 'm' if present (legacy ABI marker)
+            if (version_part.len > 0 and version_part[version_part.len - 1] == 'm') {
+                version_part = version_part[0 .. version_part.len - 1];
+            }
+            if (version_part.len >= 2) {
+                const major = std.fmt.parseInt(u8, version_part[0..1], 10) catch return false;
+                const minor = std.fmt.parseInt(u8, version_part[1..], 10) catch return false;
+                return self.python_version.major == major and self.python_version.minor == minor;
+            }
+        }
+
+        return false;
+    }
+};
+
+/// Parsed wheel filename components
+/// Format: {distribution}-{version}(-{build})?-{python}-{abi}-{platform}.whl
+pub const WheelTag = struct {
+    python: []const u8,
+    abi: []const u8,
+    platform: []const u8,
+
+    /// Parse wheel tags from a wheel filename
+    /// Returns null if not a valid wheel filename
+    /// The returned slices point into `filename`.
+    pub fn parse(filename: []const u8) ?WheelTag {
+        // Must end with .whl
+        if (!strings.hasSuffixComptime(filename, ".whl")) return null;
+
+        const name_without_ext = filename[0 .. filename.len - 4];
+
+        // Split by '-' and get the last 3 components (python-abi-platform)
+        var parts: [8][]const u8 = undefined;
+        var part_count: usize = 0;
+
+        var iter = std.mem.splitScalar(u8, name_without_ext, '-');
+        while (iter.next()) |part| {
+            if (part_count >= 8) return null; // Too many parts
+            parts[part_count] = part;
+            part_count += 1;
+        }
+
+        // Minimum: name-version-python-abi-platform = 5 parts
+        if (part_count < 5) return null;
+
+        return .{
+            .platform = parts[part_count - 1],
+            .abi = parts[part_count - 2],
+            .python = parts[part_count - 3],
+        };
+    }
+
+    /// True for a stable-ABI wheel ("cpXY" + "abi3") whose minimum CPython
+    /// version X.Y is at or below the target's. Such a wheel is built once against
+    /// the limited API and loads on every later 3.x minor, so the python tag is a
+    /// floor rather than an exact match.
+    fn meetsStableAbiFloor(self: WheelTag, target: PlatformTarget) bool {
+        if (!strings.eqlComptime(self.abi, "abi3")) return false;
+        if (!strings.hasPrefixComptime(self.python, "cp")) return false;
+
+        const version_part = self.python[2..];
+        if (version_part.len < 2) return false;
+
+        const major = std.fmt.parseInt(u8, version_part[0..1], 10) catch return false;
+        const minor = std.fmt.parseInt(u8, version_part[1..], 10) catch return false;
+        return target.python_version.major == major and target.python_version.minor >= minor;
+    }
+
+    /// Calculate a compatibility score (higher is better)
+    /// Returns null if not compatible
+    pub fn compatibilityScore(self: WheelTag, target: PlatformTarget) ?u32 {
+        // Check basic compatibility first
+        // A cpXY tag normally needs an exact interpreter match; abi3 wheels are the
+        // exception and only need the target to be at least X.Y.
+        if (!target.isPythonCompatible(self.python) and !self.meetsStableAbiFloor(target)) return null;
+        if (!target.isAbiCompatible(self.abi)) return null;
+        if (!target.isPlatformCompatible(self.platform)) return null;
+
+        var score: u32 = 100;
+
+        // Prefer platform-specific wheels over "any"
+        if (!strings.eqlComptime(self.platform, "any")) {
+            score += 50;
+        }
+
+        // Prefer specific Python version over generic "py3"
+        if (strings.hasPrefixComptime(self.python, "cp")) {
+            score += 30;
+        }
+
+        // Prefer specific ABI over "none" or "abi3"
+        if (!strings.eqlComptime(self.abi, "none") and !strings.eqlComptime(self.abi, "abi3")) {
+            score += 20;
+        }
+
+        // Prefer newer manylinux versions
+        if (strings.hasPrefixComptime(self.platform, "manylinux_2_")) {
+            score += 10;
+        } else if (strings.hasPrefixComptime(self.platform, "manylinux2014")) {
+            score += 8;
+        } else if (strings.hasPrefixComptime(self.platform, "manylinux2010")) {
+            score += 5;
+        }
+
+        return score;
+    }
+};
+
+/// A file (wheel or source distribution) from PyPI
+pub const File = struct {
+    filename: String,
+    url: String,
+    sha256: String,
+    python_version: String,
+    requires_python: String,
+    packagetype: PackageType,
+    size: u64,
+
+    pub const PackageType = enum(u8) {
+
bdist_wheel = 0,
+        sdist = 1,
+        bdist_egg = 2,
+        other = 3,
+
+        pub fn fromString(s: []const u8) PackageType {
+            if (strings.eqlComptime(s, "bdist_wheel")) return .bdist_wheel;
+            if (strings.eqlComptime(s, "sdist")) return .sdist;
+            if (strings.eqlComptime(s, "bdist_egg")) return .bdist_egg;
+            return .other;
+        }
+    };
+
+    /// Check if this file is a wheel
+    pub fn isWheel(self: File, buf: []const u8) bool {
+        return self.packagetype == .bdist_wheel or
+            strings.hasSuffixComptime(self.filename.slice(buf), ".whl");
+    }
+
+    /// Get wheel tags for this file (only valid for wheels)
+    pub fn wheelTag(self: File, buf: []const u8) ?WheelTag {
+        return WheelTag.parse(self.filename.slice(buf));
+    }
+};
+
+/// Parsed PyPI package manifest
+/// All `String` fields index into `string_buf`; `string_buf` and `files` are
+/// allocated from `default_allocator` by `parse` and released by `deinit`.
+pub const PackageManifest = struct {
+    pkg: Package = .{},
+    string_buf: []const u8 = "",
+    files: []const File = &.{},
+
+    pub const Package = struct {
+        name: String = .{},
+        latest_version: String = .{},
+        requires_python: String = .{},
+        requires_dist_off: u32 = 0,
+        requires_dist_len: u32 = 0,
+    };
+
+    pub fn name(self: *const PackageManifest) []const u8 {
+        return self.pkg.name.slice(self.string_buf);
+    }
+
+    pub fn latestVersion(self: *const PackageManifest) []const u8 {
+        return self.pkg.latest_version.slice(self.string_buf);
+    }
+
+    /// Get the requires_dist (dependencies) as a slice of the string buffer
+    /// Entries are newline-terminated, one requirement per line.
+    pub fn requiresDist(self: *const PackageManifest) []const u8 {
+        if (self.pkg.requires_dist_len == 0) return "";
+        return self.string_buf[self.pkg.requires_dist_off..][0..self.pkg.requires_dist_len];
+    }
+
+    /// Iterator over applicable dependencies (filtered by platform/python version)
+    pub const DependencyIterator = struct {
+        remaining: []const u8,
+        target: PlatformTarget,
+
+        pub fn next(self: *DependencyIterator) ?DependencySpecifier {
+            while (self.remaining.len > 0) {
+                // Find next newline
+                const end = strings.indexOfChar(self.remaining, '\n') orelse self.remaining.len;
+                const line = strings.trim(self.remaining[0..end], &strings.whitespace_chars);
+                self.remaining = if (end < self.remaining.len) self.remaining[end + 1 ..] else "";
+
+                if (line.len == 0) continue;
+
+                if (DependencySpecifier.parse(line)) |spec| {
+                    if (spec.name.len > 0 and spec.isApplicable(self.target)) {
+                        return spec;
+                    }
+                }
+            }
+            return null;
+        }
+
+        /// Count the number of applicable dependencies
+        /// Works on a copy, so the iterator's own position is left untouched.
+        pub fn count(self: *DependencyIterator) usize {
+            var n: usize = 0;
+            var iter = self.*;
+            while (iter.next()) |_| {
+                n += 1;
+            }
+            return n;
+        }
+    };
+
+    /// Get an iterator over applicable dependencies
+    pub fn iterDependencies(self: *const PackageManifest, target: PlatformTarget) DependencyIterator {
+        return .{
+            .remaining = self.requiresDist(),
+            .target = target,
+        };
+    }
+
+    /// Find the best wheel for the given target platform
+    /// Returns null if no compatible wheel is found
+    /// On equal scores the earliest file in `files` wins.
+    pub fn findBestWheel(self: *const PackageManifest, target: PlatformTarget) ?*const File {
+        var best_file: ?*const File = null;
+        var best_score: u32 = 0;
+
+        for (self.files) |*file| {
+            if (!file.isWheel(self.string_buf)) continue;
+
+            if (file.wheelTag(self.string_buf)) |tag| {
+                if (tag.compatibilityScore(target)) |score| {
+                    if (score > best_score) {
+                        best_score = score;
+                        best_file = file;
+                    }
+                }
+            }
+        }
+
+        return best_file;
+    }
+
+    /// Parse a PyPI JSON API response
+    ///
+    /// Returns null when the JSON cannot be parsed, when the response is an error
+    /// object (a string "message" property, which is also reported to `log`), or
+    /// when the "info" object is missing. Works in two passes over the document:
+    /// the first sizes the shared string buffer, the second copies strings in.
+    pub fn parse(
+        allocator: Allocator,
+        log: *logger.Log,
+        json_buffer: []const u8,
+        expected_name: []const u8,
+    ) OOM!?PackageManifest {
+        const source = &logger.Source.initPathString(expected_name, json_buffer);
+        initializeStore();
+        defer bun.ast.Stmt.Data.Store.memory_allocator.?.pop();
+
+        var arena = bun.ArenaAllocator.init(allocator);
+        defer arena.deinit();
+
+        const json = JSON.parseUTF8(
+            source,
+            log,
+            arena.allocator(),
+        ) catch {
+            return null;
+        };
+
+        // Check for error response
+        if (json.asProperty("message")) |msg| {
+            if (msg.expr.asString(allocator)) |err_msg| {
+                log.addErrorFmt(source, logger.Loc.Empty, allocator, "PyPI error: {s}", .{err_msg}) catch {};
+                return null;
+            }
+        }
+
+        var result: PackageManifest = .{
+            .pkg = .{
+                .name = .{},
+                .latest_version = .{},
+                .requires_python = .{},
+                .requires_dist_off = 0,
+                .requires_dist_len = 0,
+            },
+            .string_buf = &.{},
+            .files = &.{},
+        };
+
+        var string_pool = String.Builder.StringPool.init(default_allocator);
+        defer string_pool.deinit();
+
+        var string_builder = String.Builder{
+            .string_pool = string_pool,
+        };
+
+        // Count strings needed
+        const info = json.asProperty("info") orelse return null;
+
+        // Name
+        if (info.expr.asProperty("name")) |name_prop| {
+            if (name_prop.expr.asString(allocator)) |n| {
+                string_builder.count(n);
+            }
+        }
+
+        // Version
+        if (info.expr.asProperty("version")) |version_prop| {
+            if (version_prop.expr.asString(allocator)) |v| {
+                string_builder.count(v);
+            }
+        }
+
+        // requires_python
+        if (info.expr.asProperty("requires_python")) |rp| {
+            if (rp.expr.asString(allocator)) |rp_str| {
+                string_builder.count(rp_str);
+            }
+        }
+
+        // requires_dist (dependencies)
+        var requires_dist_total_len: usize = 0;
+        if (info.expr.asProperty("requires_dist")) |rd| {
+            if (rd.expr.data == .e_array) {
+                for (rd.expr.data.e_array.slice()) |item| {
+                    if (item.asString(allocator)) |dep| {
+                        requires_dist_total_len += dep.len + 1; // +1 for newline separator
+                    }
+                }
+            }
+        }
+        // Reserved directly on the builder because the entries are written raw
+        // (newline-joined) rather than appended as individual Strings.
+        if (requires_dist_total_len > 0) {
+            string_builder.cap += requires_dist_total_len;
+        }
+
+        // Count files from "urls" (files for latest version)
+        var file_count: usize = 0;
+        if (json.asProperty("urls")) |urls| {
+            if (urls.expr.data == .e_array) {
+                for (urls.expr.data.e_array.slice()) |file_obj| {
+                    if (file_obj.data != .e_object) continue;
+
+                    file_count += 1;
+
+                    if (file_obj.asProperty("filename")) |f| {
+                        if (f.expr.asString(allocator)) |filename| {
+                            string_builder.count(filename);
+                        }
+                    }
+                    if (file_obj.asProperty("url")) |u| {
+                        if (u.expr.asString(allocator)) |url| {
+                            string_builder.count(url);
+                        }
+                    }
+                    if (file_obj.asProperty("digests")) |d| {
+                        if (d.expr.asProperty("sha256")) |sha| {
+                            if (sha.expr.asString(allocator)) |sha_str| {
+                                string_builder.count(sha_str);
+                            }
+                        }
+                    }
+                    if (file_obj.asProperty("python_version")) |pv| {
+                        if (pv.expr.asString(allocator)) |pv_str| {
+                            string_builder.count(pv_str);
+                        }
+                    }
+                    if (file_obj.asProperty("requires_python")) |rp| {
+                        if (rp.expr.asString(allocator)) |rp_str| {
+                            string_builder.count(rp_str);
+                        }
+                    }
+                }
+            }
+        }
+
+        // Allocate
+        try string_builder.allocate(default_allocator);
+        errdefer if (string_builder.ptr) |ptr| default_allocator.free(ptr[0..string_builder.cap]);
+
+        const files = try default_allocator.alloc(File, file_count);
+        errdefer default_allocator.free(files);
+
+        // Second pass: populate data
+        if (info.expr.asProperty("name")) |name_prop| {
+            if (name_prop.expr.asString(allocator)) |n| {
+                result.pkg.name = string_builder.append(String, n);
+            }
+        }
+
+        if (info.expr.asProperty("version")) |version_prop| {
+            if (version_prop.expr.asString(allocator)) |v| {
+                result.pkg.latest_version = string_builder.append(String, v);
+            }
+        }
+
+        if (info.expr.asProperty("requires_python")) |rp| {
+            if (rp.expr.asString(allocator)) |rp_str| {
+                result.pkg.requires_python = string_builder.append(String, rp_str);
+            }
+        }
+
+        // requires_dist - write directly to the buffer
+        if (info.expr.asProperty("requires_dist")) |rd| {
+            if (rd.expr.data == .e_array) {
+                result.pkg.requires_dist_off = @intCast(string_builder.len);
+                const buf_slice = string_builder.ptr.?[string_builder.len..string_builder.cap];
+                var write_pos: usize = 0;
+                for (rd.expr.data.e_array.slice()) |item| {
+                    if (item.asString(allocator)) |dep| {
+                        @memcpy(buf_slice[write_pos..][0..dep.len], dep);
+                        write_pos += dep.len;
+                        buf_slice[write_pos] = '\n';
+                        write_pos += 1;
+                    }
+                }
+                string_builder.len += write_pos;
+                result.pkg.requires_dist_len = @intCast(write_pos);
+            }
+        }
+
+        // Populate files
+        var file_idx: usize = 0;
+        if (json.asProperty("urls")) |urls| {
+            if (urls.expr.data == .e_array) {
+                for (urls.expr.data.e_array.slice()) |file_obj| {
+                    if (file_obj.data != .e_object) continue;
+                    if (file_idx >= file_count) break;
+
+                    var file = File{
+                        .filename = .{},
+                        .url = .{},
+                        .sha256 = .{},
+                        .python_version = .{},
+                        .requires_python = .{},
+                        .packagetype = .other,
+                        .size = 0,
+                    };
+
+                    if (file_obj.asProperty("filename")) |f| {
+                        if (f.expr.asString(allocator)) |filename| {
+                            file.filename = string_builder.append(String, filename);
+                        }
+                    }
+                    if (file_obj.asProperty("url")) |u| {
+                        if (u.expr.asString(allocator)) |url| {
+                            file.url = string_builder.append(String, url);
+                        }
+                    }
+                    if (file_obj.asProperty("digests")) |d| {
+                        if (d.expr.asProperty("sha256")) |sha| {
+                            if (sha.expr.asString(allocator)) |sha_str| {
+                                file.sha256 = string_builder.append(String, sha_str);
+                            }
+                        }
+                    }
+                    if (file_obj.asProperty("python_version")) |pv| {
+                        if (pv.expr.asString(allocator)) |pv_str| {
+                            file.python_version = string_builder.append(String, pv_str);
+                        }
+                    }
+                    if (file_obj.asProperty("requires_python")) |rp| {
+                        if (rp.expr.asString(allocator)) |rp_str| {
+                            file.requires_python = string_builder.append(String, rp_str);
+                        }
+                    }
+                    if (file_obj.asProperty("packagetype")) |pt| {
+                        if (pt.expr.asString(allocator)) |pt_str| {
+                            file.packagetype = File.PackageType.fromString(pt_str);
+                        }
+                    }
+                    if (file_obj.asProperty("size")) |sz| {
+                        if (sz.expr.data == .e_number) {
+                            // The value comes from the network. @intFromFloat is
+                            // illegal behavior for NaN, infinities, negatives and
+                            // anything at or above 2^64, so such values keep the
+                            // default size of 0 instead.
+                            const raw_size = sz.expr.data.e_number.value;
+                            if (std.math.isFinite(raw_size) and raw_size >= 0 and raw_size < 18446744073709551616.0) {
+                                file.size = @intFromFloat(raw_size);
+                            }
+                        }
+                    }
+
+                    files[file_idx] = file;
+                    file_idx += 1;
+                }
+            }
+        }
+
+        result.string_buf = string_builder.allocatedSlice();
+        result.files = files[0..file_idx];
+
+        return result;
+    }
+
+    /// Release the buffers allocated by `parse` and reset to an empty manifest.
+    pub fn deinit(self: *PackageManifest) void {
+        if (self.string_buf.len > 0) {
+            default_allocator.free(self.string_buf);
+        }
+        if (self.files.len > 0) {
+            default_allocator.free(self.files);
+        }
+        self.* = .{
+            .pkg = .{
+                .name = .{},
+                .latest_version = .{},
+                .requires_python = .{},
+                .requires_dist_off = 0,
+
.requires_dist_len = 0,
+            },
+            .string_buf = &.{},
+            .files = &.{},
+        };
+    }
+};
+
+/// Parse a PEP 508 dependency specifier from requires_dist
+/// (the version part inside it is a PEP 440 specifier list)
+/// Format: "package_name (>=1.0,<2.0) ; extra == 'dev'"
+pub const DependencySpecifier = struct {
+    name: []const u8,
+    version_spec: []const u8,
+    extras: []const u8,
+    markers: []const u8,
+
+    /// Split one requirement line into name, extras, version specifier and
+    /// environment markers. Every returned slice points into `spec`; parts that
+    /// are absent are left as empty strings.
+    pub fn parse(spec: []const u8) ?DependencySpecifier {
+        var result = DependencySpecifier{
+            .name = "",
+            .version_spec = "",
+            .extras = "",
+            .markers = "",
+        };
+
+        var remaining = strings.trim(spec, &strings.whitespace_chars);
+
+        // Find the end of the package name (first space, [, (, ;, or operator character)
+        var name_end: usize = 0;
+        for (remaining, 0..) |c, i| {
+            if (c == ' ' or c == '[' or c == '(' or c == ';' or c == '<' or c == '>' or c == '=' or c == '!' or c == '~') {
+                name_end = i;
+                break;
+            }
+        } else {
+            // Entire string is the package name
+            result.name = remaining;
+            return result;
+        }
+
+        result.name = remaining[0..name_end];
+        remaining = remaining[name_end..];
+        remaining = strings.trim(remaining, &strings.whitespace_chars);
+
+        // Check for extras [extra1,extra2]
+        if (remaining.len > 0 and remaining[0] == '[') {
+            if (strings.indexOfChar(remaining, ']')) |end| {
+                result.extras = remaining[1..end];
+                remaining = remaining[end + 1 ..];
+                remaining = strings.trim(remaining, &strings.whitespace_chars);
+            }
+        }
+
+        // Check for version specifier (>=1.0,<2.0) or just >=1.0
+        if (remaining.len > 0) {
+            if (remaining[0] == '(') {
+                if (strings.indexOfChar(remaining, ')')) |end| {
+                    result.version_spec = remaining[1..end];
+                    remaining = remaining[end + 1 ..];
+                    remaining = strings.trim(remaining, &strings.whitespace_chars);
+                }
+            } else if (remaining[0] == '>' or remaining[0] == '<' or remaining[0] == '=' or remaining[0] == '!' or remaining[0] == '~') {
+                // Version spec without parens - it runs up to the marker separator.
+                // Whitespace is legal between clauses (">=1.0, <2.0"), so stopping
+                // at the first space would drop the later clauses and leave the
+                // ';' unreachable, losing the markers as well.
+                const spec_end = strings.indexOfChar(remaining, ';') orelse remaining.len;
+                result.version_spec = strings.trim(remaining[0..spec_end], &strings.whitespace_chars);
+                remaining = remaining[spec_end..];
+            }
+        }
+
+        // Check for environment markers ; python_version >= "3.8"
+        if (remaining.len > 0 and remaining[0] == ';') {
+            result.markers = strings.trim(remaining[1..], &strings.whitespace_chars);
+        }
+
+        return result;
+    }
+
+    /// Check if this dependency should be included for the given Python version.
+    /// Returns false for dependencies that:
+    /// - Require extras (e.g., "extra == 'socks'")
+    /// - Have unsatisfied Python version markers
+    /// - Have unsatisfied platform markers (platform_system, sys_platform)
+    pub fn isApplicable(self: DependencySpecifier, target: PlatformTarget) bool {
+        if (self.markers.len == 0) return true;
+
+        // Skip dependencies that require extras (e.g., "; extra == 'socks'")
+        // These are optional dependencies that the user must explicitly request
+        if (strings.containsComptime(self.markers, "extra")) return false;
+
+        // Parse python_version markers
+        // Common formats: python_version >= "3.8", python_version < "3.10"
+        // For now, we're permissive - include unless we can definitively exclude
+        if (strings.containsComptime(self.markers, "python_version")) {
+            // Try to parse simple python_version constraints
+            // Format: python_version <op> "X.Y"
+            if (strings.indexOf(self.markers, "python_version")) |idx| {
+                var marker_remaining = self.markers[idx + "python_version".len ..];
+                marker_remaining = strings.trim(marker_remaining, &strings.whitespace_chars);
+
+                // Parse operator
+                var op: enum { lt, lte, gt, gte, eq, neq } = .gte;
+                if (strings.hasPrefixComptime(marker_remaining, ">=")) {
+                    op = .gte;
+                    marker_remaining = marker_remaining[2..];
+                } else if (strings.hasPrefixComptime(marker_remaining, "<=")) {
+                    op = .lte;
+                    marker_remaining = marker_remaining[2..];
+                } else if
(strings.hasPrefixComptime(marker_remaining, "==")) { + op = .eq; + marker_remaining = marker_remaining[2..]; + } else if (strings.hasPrefixComptime(marker_remaining, "!=")) { + op = .neq; + marker_remaining = marker_remaining[2..]; + } else if (strings.hasPrefixComptime(marker_remaining, "<")) { + op = .lt; + marker_remaining = marker_remaining[1..]; + } else if (strings.hasPrefixComptime(marker_remaining, ">")) { + op = .gt; + marker_remaining = marker_remaining[1..]; + } + + marker_remaining = strings.trim(marker_remaining, &strings.whitespace_chars); + + // Parse version string (remove quotes) + if (marker_remaining.len > 0 and (marker_remaining[0] == '"' or marker_remaining[0] == '\'')) { + const quote = marker_remaining[0]; + marker_remaining = marker_remaining[1..]; + if (strings.indexOfChar(marker_remaining, quote)) |end| { + const ver_str = marker_remaining[0..end]; + // Parse "X.Y" format + if (strings.indexOfChar(ver_str, '.')) |dot| { + const major = std.fmt.parseInt(u8, ver_str[0..dot], 10) catch return true; + const minor = std.fmt.parseInt(u8, ver_str[dot + 1 ..], 10) catch return true; + + // Compare with current Python version + const current = @as(u16, target.python_version.major) * 100 + target.python_version.minor; + const required = @as(u16, major) * 100 + minor; + + const version_matches = switch (op) { + .lt => current < required, + .lte => current <= required, + .gt => current > required, + .gte => current >= required, + .eq => current == required, + .neq => current != required, + }; + if (!version_matches) return false; + } + } + } + } + } + + // Handle platform_system markers (e.g., platform_system == "Linux") + if (strings.containsComptime(self.markers, "platform_system")) { + const current_platform: []const u8 = switch (target.os) { + .macos => "Darwin", + .linux => "Linux", + .windows => "Windows", + .unknown => "", + }; + + // Check for platform_system == "X" or platform_system != "X" + if (strings.indexOf(self.markers, 
"platform_system")) |idx| { + var marker_remaining = self.markers[idx + "platform_system".len ..]; + marker_remaining = strings.trim(marker_remaining, &strings.whitespace_chars); + + var is_negated = false; + if (strings.hasPrefixComptime(marker_remaining, "!=")) { + is_negated = true; + marker_remaining = marker_remaining[2..]; + } else if (strings.hasPrefixComptime(marker_remaining, "==")) { + marker_remaining = marker_remaining[2..]; + } else { + // Unknown operator, be permissive + return true; + } + + marker_remaining = strings.trim(marker_remaining, &strings.whitespace_chars); + + // Parse quoted platform string + if (marker_remaining.len > 0 and (marker_remaining[0] == '"' or marker_remaining[0] == '\'')) { + const quote = marker_remaining[0]; + marker_remaining = marker_remaining[1..]; + if (strings.indexOfChar(marker_remaining, quote)) |end| { + const platform_str = marker_remaining[0..end]; + const matches = strings.eql(platform_str, current_platform); + const platform_matches = if (is_negated) !matches else matches; + if (!platform_matches) return false; + } + } + } + } + + // Handle sys_platform markers (e.g., sys_platform == "linux") + if (strings.containsComptime(self.markers, "sys_platform")) { + const current_sys_platform: []const u8 = switch (target.os) { + .macos => "darwin", + .linux => "linux", + .windows => "win32", + .unknown => "", + }; + + if (strings.indexOf(self.markers, "sys_platform")) |idx| { + var marker_remaining = self.markers[idx + "sys_platform".len ..]; + marker_remaining = strings.trim(marker_remaining, &strings.whitespace_chars); + + var is_negated = false; + if (strings.hasPrefixComptime(marker_remaining, "!=")) { + is_negated = true; + marker_remaining = marker_remaining[2..]; + } else if (strings.hasPrefixComptime(marker_remaining, "==")) { + marker_remaining = marker_remaining[2..]; + } else { + return true; + } + + marker_remaining = strings.trim(marker_remaining, &strings.whitespace_chars); + + if (marker_remaining.len > 0 
and (marker_remaining[0] == '"' or marker_remaining[0] == '\'')) { + const quote = marker_remaining[0]; + marker_remaining = marker_remaining[1..]; + if (strings.indexOfChar(marker_remaining, quote)) |end| { + const platform_str = marker_remaining[0..end]; + const matches = strings.eql(platform_str, current_sys_platform); + const platform_matches = if (is_negated) !matches else matches; + if (!platform_matches) return false; + } + } + } + } + + // For other markers (implementation, etc.), be permissive + return true; + } + + /// Normalize a Python version (PEP 440) to a semver-compatible format. + /// Strips the +local identifier and suffixes like .postN, .devN that semver doesn't understand. + /// Copies the kept prefix of `version` into `buf` and returns that slice of `buf`. + /// If `buf` is shorter than the kept prefix, the result is silently truncated to `buf.len`. + pub fn normalizeVersion(version: []const u8, buf: []u8) []const u8 { + // Find and strip Python-specific suffixes: + // - .postN (post-releases) + // - .devN (development releases) + // - +local (local version identifier) + var end = version.len; + + // Strip local version identifier (+...) + if (strings.indexOfChar(version, '+')) |plus_idx| { + end = plus_idx; + } + + // Strip .post, .dev suffixes + // Suffixes are tried in order (.post, then .dev); the first one found cuts the string and ends the search + const suffixes = [_][]const u8{ ".post", ".dev" }; + for (suffixes) |suffix| { + if (strings.indexOf(version[0..end], suffix)) |suffix_idx| { + end = suffix_idx; + break; + } + } + + // Clamp to the destination size so the copy never writes past `buf` + const copy_len = @min(end, buf.len); + @memcpy(buf[0..copy_len], version[0..copy_len]); + return buf[0..copy_len]; + } + + /// Normalize a PyPI package name according to PEP 503 + /// - Lowercase + /// - Replace runs of [-_.]
with single - + pub fn normalizeName(name: []const u8, buf: []u8) []const u8 { + var write_idx: usize = 0; + var prev_was_separator = false; + + for (name) |c| { + if (write_idx >= buf.len) break; + const is_separator = (c == '-' or c == '_' or c == '.'); + if (is_separator) { + if (!prev_was_separator) { + buf[write_idx] = '-'; + write_idx += 1; + } + prev_was_separator = true; + } else { + buf[write_idx] = std.ascii.toLower(c); + write_idx += 1; + prev_was_separator = false; + } + } + + return buf[0..write_idx]; + } +}; + +test "WheelTag.parse" { + const tag1 = WheelTag.parse("numpy-2.0.0-cp312-cp312-macosx_14_0_arm64.whl"); + try std.testing.expect(tag1 != null); + try std.testing.expectEqualStrings("cp312", tag1.?.python); + try std.testing.expectEqualStrings("cp312", tag1.?.abi); + try std.testing.expectEqualStrings("macosx_14_0_arm64", tag1.?.platform); + + const tag2 = WheelTag.parse("requests-2.32.0-py3-none-any.whl"); + try std.testing.expect(tag2 != null); + try std.testing.expectEqualStrings("py3", tag2.?.python); + try std.testing.expectEqualStrings("none", tag2.?.abi); + try std.testing.expectEqualStrings("any", tag2.?.platform); + + // Not a wheel + try std.testing.expect(WheelTag.parse("requests-2.32.0.tar.gz") == null); +} + +test "PlatformTarget.isPlatformCompatible" { + const mac_arm = PlatformTarget{ + .os = .macos, + .arch = .aarch64, + .python_version = .{ .major = 3, .minor = 12 }, + }; + + try std.testing.expect(mac_arm.isPlatformCompatible("any")); + try std.testing.expect(mac_arm.isPlatformCompatible("macosx_14_0_arm64")); + try std.testing.expect(mac_arm.isPlatformCompatible("macosx_11_0_universal2")); + try std.testing.expect(!mac_arm.isPlatformCompatible("macosx_14_0_x86_64")); + try std.testing.expect(!mac_arm.isPlatformCompatible("linux_x86_64")); +} + +test "DependencySpecifier.parse" { + const spec1 = DependencySpecifier.parse("requests>=2.0,<3.0"); + try std.testing.expect(spec1 != null); + try 
std.testing.expectEqualStrings("requests", spec1.?.name); + try std.testing.expectEqualStrings(">=2.0,<3.0", spec1.?.version_spec); + + const spec2 = DependencySpecifier.parse("urllib3 (>=1.21.1,<3)"); + try std.testing.expect(spec2 != null); + try std.testing.expectEqualStrings("urllib3", spec2.?.name); + try std.testing.expectEqualStrings(">=1.21.1,<3", spec2.?.version_spec); + + const spec3 = DependencySpecifier.parse("PySocks!=1.5.7,>=1.5.6 ; extra == 'socks'"); + try std.testing.expect(spec3 != null); + try std.testing.expectEqualStrings("PySocks", spec3.?.name); + try std.testing.expectEqualStrings("!=1.5.7,>=1.5.6", spec3.?.version_spec); + try std.testing.expectEqualStrings("extra == 'socks'", spec3.?.markers); +} diff --git a/src/install/resolution.zig b/src/install/resolution.zig index 5256227f01..fea6905349 100644 --- a/src/install/resolution.zig +++ b/src/install/resolution.zig @@ -97,6 +97,9 @@ pub fn ResolutionType(comptime SemverIntType: type) type { // should not happen .dist_tag => error.UnexpectedResolution, .uninitialized => error.UnexpectedResolution, + + // TODO: handle PyPI resolutions + .pypi => error.UnexpectedResolution, }; } @@ -173,6 +176,7 @@ pub fn ResolutionType(comptime SemverIntType: type) type { .catalog => error.InvalidPnpmLockfile, .dist_tag => error.InvalidPnpmLockfile, .uninitialized => error.InvalidPnpmLockfile, + .pypi => error.InvalidPnpmLockfile, }; } @@ -211,6 +215,7 @@ pub fn ResolutionType(comptime SemverIntType: type) type { .single_file_module => builder.count(this.value.single_file_module.slice(buf)), .git => this.value.git.count(buf, Builder, builder), .github => this.value.github.count(buf, Builder, builder), + .pypi => this.value.pypi.count(buf, Builder, builder), else => {}, } } @@ -244,6 +249,9 @@ pub fn ResolutionType(comptime SemverIntType: type) type { .github => Value.init(.{ .github = this.value.github.clone(buf, Builder, builder), }), + .pypi => Value.init(.{ + .pypi = this.value.pypi.clone(buf, Builder, 
builder), + }), .root => Value.init(.{ .root = {} }), .uninitialized => Value.init(.{ .uninitialized = {} }), else => { @@ -264,6 +272,7 @@ pub fn ResolutionType(comptime SemverIntType: type) type { .single_file_module => .init(.{ .single_file_module = this.value.single_file_module }), .git => .init(.{ .git = this.value.git }), .github => .init(.{ .github = this.value.github }), + .pypi => .init(.{ .pypi = this.value.pypi }), .root => .init(.{ .root = {} }), .uninitialized => .init(.{ .uninitialized = {} }), else => { @@ -370,6 +379,7 @@ pub fn ResolutionType(comptime SemverIntType: type) type { lhs_string_buf, rhs_string_buf, ), + .pypi => lhs.value.pypi.eql(rhs.value.pypi), else => unreachable, }; } @@ -419,6 +429,7 @@ pub fn ResolutionType(comptime SemverIntType: type) type { .path_sep = formatter.path_sep, })}), .single_file_module => try writer.print("module:{s}", .{value.single_file_module.slice(buf)}), + .pypi => try value.pypi.version.fmt(buf).format(writer), else => {}, } } @@ -442,6 +453,7 @@ pub fn ResolutionType(comptime SemverIntType: type) type { .workspace => try writer.print("workspace:{s}", .{formatter.resolution.value.workspace.slice(formatter.buf)}), .symlink => try writer.print("link:{s}", .{formatter.resolution.value.symlink.slice(formatter.buf)}), .single_file_module => try writer.print("module:{s}", .{formatter.resolution.value.single_file_module.slice(formatter.buf)}), + .pypi => try formatter.resolution.value.pypi.version.fmt(formatter.buf).format(writer), else => try writer.writeAll("{}"), } try writer.writeAll(" }"); @@ -471,6 +483,9 @@ pub fn ResolutionType(comptime SemverIntType: type) type { /// URL to a tarball. 
remote_tarball: String, + /// PyPI package with version and wheel URL + pypi: VersionedURLType(SemverIntType), + single_file_module: String, pub var zero: Value = @bitCast(std.mem.zeroes([@sizeOf(Value)]u8)); @@ -505,6 +520,9 @@ pub fn ResolutionType(comptime SemverIntType: type) type { remote_tarball = 80, + /// PyPI package (wheel URL) + pypi = 88, + // This is a placeholder for now. // But the intent is to eventually support URL imports at the package manager level. // @@ -531,7 +549,7 @@ pub fn ResolutionType(comptime SemverIntType: type) type { } pub fn canEnqueueInstallTask(this: Tag) bool { - return this == .npm or this == .local_tarball or this == .remote_tarball or this == .git or this == .github; + return this == .npm or this == .local_tarball or this == .remote_tarball or this == .git or this == .github or this == .pypi; } }; }; diff --git a/src/js_printer.zig b/src/js_printer.zig index 77f839e2f3..e849743a48 100644 --- a/src/js_printer.zig +++ b/src/js_printer.zig @@ -4650,6 +4650,7 @@ fn NewPrinter( .sqlite, .sqlite_embedded => p.printWhitespacer(ws(" with { type: \"sqlite\" }")), .html => p.printWhitespacer(ws(" with { type: \"html\" }")), .md => p.printWhitespacer(ws(" with { type: \"md\" }")), + .py => p.printWhitespacer(ws(" with { type: \"py\" }")), }; p.printSemicolonAfterStatement(); @@ -4678,6 +4679,7 @@ fn NewPrinter( .html => analyze_transpiled_module.ModuleInfo.FetchParameters.hostDefined(bun.handleOom(mi.str("html"))), .json5 => analyze_transpiled_module.ModuleInfo.FetchParameters.hostDefined(bun.handleOom(mi.str("json5"))), .md => analyze_transpiled_module.ModuleInfo.FetchParameters.hostDefined(bun.handleOom(mi.str("md"))), + .py => analyze_transpiled_module.ModuleInfo.FetchParameters.hostDefined(bun.handleOom(mi.str("py"))), } else .none) else .none; bun.handleOom(mi.requestModule(irp_id, fetch_parameters)); diff --git a/src/libarchive/libarchive.zig b/src/libarchive/libarchive.zig index 72bc5d5b96..dea1489d71 100644 --- 
a/src/libarchive/libarchive.zig +++ b/src/libarchive/libarchive.zig @@ -32,33 +32,38 @@ pub const BufferReadStream = struct { _ = this.archive.readFree(); } + pub const Format = enum { + tar_gzip, + zip, + }; + pub fn openRead(this: *BufferReadStream) Archive.Result { - // lib.archive_read_set_open_callback(this.archive, this.); - // _ = lib.archive_read_set_read_callback(this.archive, archive_read_callback); - // _ = lib.archive_read_set_seek_callback(this.archive, archive_seek_callback); - // _ = lib.archive_read_set_skip_callback(this.archive, archive_skip_callback); - // _ = lib.archive_read_set_close_callback(this.archive, archive_close_callback); - // // lib.archive_read_set_switch_callback(this.archive, this.archive_s); - // _ = lib.archive_read_set_callback_data(this.archive, this); + return this.openReadWithFormat(.tar_gzip); + } - _ = this.archive.readSupportFormatTar(); - _ = this.archive.readSupportFormatGnutar(); - _ = this.archive.readSupportFilterGzip(); + pub fn openReadWithFormat(this: *BufferReadStream, format: Format) Archive.Result { + switch (format) { + .tar_gzip => { + _ = this.archive.readSupportFormatTar(); + _ = this.archive.readSupportFormatGnutar(); + _ = this.archive.readSupportFilterGzip(); - // Ignore zeroed blocks in the archive, which occurs when multiple tar archives - // have been concatenated together. - // Without this option, only the contents of - // the first concatenated archive would be read. - _ = this.archive.readSetOptions("read_concatenated_archives"); - - // _ = lib.archive_read_support_filter_none(this.archive); + // Ignore zeroed blocks in the archive, which occurs when multiple tar archives + // have been concatenated together. + // Without this option, only the contents of + // the first concatenated archive would be read. 
+ _ = this.archive.readSetOptions("read_concatenated_archives"); + }, + .zip => { + _ = this.archive.readSupportFormatZip(); + _ = this.archive.readSupportFilterNone(); + }, + } const rc = this.archive.readOpenMemory(this.buf); this.reading = @intFromEnum(rc) > -1; - // _ = lib.archive_read_support_compression_all(this.archive); - return rc; } @@ -330,6 +335,7 @@ pub const Archiver = struct { close_handles: bool = true, log: bool = false, npm: bool = false, + format: BufferReadStream.Format = .tar_gzip, }; pub fn extractToDir( @@ -345,7 +351,7 @@ pub const Archiver = struct { var stream: BufferReadStream = undefined; stream.init(file_buffer); defer stream.deinit(); - _ = stream.openRead(); + _ = stream.openReadWithFormat(options.format); const archive = stream.archive; var count: u32 = 0; const dir_fd = dir.fd; diff --git a/src/options.zig b/src/options.zig index 81d0c28d3b..68874f4d85 100644 --- a/src/options.zig +++ b/src/options.zig @@ -636,6 +636,7 @@ pub const Loader = enum(u8) { yaml = 18, json5 = 19, md = 20, + py = 21, pub const Optional = enum(u8) { none = 254, @@ -736,7 +737,7 @@ pub const Loader = enum(u8) { pub fn canBeRunByBun(this: Loader) bool { return switch (this) { - .jsx, .js, .ts, .tsx, .wasm, .bunsh => true, + .jsx, .js, .ts, .tsx, .wasm, .bunsh, .py => true, else => false, }; } @@ -812,6 +813,7 @@ pub const Loader = enum(u8) { .{ "html", .html }, .{ "md", .md }, .{ "markdown", .md }, + .{ "py", .py }, }); pub const api_names = bun.ComptimeStringMap(api.Loader, .{ @@ -841,6 +843,7 @@ pub const Loader = enum(u8) { .{ "html", .html }, .{ "md", .md }, .{ "markdown", .md }, + .{ "py", .py }, }); pub fn fromString(slice_: string) ?Loader { @@ -880,6 +883,7 @@ pub const Loader = enum(u8) { .text => .text, .sqlite_embedded, .sqlite => .sqlite, .md => .md, + .py => .py, }; } @@ -907,6 +911,7 @@ pub const Loader = enum(u8) { .sqlite => .sqlite, .sqlite_embedded => .sqlite_embedded, .md => .md, + .py => .py, _ => .file, }; } @@ -1126,6 +1131,7 @@ const 
default_loaders_posix = .{ .{ ".html", .html }, .{ ".jsonc", .jsonc }, .{ ".json5", .json5 }, + .{ ".py", .py }, }; const default_loaders_win32 = default_loaders_posix ++ .{ .{ ".sh", .bunsh }, diff --git a/src/transpiler.zig b/src/transpiler.zig index 688c780038..51bd07ada7 100644 --- a/src/transpiler.zig +++ b/src/transpiler.zig @@ -691,6 +691,28 @@ pub const Transpiler = struct { .dataurl, .base64 => { Output.panic("TODO: dataurl, base64", .{}); // TODO }, + .py => { + const entry = transpiler.resolver.caches.fs.readFileWithAllocator( + transpiler.allocator, + transpiler.fs, + file_path.text, + resolve_result.dirname_fd, + false, + null, + ) catch |err| { + transpiler.log.addErrorFmt(null, .Empty, transpiler.allocator, "{s} reading \"{s}\"", .{ @errorName(err), file_path.pretty }) catch {}; + return null; + }; + + output_file.size = entry.contents.len; + + output_file.value = .{ + .buffer = .{ + .allocator = transpiler.allocator, + .bytes = entry.contents, + }, + }; + }, .css => { const alloc = transpiler.allocator; diff --git a/test/js/bun/python/python-interop.test.ts b/test/js/bun/python/python-interop.test.ts new file mode 100644 index 0000000000..3dfb28d4cc --- /dev/null +++ b/test/js/bun/python/python-interop.test.ts @@ -0,0 +1,1890 @@ +import { describe, expect, test } from "bun:test"; +import { bunEnv, bunExe, tempDir } from "harness"; + +describe("Python imports", () => { + test("import simple values from Python", async () => { + using dir = tempDir("python-test", { + "test.py": ` +count = 42 +name = "hello" +pi = 3.14 +flag = True +`, + "test.js": ` +import { count, name, pi, flag } from "./test.py"; +console.log(JSON.stringify({ count, name, pi, flag })); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + 
expect(JSON.parse(stdout.trim())).toEqual({ + count: 42, + name: "hello", + pi: 3.14, + flag: true, + }); + expect(exitCode).toBe(0); + }); + + test("import and access dict properties", async () => { + using dir = tempDir("python-test", { + "test.py": ` +data = { + 'count': 1, + 'name': 'test' +} +`, + "test.js": ` +import { data } from "./test.py"; +console.log(data.count); +console.log(data.name); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("1\ntest"); + expect(exitCode).toBe(0); + }); + + test("modify dict from JS, visible in Python", async () => { + using dir = tempDir("python-test", { + "test.py": ` +data = {'count': 1} + +def get_count(): + return data['count'] + +def get_new_key(): + return data.get('new_key', 'NOT SET') +`, + "test.js": ` +import { data, get_count, get_new_key } from "./test.py"; + +console.log("before:", get_count()); +data.count = 999; +console.log("after:", get_count()); + +console.log("new_key before:", get_new_key()); +data.new_key = "added from JS"; +console.log("new_key after:", get_new_key()); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("before: 1\nafter: 999\nnew_key before: NOT SET\nnew_key after: added from JS"); + expect(exitCode).toBe(0); + }); + + test("nested object access and mutation", async () => { + using dir = tempDir("python-test", { + "test.py": ` +data = { + 'inner': { + 'value': 42 + } +} + +def get_inner_x(): + return data['inner'].get('x', 'NOT SET') +`, + "test.js": ` +import { data, get_inner_x } from "./test.py"; + 
+const inner = data.inner; +console.log("inner.value:", inner.value); + +console.log("before:", get_inner_x()); +inner.x = "set from JS"; +console.log("after:", get_inner_x()); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("inner.value: 42\nbefore: NOT SET\nafter: set from JS"); + expect(exitCode).toBe(0); + }); + + test("call Python functions with arguments", async () => { + using dir = tempDir("python-test", { + "test.py": ` +def add(a, b): + return a + b + +def greet(name): + return f"Hello, {name}!" + +def no_args(): + return "called with no args" +`, + "test.js": ` +import { add, greet, no_args } from "./test.py"; + +console.log(add(2, 3)); +console.log(greet("World")); +console.log(no_args()); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("5\nHello, World!\ncalled with no args"); + expect(exitCode).toBe(0); + }); + + test("Python class instantiation and methods", async () => { + using dir = tempDir("python-test", { + "test.py": ` +class Counter: + def __init__(self, start=0): + self.value = start + + def increment(self): + self.value += 1 + return self.value + + def get(self): + return self.value +`, + "test.js": ` +import { Counter } from "./test.py"; + +const counter = new Counter(10); +console.log("initial:", counter.get()); +console.log("after increment:", counter.increment()); +console.log("after increment:", counter.increment()); +console.log("value property:", counter.value); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: 
String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("initial: 10\nafter increment: 11\nafter increment: 12\nvalue property: 12"); + expect(exitCode).toBe(0); + }); + + test("assign class instance to Python dict", async () => { + using dir = tempDir("python-test", { + "test.py": ` +class Potato: + def __init__(self, name): + self.name = name + + def greet(self): + return f"I am {self.name}" + +data = {} + +def check(): + if 'item' in data: + return f"name={data['item'].name}, greet={data['item'].greet()}" + return "not found" +`, + "test.js": ` +import { Potato, data, check } from "./test.py"; + +console.log("before:", check()); + +const spud = new Potato("Spudnik"); +data.item = spud; + +console.log("after:", check()); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("before: not found\nafter: name=Spudnik, greet=I am Spudnik"); + expect(exitCode).toBe(0); + }); + + test("Python lists", async () => { + using dir = tempDir("python-test", { + "test.py": ` +items = [1, 2, 3, "four", 5.0] + +def get_length(): + return len(items) +`, + "test.js": ` +import { items, get_length } from "./test.py"; + +console.log("length:", get_length()); +console.log("items[0]:", items[0]); +console.log("items[3]:", items[3]); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("length: 5\nitems[0]: 1\nitems[3]: four"); + expect(exitCode).toBe(0); 
+ }); + + test("None becomes null", async () => { + using dir = tempDir("python-test", { + "test.py": ` +nothing = None + +def returns_none(): + return None +`, + "test.js": ` +import { nothing, returns_none } from "./test.py"; + +console.log("nothing:", nothing); +console.log("nothing === null:", nothing === null); +console.log("returns_none():", returns_none()); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("nothing: null\nnothing === null: true\nreturns_none(): null"); + expect(exitCode).toBe(0); + }); + + test("toString and console.log use Python str()", async () => { + using dir = tempDir("python-test", { + "test.py": ` +data = {'name': 'test', 'count': 42} + +class Point: + def __init__(self, x, y): + self.x = x + self.y = y + + def __str__(self): + return f"Point({self.x}, {self.y})" +`, + "test.js": ` +import { data, Point } from "./test.py"; + +// toString() returns Python's str() +console.log(data.toString()); + +// String() coercion +console.log(String(data)); + +// Class with custom __str__ +const p = new Point(3, 4); +console.log(p.toString()); + +// console.log uses Python representation +console.log(data); +console.log(p); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + const lines = stdout.trim().split("\n"); + // Dict toString + expect(lines[0]).toBe("{'name': 'test', 'count': 42}"); + // Dict String() + expect(lines[1]).toBe("{'name': 'test', 'count': 42}"); + // Point toString (custom __str__) + expect(lines[2]).toBe("Point(3, 4)"); + // console.log dict + 
expect(lines[3]).toBe("{'name': 'test', 'count': 42}"); + // console.log Point + expect(lines[4]).toBe("Point(3, 4)"); + expect(exitCode).toBe(0); + }); + + test("Python print() output appears", async () => { + using dir = tempDir("python-test", { + "test.py": ` +def say_hello(name): + print(f"Hello, {name}!") + return "done" + +def multi_line(): + print("Line 1") + print("Line 2") +`, + "test.js": ` +import { say_hello, multi_line } from "./test.py"; + +console.log("before"); +say_hello("World"); +console.log("middle"); +multi_line(); +console.log("after"); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("before\nHello, World!\nmiddle\nLine 1\nLine 2\nafter"); + expect(exitCode).toBe(0); + }); +}); + +describe("JavaScript imports in Python", () => { + test("import simple values from JavaScript", async () => { + using dir = tempDir("python-js-test", { + "utils.js": ` +export const count = 42; +export const name = "hello"; +export const pi = 3.14; +export const flag = true; +`, + "test.py": ` +import utils + +print(utils.count) +print(utils.name) +print(utils.pi) +print(utils.flag) +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.py"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("42\nhello\n3.14\nTrue"); + expect(exitCode).toBe(0); + }); + + test("call JavaScript functions from Python", async () => { + using dir = tempDir("python-js-test", { + "jsmath.js": ` +export function add(a, b) { + return a + b; +} + +export function greet(name) { + return "Hello, " + name + "!"; +} + +export function noArgs() { + return "called with no 
args"; +} +`, + "test.py": ` +import jsmath + +print(jsmath.add(2, 3)) +print(jsmath.greet("Python")) +print(jsmath.noArgs()) +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.py"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("5\nHello, Python!\ncalled with no args"); + expect(exitCode).toBe(0); + }); + + test("access JavaScript object properties", async () => { + using dir = tempDir("python-js-test", { + "config.js": ` +export const config = { + name: "MyApp", + version: "1.0.0", + settings: { + debug: true, + port: 3000 + } +}; +`, + "test.py": ` +import config + +print(config.config.name) +print(config.config.version) +print(config.config.settings.debug) +print(config.config.settings.port) +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.py"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("MyApp\n1.0.0\nTrue\n3000"); + expect(exitCode).toBe(0); + }); + + test("subscript access on JavaScript objects", async () => { + using dir = tempDir("python-js-test", { + "data.js": ` +export const obj = { count: 1, name: "test" }; +export const arr = [10, 20, 30]; +`, + "test.py": ` +import data + +print(data.obj['count']) +print(data.obj['name']) +print(data.arr[0]) +print(data.arr[2]) +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.py"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("1\ntest\n10\n30"); + expect(exitCode).toBe(0); + }); + + test("modify JavaScript objects from Python", async () => { 
+ using dir = tempDir("python-js-test", { + "state.js": ` +export const obj = { count: 1 }; + +export function getCount() { + return obj.count; +} +`, + "test.py": ` +import state + +print(state.getCount()) +state.obj['count'] = 999 +print(state.getCount()) +state.obj.count = 42 +print(state.getCount()) +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.py"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("1\n999\n42"); + expect(exitCode).toBe(0); + }); + + test("import TypeScript from Python", async () => { + using dir = tempDir("python-ts-test", { + "utils.ts": ` +export function multiply(a: number, b: number): number { + return a * b; +} + +export const PI: number = 3.14159; + +interface Config { + name: string; +} + +export const config: Config = { name: "TypeScript" }; +`, + "test.py": ` +import utils + +print(utils.multiply(6, 7)) +print(utils.PI) +print(utils.config.name) +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.py"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("42\n3.14159\nTypeScript"); + expect(exitCode).toBe(0); + }); + + test("bidirectional: Python calls JS which calls Python", async () => { + using dir = tempDir("python-bidirectional", { + "helper.py": ` +def double(x): + return x * 2 + +def format_result(value): + return f"Result: {value}" +`, + "processor.js": ` +import { double, format_result } from "./helper.py"; + +export function process(value) { + const doubled = double(value); + return format_result(doubled); +} +`, + "main.py": ` +import processor + +result = processor.process(21) +print(result) +`, + }); + + await using proc = Bun.spawn({ + cmd: 
[bunExe(), "main.py"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("Result: 42"); + expect(exitCode).toBe(0); + }); + + test("JavaScript undefined and null become None", async () => { + using dir = tempDir("python-js-null", { + "nulls.js": ` +export const nothing = null; +export const undef = undefined; + +export function returnsNull() { + return null; +} + +export function returnsUndefined() { + return undefined; +} +`, + "test.py": ` +import nulls + +print(nulls.nothing) +print(nulls.undef) +print(nulls.returnsNull()) +print(nulls.returnsUndefined()) +print(nulls.nothing is None) +print(nulls.undef is None) +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.py"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("None\nNone\nNone\nNone\nTrue\nTrue"); + expect(exitCode).toBe(0); + }); + + test("multiple imports of same module use cached version", async () => { + using dir = tempDir("python-multi-import", { + "counter.js": ` +export let count = 0; + +export function increment() { + count++; + return count; +} +`, + "test.py": ` +import counter +import counter as counter2 + +# Both should refer to the same module +print(counter.increment()) +print(counter2.increment()) +print(counter.count) +print(counter2.count) +print(counter is counter2) +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.py"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + // Both imports should share state - count increments from 1 to 2 + 
expect(stdout.trim()).toBe("1\n2\n2\n2\nTrue"); + expect(exitCode).toBe(0); + }); + + test("__name__ is module name when imported from JS", async () => { + using dir = tempDir("python-name-import", { + "my_module.py": ` +def get_name(): + return __name__ + +module_name = __name__ +`, + "test.js": ` +import { get_name, module_name } from "./my_module.py"; + +console.log("get_name():", get_name()); +console.log("module_name:", module_name); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + // __name__ should be the module name derived from filename (without .py extension) + expect(stdout.trim()).toBe("get_name(): my_module\nmodule_name: my_module"); + expect(exitCode).toBe(0); + }); + + test("__name__ is __main__ when running Python file directly", async () => { + using dir = tempDir("python-name-main", { + "main.py": ` +print("__name__:", __name__) + +if __name__ == "__main__": + print("running as main") +else: + print("imported as module") +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "main.py"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + // When running directly, __name__ should be "__main__" + expect(stdout.trim()).toBe("__name__: __main__\nrunning as main"); + expect(exitCode).toBe(0); + }); + + test("if __name__ == '__main__' block runs only when executed directly", async () => { + using dir = tempDir("python-main-guard", { + "utils.py": ` +def helper(): + return "helper called" + +main_executed = False + +if __name__ == "__main__": + main_executed = True + print("utils.py executed as main") +`, + "test.js": ` +import { helper, main_executed } from "./utils.py"; + 
+console.log("helper():", helper()); +console.log("main_executed:", main_executed); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + // When imported, the if __name__ == "__main__" block should NOT run + expect(stdout.trim()).toBe("helper(): helper called\nmain_executed: false"); + expect(exitCode).toBe(0); + }); +}); + +describe("Python class instantiation requires new", () => { + test("Python class requires new keyword like JS classes", async () => { + using dir = tempDir("python-new-test", { + "test.py": ` +class Counter: + def __init__(self, start=0): + self.value = start + + def increment(self): + self.value += 1 + return self.value +`, + "test.js": ` +import { Counter } from "./test.py"; + +// Using new should work +const counter = new Counter(10); +console.log("new Counter(10).value:", counter.value); + +// Calling without new should throw +try { + const bad = Counter(10); + console.log("ERROR: Counter(10) should have thrown"); +} catch (e) { + console.log("Counter(10) threw:", e.name); +} +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("new Counter(10).value: 10\nCounter(10) threw: TypeError"); + expect(exitCode).toBe(0); + }); + + test("Python functions do not require new", async () => { + using dir = tempDir("python-new-test", { + "test.py": ` +def add(a, b): + return a + b +`, + "test.js": ` +import { add } from "./test.py"; + +// Functions should work without new +console.log("add(2, 3):", add(2, 3)); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), 
+ env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("add(2, 3): 5"); + expect(exitCode).toBe(0); + }); +}); + +describe("Python builtin modules with python: prefix", () => { + test("import pathlib from python:pathlib", async () => { + using dir = tempDir("python-builtin-test", { + "test.js": ` +import pathlib from "python:pathlib"; + +// pathlib.Path should be a callable class +const p = new pathlib.Path("/tmp/test"); +console.log("path:", p.toString()); +console.log("name:", p.name); +console.log("parent:", p.parent.toString()); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("path: /tmp/test\nname: test\nparent: /tmp"); + expect(exitCode).toBe(0); + }); + + test("import named exports from python:pathlib", async () => { + using dir = tempDir("python-builtin-test", { + "test.js": ` +import { Path, PurePath } from "python:pathlib"; + +const p = new Path("/home/user/file.txt"); +console.log("suffix:", p.suffix); +console.log("stem:", p.stem); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("suffix: .txt\nstem: file"); + expect(exitCode).toBe(0); + }); + + test("import json from python:json", async () => { + using dir = tempDir("python-builtin-test", { + "test.js": ` +import json from "python:json"; + +// Test dumps with JS object - works because JS objects become Python dicts +const data = { name: "test", count: 42 }; +const encoded = 
json.dumps(data); +console.log("encoded:", encoded); + +// Test dumps with JS array - becomes Python list +const arr = [1, 2, "three"]; +const arrEncoded = json.dumps(arr); +console.log("array encoded:", arrEncoded); + +// Test loads - parses JSON string into Python object +const decoded = json.loads('{"hello": "world"}'); +console.log("decoded.hello:", decoded.hello); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + const lines = stdout.trim().split("\n"); + expect(lines[0]).toBe('encoded: {"name": "test", "count": 42}'); + expect(lines[1]).toBe('array encoded: [1, 2, "three"]'); + expect(lines[2]).toBe("decoded.hello: world"); + expect(exitCode).toBe(0); + }); + + test("import os from python:os", async () => { + using dir = tempDir("python-builtin-test", { + "test.js": ` +import os from "python:os"; + +// os.getcwd() should return current working directory +const cwd = os.getcwd(); +console.log("has cwd:", typeof cwd === "string" && cwd.length > 0); + +// os.name should be a string (posix or nt) +console.log("os.name:", os.name); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + const lines = stdout.trim().split("\n"); + expect(lines[0]).toBe("has cwd: true"); + expect(lines[1]).toMatch(/os\.name: (posix|nt)/); + expect(exitCode).toBe(0); + }); +}); + +describe("Python/JS shared reference semantics", () => { + test("Python list modified in JS is seen by Python", async () => { + using dir = tempDir("python-shared-ref-test", { + "test.py": ` +def create_list(): + return [1, 2, 3] + +def get_list_length(lst): + return len(lst) + +def 
get_list_item(lst, index): + return lst[index] +`, + "test.js": ` +const py = await import("./test.py"); + +// Create a Python list +const pyList = py.create_list(); +console.log("initial length:", py.get_list_length(pyList)); +console.log("initial items:", pyList[0], pyList[1], pyList[2]); + +// Modify the list from JS +pyList[3] = 4; // append by index +console.log("after JS modification length:", py.get_list_length(pyList)); +console.log("new item from Python:", py.get_list_item(pyList, 3)); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + const lines = stdout.trim().split("\n"); + expect(lines[0]).toBe("initial length: 3"); + expect(lines[1]).toBe("initial items: 1 2 3"); + expect(lines[2]).toBe("after JS modification length: 4"); + expect(lines[3]).toBe("new item from Python: 4"); + expect(exitCode).toBe(0); + }); + + test("Python dict modified in JS is seen by Python", async () => { + using dir = tempDir("python-shared-ref-test", { + "test.py": ` +def create_dict(): + return {"a": 1, "b": 2} + +def get_dict_keys(d): + return sorted(list(d.keys())) + +def get_dict_value(d, key): + return d.get(key) + +def dict_has_key(d, key): + return key in d +`, + "test.js": ` +const py = await import("./test.py"); + +// Create a Python dict +const pyDict = py.create_dict(); +console.log("initial keys:", py.get_dict_keys(pyDict).join(",")); +console.log("initial a:", py.get_dict_value(pyDict, "a")); + +// Modify the dict from JS +pyDict.c = 3; // add new key +pyDict.a = 100; // modify existing key +console.log("after JS modification has c:", py.dict_has_key(pyDict, "c")); +console.log("new value c from Python:", py.get_dict_value(pyDict, "c")); +console.log("modified value a from Python:", py.get_dict_value(pyDict, "a")); +`, + }); + + await using proc = 
Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + const lines = stdout.trim().split("\n"); + expect(lines[0]).toBe("initial keys: a,b"); + expect(lines[1]).toBe("initial a: 1"); + expect(lines[2]).toBe("after JS modification has c: true"); + expect(lines[3]).toBe("new value c from Python: 3"); + expect(lines[4]).toBe("modified value a from Python: 100"); + expect(exitCode).toBe(0); + }); + + test("JS array modified in Python is seen by JS", async () => { + using dir = tempDir("python-shared-ref-test", { + "test.py": ` +def append_to_list(lst, value): + lst.append(value) + return len(lst) + +def modify_list_item(lst, index, value): + lst[index] = value +`, + "test.js": ` +const py = await import("./test.py"); + +// Create a JS array +const jsArray = [1, 2, 3]; +console.log("initial:", JSON.stringify(jsArray)); + +// Pass to Python and modify +const newLen = py.append_to_list(jsArray, 4); +console.log("after Python append, length from Python:", newLen); +console.log("after Python append, JS sees:", JSON.stringify(jsArray)); + +// Modify an existing item +py.modify_list_item(jsArray, 0, 100); +console.log("after Python modify, JS sees:", JSON.stringify(jsArray)); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + const lines = stdout.trim().split("\n"); + expect(lines[0]).toBe("initial: [1,2,3]"); + expect(lines[1]).toBe("after Python append, length from Python: 4"); + expect(lines[2]).toBe("after Python append, JS sees: [1,2,3,4]"); + expect(lines[3]).toBe("after Python modify, JS sees: [100,2,3,4]"); + expect(exitCode).toBe(0); + }); + + test("JS object modified in 
Python is seen by JS", async () => { + using dir = tempDir("python-shared-ref-test", { + "test.py": ` +def add_key(d, key, value): + d[key] = value + +def modify_key(d, key, value): + d[key] = value + +def delete_key(d, key): + del d[key] +`, + "test.js": ` +const py = await import("./test.py"); + +// Create a JS object +const jsObj = { a: 1, b: 2 }; +console.log("initial:", JSON.stringify(jsObj)); + +// Pass to Python and add a key +py.add_key(jsObj, "c", 3); +console.log("after Python add_key, JS sees:", JSON.stringify(jsObj)); + +// Modify existing key +py.modify_key(jsObj, "a", 100); +console.log("after Python modify_key, JS sees:", JSON.stringify(jsObj)); + +// Delete a key +py.delete_key(jsObj, "b"); +console.log("after Python delete_key, JS sees:", JSON.stringify(jsObj)); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + const lines = stdout.trim().split("\n"); + expect(lines[0]).toBe('initial: {"a":1,"b":2}'); + expect(lines[1]).toBe('after Python add_key, JS sees: {"a":1,"b":2,"c":3}'); + expect(lines[2]).toBe('after Python modify_key, JS sees: {"a":100,"b":2,"c":3}'); + expect(lines[3]).toBe('after Python delete_key, JS sees: {"a":100,"c":3}'); + expect(exitCode).toBe(0); + }); + + test("nested structures maintain shared references", async () => { + using dir = tempDir("python-shared-ref-test", { + "test.py": ` +def modify_nested(obj): + obj["nested"]["value"] = 999 + obj["nested"]["items"].append("from_python") +`, + "test.js": ` +const py = await import("./test.py"); + +// Create a nested JS structure +const jsObj = { + nested: { + value: 1, + items: ["a", "b"] + } +}; +console.log("initial:", JSON.stringify(jsObj)); + +// Python modifies nested properties +py.modify_nested(jsObj); +console.log("after Python modify:", 
JSON.stringify(jsObj)); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + const lines = stdout.trim().split("\n"); + expect(lines[0]).toBe('initial: {"nested":{"value":1,"items":["a","b"]}}'); + expect(lines[1]).toBe('after Python modify: {"nested":{"value":999,"items":["a","b","from_python"]}}'); + expect(exitCode).toBe(0); + }); +}); + +describe("Python stdlib imports via python: prefix", () => { + test("import collections from python:collections", async () => { + using dir = tempDir("python-stdlib-test", { + "test.js": ` +import collections from "python:collections"; + +// Test Counter +const counter = new collections.Counter(["a", "b", "a", "c", "a", "b"]); +console.log("Counter most_common:", counter.most_common(2).toString()); + +// Test defaultdict (also a class requiring new, needs Python type as factory) +import builtins from "python:builtins"; +const dd = new collections.defaultdict(builtins.int); +dd["key1"] = 1; +console.log("defaultdict:", dd["key1"]); + +// Test deque +const dq = new collections.deque([1, 2, 3]); +dq.append(4); +console.log("deque length:", dq.__len__()); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + const lines = stdout.trim().split("\n"); + expect(lines[0]).toContain("Counter most_common:"); + expect(lines[1]).toBe("defaultdict: 1"); + expect(lines[2]).toBe("deque length: 4"); + expect(exitCode).toBe(0); + }); + + test("import datetime from python:datetime", async () => { + using dir = tempDir("python-stdlib-test", { + "test.js": ` +import datetime from "python:datetime"; + +// Create a date +const 
d = new datetime.date(2024, 1, 15); +console.log("date:", d.toString()); +console.log("year:", d.year); +console.log("month:", d.month); +console.log("day:", d.day); + +// Create a timedelta +const td = new datetime.timedelta(1, 3600); // 1 day + 1 hour +console.log("timedelta days:", td.days); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + const lines = stdout.trim().split("\n"); + expect(lines[0]).toBe("date: 2024-01-15"); + expect(lines[1]).toBe("year: 2024"); + expect(lines[2]).toBe("month: 1"); + expect(lines[3]).toBe("day: 15"); + expect(lines[4]).toBe("timedelta days: 1"); + expect(exitCode).toBe(0); + }); + + test("import re from python:re", async () => { + using dir = tempDir("python-stdlib-test", { + "test.js": ` +import re from "python:re"; + +// Test re.match +const match = re.match("(\\\\w+) (\\\\w+)", "Hello World"); +console.log("match group(0):", match.group(0)); +console.log("match group(1):", match.group(1)); +console.log("match group(2):", match.group(2)); + +// Test re.findall +const matches = re.findall("\\\\d+", "foo 123 bar 456"); +console.log("findall:", matches.toString()); + +// Test re.sub +const result = re.sub("\\\\d+", "X", "foo 123 bar 456"); +console.log("sub:", result); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + const lines = stdout.trim().split("\n"); + expect(lines[0]).toBe("match group(0): Hello World"); + expect(lines[1]).toBe("match group(1): Hello"); + expect(lines[2]).toBe("match group(2): World"); + expect(lines[3]).toBe("findall: ['123', '456']"); + expect(lines[4]).toBe("sub: foo X 
bar X"); + expect(exitCode).toBe(0); + }); + + test("import itertools from python:itertools", async () => { + using dir = tempDir("python-stdlib-test", { + "test.js": ` +import itertools from "python:itertools"; + +// Test chain with spread syntax +const chained = [...new itertools.chain([1, 2], [3, 4])]; +console.log("chain:", JSON.stringify(chained)); + +// Test cycle (take first 5 with for-of) +const cycled = []; +let count = 0; +for (const item of new itertools.cycle(["a", "b"])) { + cycled.push(item); + if (++count >= 5) break; +} +console.log("cycle:", JSON.stringify(cycled)); + +// Test permutations with spread +const perms = [...new itertools.permutations([1, 2, 3], 2)]; +console.log("permutations count:", perms.length); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + const lines = stdout.trim().split("\n"); + expect(lines[0]).toBe("chain: [1,2,3,4]"); + expect(lines[1]).toBe('cycle: ["a","b","a","b","a"]'); + expect(lines[2]).toBe("permutations count: 6"); + expect(exitCode).toBe(0); + }); + + test("import math from python:math", async () => { + using dir = tempDir("python-stdlib-test", { + "test.js": ` +import math from "python:math"; + +console.log("pi:", math.pi); +console.log("e:", math.e); +console.log("sqrt(16):", math.sqrt(16)); +console.log("ceil(4.2):", math.ceil(4.2)); +console.log("floor(4.8):", math.floor(4.8)); +console.log("factorial(5):", math.factorial(5)); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + const lines = stdout.trim().split("\n"); + expect(lines[0]).toMatch(/pi: 3\.14159/); + 
expect(lines[1]).toMatch(/e: 2\.718/); + expect(lines[2]).toBe("sqrt(16): 4"); + expect(lines[3]).toBe("ceil(4.2): 5"); + expect(lines[4]).toBe("floor(4.8): 4"); + expect(lines[5]).toBe("factorial(5): 120"); + expect(exitCode).toBe(0); + }); + + test("import functools from python:functools", async () => { + using dir = tempDir("python-stdlib-test", { + "test.js": ` +import functools from "python:functools"; + +// Test reduce - works with JS callbacks +const sum = functools.reduce((a, b) => a + b, [1, 2, 3, 4, 5]); +console.log("reduce sum:", sum); + +// Test reduce with initial value +const sum2 = functools.reduce((a, b) => a + b, [1, 2, 3], 10); +console.log("reduce with initial:", sum2); + +// Test partial is a class +const add5 = new functools.partial((a, b) => a + b, 5); +console.log("partial add5(3):", add5(3)); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + const lines = stdout.trim().split("\n"); + expect(lines[0]).toBe("reduce sum: 15"); + expect(lines[1]).toBe("reduce with initial: 16"); + expect(lines[2]).toBe("partial add5(3): 8"); + expect(exitCode).toBe(0); + }); +}); + +describe("Async/await interop between Python and JavaScript", () => { + test("JS awaits Python asyncio coroutine", async () => { + using dir = tempDir("python-async-test", { + "async_funcs.py": ` +import asyncio + +async def async_add(a, b): + await asyncio.sleep(0.1) + return a + b + +async def async_greet(name): + await asyncio.sleep(0.05) + return f"Hello, {name}!" 
+`, + "test.js": ` +import asyncio from "python:asyncio"; +import { async_add, async_greet } from "./async_funcs.py"; + +const start = performance.now(); + +// Await Python coroutine +const result = await async_add(2, 3); +console.log("async_add(2, 3):", result); + +// Another async call +const greeting = await async_greet("World"); +console.log("async_greet:", greeting); + +const elapsed = performance.now() - start; +console.log("elapsed >= 150ms:", elapsed >= 150); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + const lines = stdout.trim().split("\n"); + expect(lines[0]).toBe("async_add(2, 3): 5"); + expect(lines[1]).toBe("async_greet: Hello, World!"); + expect(lines[2]).toBe("elapsed >= 150ms: true"); + expect(exitCode).toBe(0); + }); + + test("JS awaits Python asyncio.sleep in parallel", async () => { + using dir = tempDir("python-async-test", { + "test.js": ` +import asyncio from "python:asyncio"; + +const start = performance.now(); + +// Run multiple Python sleeps in parallel +await Promise.all([ + asyncio.sleep(0.2), + asyncio.sleep(0.2), + asyncio.sleep(0.2), +]); + +const elapsed = performance.now() - start; + +// Should complete in ~200ms, not 600ms (parallel, not sequential) +console.log("elapsed < 400ms:", elapsed < 400); +console.log("elapsed >= 200ms:", elapsed >= 200); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("elapsed < 400ms: true\nelapsed >= 200ms: true"); + expect(exitCode).toBe(0); + }); + + test("Python awaits JS Promise (Bun.sleep)", async () => { + using dir = 
tempDir("python-await-js-test", { + "test.py": ` +import asyncio + +async def test_await(js_sleep, js_double): + # Await JS async function + result = await js_sleep(100) + print(f"jsSleep result: {result}") + + # Await another JS async function + doubled = await js_double(21) + print(f"jsDouble(21): {doubled}") + + # Sequential awaits + start = asyncio.get_event_loop().time() + await js_sleep(100) + await js_sleep(100) + elapsed = asyncio.get_event_loop().time() - start + print(f"sequential >= 200ms: {elapsed >= 0.2}") +`, + "test.js": ` +import { test_await } from "./test.py"; + +async function jsSleep(ms) { + await Bun.sleep(ms); + return \`slept for \${ms}ms\`; +} + +async function jsDouble(n) { + await Bun.sleep(50); + return n * 2; +} + +await test_await(jsSleep, jsDouble); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + const lines = stdout.trim().split("\n"); + expect(lines[0]).toBe("jsSleep result: slept for 100ms"); + expect(lines[1]).toBe("jsDouble(21): 42"); + expect(lines[2]).toBe("sequential >= 200ms: True"); + expect(exitCode).toBe(0); + }); + + test("bidirectional async: JS and Python awaiting each other", async () => { + using dir = tempDir("python-bidirectional-async", { + "py_module.py": ` +import asyncio + +async def py_work(seconds): + await asyncio.sleep(seconds) + return "python done" +`, + "test.js": ` +import { py_work } from "./py_module.py"; + +async function jsWork(ms) { + await Bun.sleep(ms); + return "js done"; +} + +const start = performance.now(); + +// Run JS and Python async in parallel +const [pyResult, jsResult] = await Promise.all([ + py_work(0.2), + jsWork(200), +]); + +const elapsed = performance.now() - start; + +console.log("py_work result:", pyResult); +console.log("jsWork result:", jsResult); 
+console.log("parallel (elapsed < 400ms):", elapsed < 400); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + const lines = stdout.trim().split("\n"); + expect(lines[0]).toBe("py_work result: python done"); + expect(lines[1]).toBe("jsWork result: js done"); + expect(lines[2]).toBe("parallel (elapsed < 400ms): true"); + expect(exitCode).toBe(0); + }); + + test("Python interleaved awaits of JS and Python async", async () => { + using dir = tempDir("python-interleaved-async", { + "test.py": ` +import asyncio + +async def run_test(js_sleep): + start = asyncio.get_event_loop().time() + + # Interleaved Python and JS awaits + await asyncio.sleep(0.1) + t1 = asyncio.get_event_loop().time() - start + print(f"after py sleep: {t1:.1f}s") + + await js_sleep(100) + t2 = asyncio.get_event_loop().time() - start + print(f"after js sleep: {t2:.1f}s") + + await asyncio.sleep(0.1) + t3 = asyncio.get_event_loop().time() - start + print(f"after py sleep: {t3:.1f}s") + + elapsed = asyncio.get_event_loop().time() - start + print(f"total ~0.3s: {0.25 < elapsed < 0.4}") +`, + "test.js": ` +import { run_test } from "./test.py"; + +async function jsSleep(ms) { + await Bun.sleep(ms); +} + +await run_test(jsSleep); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + const lines = stdout.trim().split("\n"); + expect(lines[0]).toBe("after py sleep: 0.1s"); + expect(lines[1]).toBe("after js sleep: 0.2s"); + expect(lines[2]).toBe("after py sleep: 0.3s"); + expect(lines[3]).toBe("total ~0.3s: True"); + expect(exitCode).toBe(0); + }); +}); + +describe("Python isinstance 
checks for JS wrappers", () => { + test("JS array passes isinstance(x, list) in Python", async () => { + using dir = tempDir("python-isinstance-test", { + "test.py": ` +def check_list(obj): + return isinstance(obj, list) + +def check_list_and_use(obj): + if isinstance(obj, list): + return f"list with {len(obj)} items" + return "not a list" +`, + "test.js": ` +import { check_list, check_list_and_use } from "./test.py"; + +const jsArray = [1, 2, 3]; + +console.log("isinstance(jsArray, list):", check_list(jsArray)); +console.log("use as list:", check_list_and_use(jsArray)); +console.log("empty array:", check_list([])); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe("isinstance(jsArray, list): true\nuse as list: list with 3 items\nempty array: true"); + expect(exitCode).toBe(0); + }); + + test("JS object passes isinstance(x, dict) in Python", async () => { + using dir = tempDir("python-isinstance-test", { + "test.py": ` +def check_dict(obj): + return isinstance(obj, dict) + +def check_dict_and_use(obj): + if isinstance(obj, dict): + return f"dict with keys: {sorted(obj.keys())}" + return "not a dict" +`, + "test.js": ` +import { check_dict, check_dict_and_use } from "./test.py"; + +const jsObj = { a: 1, b: 2 }; + +console.log("isinstance(jsObj, dict):", check_dict(jsObj)); +console.log("use as dict:", check_dict_and_use(jsObj)); +console.log("empty object:", check_dict({})); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout.trim()).toBe( + "isinstance(jsObj, dict): true\nuse as dict: dict with 
keys: ['a', 'b']\nempty object: true", + ); + expect(exitCode).toBe(0); + }); + + test("Python list methods work on JS arrays", async () => { + using dir = tempDir("python-list-methods-test", { + "test.py": ` +def use_list_methods(lst): + lst.append(4) + lst.insert(0, 0) + last = lst.pop() + lst.reverse() + return f"after ops: {list(lst)}, popped: {last}" +`, + "test.js": ` +import { use_list_methods } from "./test.py"; + +const jsArray = [1, 2, 3]; +console.log("initial:", JSON.stringify(jsArray)); +const result = use_list_methods(jsArray); +console.log("Python result:", result); +console.log("JS sees:", JSON.stringify(jsArray)); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + const lines = stdout.trim().split("\n"); + expect(lines[0]).toBe("initial: [1,2,3]"); + // Trace: append(4) -> [1,2,3,4]; insert(0, 0) -> [0,1,2,3,4]; pop() returns 4, leaving [0,1,2,3]; reverse() -> [3,2,1,0] + expect(lines[1]).toBe("Python result: after ops: [3, 2, 1, 0], popped: 4"); + expect(lines[2]).toBe("JS sees: [3,2,1,0]"); + expect(exitCode).toBe(0); + }); + + test("Python dict methods work on JS objects", async () => { + using dir = tempDir("python-dict-methods-test", { + "test.py": ` +def use_dict_methods(d): + d['new_key'] = 'new_value' + d.update({'x': 10, 'y': 20}) + val = d.pop('a', 'not found') + keys = sorted(d.keys()) + return f"keys: {keys}, popped a: {val}" +`, + "test.js": ` +import { use_dict_methods } from "./test.py"; + +const jsObj = { a: 1, b: 2 }; +console.log("initial:", JSON.stringify(jsObj)); +const result = use_dict_methods(jsObj); +console.log("Python result:", result); +console.log("JS sees:", JSON.stringify(jsObj)); +`, + }); + + await using proc = Bun.spawn({ + cmd: [bunExe(), "test.js"], + cwd: String(dir), + env: bunEnv, + stdout: "pipe", + stderr:
"pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + const lines = stdout.trim().split("\n"); + expect(lines[0]).toBe('initial: {"a":1,"b":2}'); + expect(lines[1]).toBe("Python result: keys: ['b', 'new_key', 'x', 'y'], popped a: 1"); + expect(lines[2]).toBe('JS sees: {"b":2,"new_key":"new_value","x":10,"y":20}'); + expect(exitCode).toBe(0); + }); +});