Mirror of https://github.com/oven-sh/bun
Synced 2026-02-17 06:12:08 +00:00

Compare commits: claude/fix...claude/bun
12 commits
Commits:
a07c319887
f160cc91a2
7a831b71c9
26ea6a1251
afe0c17dae
765b34992a
0312ac0285
295162ab0c
caeb43f3cd
7555710057
ec4c0bf888
25f334805b
170  bench/snippets/jsonl-comparison.ts  (new file)
@@ -0,0 +1,170 @@
// Benchmark comparing Bun.file().jsonl() vs TypeScript implementation
import { bench, group, run } from "mitata";
import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
import path from "node:path";

// User's TypeScript implementation
const UTF8_BOM = "\ufeff";

function stripBOM(content: string): string {
  return content.startsWith(UTF8_BOM) ? content.slice(1) : content;
}

async function readJSONLFile<T>(filePath: string): Promise<T[]> {
  try {
    let content = await readFile(filePath, "utf8");
    if (!content.trim()) return [];

    // Strip BOM from the beginning of the file - PowerShell 5.x adds BOM to UTF-8 files
    content = stripBOM(content);

    return content
      .split("\n")
      .filter(line => line.trim())
      .map(line => {
        try {
          return JSON.parse(line) as T;
        } catch (err) {
          console.error(`Error parsing line in ${filePath}: ${err}`);
          return null;
        }
      })
      .filter((entry): entry is T => entry !== null);
  } catch (err) {
    console.error(`Error opening file ${filePath}: ${err}`);
    return [];
  }
}

async function readJSONLFileBunJSONL<T>(filePath: string): Promise<T[]> {
  const result = await Bun.file(filePath).jsonl();
  return result;
}

// Alternative TypeScript implementation using Bun.file().text()
async function readJSONLFileBunText<T>(filePath: string): Promise<T[]> {
  try {
    let content = await Bun.file(filePath).text();
    if (!content.trim()) return [];

    content = stripBOM(content);

    return content
      .split("\n")
      .filter(line => line.trim())
      .map(line => {
        try {
          return JSON.parse(line) as T;
        } catch {
          return null;
        }
      })
      .filter((entry): entry is T => entry !== null);
  } catch {
    return [];
  }
}

// Setup test data directory
const BENCH_DIR = path.join(import.meta.dir, ".jsonl-bench-data");

interface TestRecord {
  id: number;
  name: string;
  email: string;
  timestamp: number;
  data: { key: string; value: number };
}

function generateRecord(i: number): TestRecord {
  return {
    id: i,
    name: `User_${i}`,
    email: `user${i}@example.com`,
    timestamp: Date.now(),
    data: { key: `key_${i}`, value: i * 100 },
  };
}

async function setup() {
  await mkdir(BENCH_DIR, { recursive: true });

  // Generate test files of various sizes
  const sizes = [10, 100, 1000, 10000, 100000];

  for (const size of sizes) {
    const lines: string[] = [];
    for (let i = 0; i < size; i++) {
      lines.push(JSON.stringify(generateRecord(i)));
    }
    await writeFile(path.join(BENCH_DIR, `data-${size}.jsonl`), lines.join("\n") + "\n");
  }

  // File with BOM
  const bomContent = "\ufeff" + [0, 1, 2].map(i => JSON.stringify(generateRecord(i))).join("\n") + "\n";
  await writeFile(path.join(BENCH_DIR, "data-bom.jsonl"), bomContent);

  // File with empty lines and invalid JSON
  const mixedContent = [
    JSON.stringify(generateRecord(0)),
    "",
    " ",
    "invalid json here",
    JSON.stringify(generateRecord(1)),
    "\t\t",
    JSON.stringify(generateRecord(2)),
  ].join("\n");
  await writeFile(path.join(BENCH_DIR, "data-mixed.jsonl"), mixedContent);

  // File with CRLF
  const crlfContent = [0, 1, 2].map(i => JSON.stringify(generateRecord(i))).join("\r\n") + "\r\n";
  await writeFile(path.join(BENCH_DIR, "data-crlf.jsonl"), crlfContent);

  console.log("Setup complete. Test files created in:", BENCH_DIR);
}

async function cleanup() {
  await rm(BENCH_DIR, { recursive: true, force: true });
}

async function runBenchmarks() {
  await setup();

  const sizes = [10, 100, 1000, 10000, 100000];

  for (const size of sizes) {
    const filePath = path.join(BENCH_DIR, `data-${size}.jsonl`);

    group(`JSONL parsing (${size} lines)`, () => {
      bench("Bun.file().jsonl() [native]", async () => {
        await readJSONLFileBunJSONL(filePath);
      });

      bench("readJSONLFile (node:fs)", async () => {
        await readJSONLFile(filePath);
      });

      bench("readJSONLFile (Bun.file)", async () => {
        await readJSONLFileBunText(filePath);
      });
    });
  }

  // Edge cases
  group("Edge cases - BOM handling", () => {
    const filePath = path.join(BENCH_DIR, "data-bom.jsonl");

    bench("Bun.file().jsonl() [native]", async () => {
      await readJSONLFileBunJSONL(filePath);
    });

    bench("readJSONLFile (node:fs)", async () => {
      await readJSONLFile(filePath);
    });
  });

  await run();
  await cleanup();
}

runBenchmarks().catch(console.error);
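Note: like the other mitata suites in bench/snippets, this file is presumably invoked directly with Bun (e.g. `bun bench/snippets/jsonl-comparison.ts`); setup() writes its fixtures into a .jsonl-bench-data directory next to the script, and cleanup() removes them once run() completes.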
132  bench/snippets/jsonl-memory-bench.ts  (new file)
@@ -0,0 +1,132 @@
// Benchmark JSONL parsing performance without file I/O overhead
import { bench, group, run } from "mitata";

interface TestRecord {
  id: number;
  name: string;
  email: string;
  timestamp: number;
  data: { key: string; value: number };
}

function generateRecord(i: number): TestRecord {
  return {
    id: i,
    name: `User_${i}`,
    email: `user${i}@example.com`,
    timestamp: Date.now(),
    data: { key: `key_${i}`, value: i * 100 },
  };
}

function generateJSONLContent(lineCount: number): string {
  const lines: string[] = [];
  for (let i = 0; i < lineCount; i++) {
    lines.push(JSON.stringify(generateRecord(i)));
  }
  return lines.join("\n") + "\n";
}

// TypeScript implementation using Blob.text()
async function parseJSONLWithText<T>(blob: Blob): Promise<T[]> {
  const content = await blob.text();
  if (!content.trim()) return [];

  return content
    .split("\n")
    .filter(line => line.trim())
    .map(line => {
      try {
        return JSON.parse(line) as T;
      } catch {
        return null;
      }
    })
    .filter((entry): entry is T => entry !== null);
}

// Native Bun.file().jsonl() equivalent via Blob
async function parseJSONLNative<T>(blob: Blob): Promise<T[]> {
  return (blob as any).jsonl();
}

// Sync-like TypeScript implementation (text is already available)
function parseJSONLSync<T>(content: string): T[] {
  if (!content.trim()) return [];

  return content
    .split("\n")
    .filter(line => line.trim())
    .map(line => {
      try {
        return JSON.parse(line) as T;
      } catch {
        return null;
      }
    })
    .filter((entry): entry is T => entry !== null);
}

async function runBenchmarks() {
  const sizes = [100, 1000, 10000, 100000];

  for (const size of sizes) {
    const content = generateJSONLContent(size);
    const blob = new Blob([content]);

    // Pre-warm the blob text for sync comparison
    const textContent = await blob.text();

    group(`JSONL parsing ${size} lines (in-memory)`, () => {
      bench("Blob.jsonl() [native]", async () => {
        // Create new blob each time to avoid caching effects
        const b = new Blob([content]);
        await parseJSONLNative(b);
      });

      bench("Blob.text() + JS parse", async () => {
        const b = new Blob([content]);
        await parseJSONLWithText(b);
      });

      bench("String split + JSON.parse (sync)", () => {
        parseJSONLSync(textContent);
      });
    });
  }

  // Test with varying line lengths
  group("JSONL with large objects (1000 lines)", () => {
    const largeObjects = Array.from({ length: 1000 }, (_, i) => ({
      id: i,
      name: `User_${i}`,
      description: "A".repeat(500), // 500 char string
      metadata: {
        key1: "value1",
        key2: "value2",
        key3: "value3",
        nested: { a: 1, b: 2, c: 3 },
      },
    }));
    const content = largeObjects.map(o => JSON.stringify(o)).join("\n") + "\n";
    const textContent = content;

    bench("Blob.jsonl() [native]", async () => {
      const b = new Blob([content]);
      await parseJSONLNative(b);
    });

    bench("Blob.text() + JS parse", async () => {
      const b = new Blob([content]);
      await parseJSONLWithText(b);
    });

    bench("String split + JSON.parse (sync)", () => {
      parseJSONLSync(textContent);
    });
  });

  await run();
}

runBenchmarks().catch(console.error);
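The `(blob as any).jsonl()` cast above is presumably needed because the ambient Blob type declarations don't yet include the jsonl() method this change introduces. A hypothetical type augmentation (an assumption for illustration; the real typing belongs in Bun's generated definitions) would let the cast be dropped:

// Hypothetical augmentation - not part of this diff
declare global {
  interface Blob {
    jsonl(): Promise<unknown[]>;
  }
}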
43  bench/snippets/jsonl-timing-test.js  (new file)
@@ -0,0 +1,43 @@
// Test script for JSONL timing
const fs = require("fs");
const path = require("path");

const testDir = path.join(__dirname, ".jsonl-bench-data");

// Create test data if it doesn't exist
if (!fs.existsSync(testDir)) {
  fs.mkdirSync(testDir, { recursive: true });

  const sizes = [1000, 10000, 100000];
  for (const size of sizes) {
    const lines = [];
    for (let i = 0; i < size; i++) {
      lines.push(
        JSON.stringify({
          id: i,
          name: `User_${i}`,
          email: `user${i}@example.com`,
          timestamp: Date.now(),
          data: { key: `key_${i}`, value: i * 100 },
        }),
      );
    }
    fs.writeFileSync(path.join(testDir, `data-${size}.jsonl`), lines.join("\n") + "\n");
    console.log(`Created data-${size}.jsonl`);
  }
}

// Run tests
async function main() {
  const sizes = [1000, 10000, 100000];

  for (const size of sizes) {
    const filePath = path.join(testDir, `data-${size}.jsonl`);
    console.log(`\n>>> Testing ${size} lines...`);

    const result = await Bun.file(filePath).jsonl();
    console.log(`Result: ${result.length} items parsed`);
  }
}

main().catch(console.error);
120  bench/snippets/jsonl-tiny-objects-bench.ts  (new file)
@@ -0,0 +1,120 @@
// Benchmark designed to maximize native implementation advantage
// Small JSON objects = minimal parse time, maximum boundary-crossing overhead ratio
import { bench, group, run } from "mitata";

// Generate tiny JSON objects - minimal parse time per object
function generateTinyJSONL(lineCount: number): string {
  const lines: string[] = [];
  for (let i = 0; i < lineCount; i++) {
    lines.push(`{"i":${i}}`);
  }
  return lines.join("\n") + "\n";
}

// Even smaller - just numbers
function generateNumbersJSONL(lineCount: number): string {
  const lines: string[] = [];
  for (let i = 0; i < lineCount; i++) {
    lines.push(String(i));
  }
  return lines.join("\n") + "\n";
}

// Small strings
function generateStringsJSONL(lineCount: number): string {
  const lines: string[] = [];
  for (let i = 0; i < lineCount; i++) {
    lines.push(`"s${i}"`);
  }
  return lines.join("\n") + "\n";
}

// TypeScript implementation
async function parseJSONLWithText<T>(blob: Blob): Promise<T[]> {
  const content = await blob.text();
  if (!content.trim()) return [];

  return content
    .split("\n")
    .filter(line => line.trim())
    .map(line => {
      try {
        return JSON.parse(line) as T;
      } catch {
        return null;
      }
    })
    .filter((entry): entry is T => entry !== null);
}

// Native Blob.jsonl()
async function parseJSONLNative<T>(blob: Blob): Promise<T[]> {
  return (blob as any).jsonl();
}

async function runBenchmarks() {
  console.log("=== Native vs JS: Small Objects Benchmark ===");
  console.log("Goal: Maximize boundary-crossing overhead ratio\n");

  // Test with very high line counts and tiny objects
  const sizes = [10_000, 50_000, 100_000, 500_000, 1_000_000];

  // Tiny objects: {"i":N}
  for (const size of sizes) {
    const content = generateTinyJSONL(size);
    const sizeKB = (content.length / 1024).toFixed(1);

    group(`Tiny objects {"i":N} - ${size / 1000}k lines (${sizeKB} KB)`, () => {
      bench("Blob.jsonl() [native]", async () => {
        const b = new Blob([content]);
        await parseJSONLNative(b);
      });

      bench("Blob.text() + JS parse", async () => {
        const b = new Blob([content]);
        await parseJSONLWithText(b);
      });
    });
  }

  // Plain numbers - absolute minimum parse time
  const numberSizes = [100_000, 500_000, 1_000_000];
  for (const size of numberSizes) {
    const content = generateNumbersJSONL(size);
    const sizeKB = (content.length / 1024).toFixed(1);

    group(`Plain numbers - ${size / 1000}k lines (${sizeKB} KB)`, () => {
      bench("Blob.jsonl() [native]", async () => {
        const b = new Blob([content]);
        await parseJSONLNative(b);
      });

      bench("Blob.text() + JS parse", async () => {
        const b = new Blob([content]);
        await parseJSONLWithText(b);
      });
    });
  }

  // Small strings
  for (const size of numberSizes) {
    const content = generateStringsJSONL(size);
    const sizeKB = (content.length / 1024).toFixed(1);

    group(`Small strings "sN" - ${size / 1000}k lines (${sizeKB} KB)`, () => {
      bench("Blob.jsonl() [native]", async () => {
        const b = new Blob([content]);
        await parseJSONLNative(b);
      });

      bench("Blob.text() + JS parse", async () => {
        const b = new Blob([content]);
        await parseJSONLWithText(b);
      });
    });
  }

  await run();
}

runBenchmarks().catch(console.error);
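Why tiny records maximize the ratio this file targets: a line like {"i":123456} is roughly 12 bytes, so even the 1,000,000-line case is only about 12 MB of input yet yields a million JS values. Per-line parse cost is nearly constant and tiny, so any fixed per-value cost at the native-to-JS boundary becomes the dominant term in the total, which is exactly what these groups are constructed to expose.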
@@ -60,7 +60,9 @@
#include "JavaScriptCore/JSModuleRecord.h"
#include "JavaScriptCore/JSNativeStdFunction.h"
#include "JavaScriptCore/JSONObject.h"
#include "JavaScriptCore/LiteralParser.h"
#include "JavaScriptCore/JSObject.h"
#include <wtf/text/ASCIIFastPath.h>
#include "JavaScriptCore/JSSet.h"
#include "JavaScriptCore/Strong.h"
#include "JavaScriptCore/JSSetIterator.h"
@@ -2195,6 +2197,189 @@ extern "C" JSC::EncodedJSValue ZigString__toJSONObject(const ZigString* strPtr,
    return JSValue::encode(result);
}

// Parse JSONL content entirely in C++ - no Zig offset/length arrays needed.
// Forward declaration for Bun's optimized UTF-8 to string conversion
extern "C" JSC::EncodedJSValue Bun__encoding__toStringUTF8(const uint8_t* input, size_t len, JSC::JSGlobalObject* globalObject);

// Helper to find newline in byte array using memchr (SIMD-optimized)
static inline size_t findNewline(const uint8_t* data, size_t start, size_t end)
{
    if (start >= end) return notFound;
    const void* result = memchr(data + start, '\n', end - start);
    if (result) {
        return static_cast<const uint8_t*>(result) - data;
    }
    return notFound;
}

// Check if a line is whitespace-only (for 8-bit data)
static inline bool isWhitespaceOnlyLine8(const Latin1Character* data, size_t start, size_t len)
{
    Latin1Character firstChar = data[start];
    if (firstChar != ' ' && firstChar != '\t') return false;
    for (size_t i = start; i < start + len; i++) {
        Latin1Character c = data[i];
        if (c != ' ' && c != '\t') return false;
    }
    return true;
}

// Uses MarkedArgumentBuffer for GC-safe value collection.
// Optimized: For ASCII-only data, parses directly from UTF-8 using LiteralParser<Latin1Character>
// to avoid UTF-16 conversion overhead.
extern "C" JSC::EncodedJSValue Bun__parseJSONLFromBlob(
    JSC::JSGlobalObject* globalObject,
    const uint8_t* data,
    size_t size)
{
    auto& vm = globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);

    // Handle BOM (Byte Order Mark)
    size_t offset = 0;
    if (size >= 3 && data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF) {
        offset = 3; // UTF-8 BOM
    }

    if (size <= offset) {
        RELEASE_AND_RETURN(scope, JSValue::encode(constructEmptyArray(globalObject, nullptr)));
    }

    const uint8_t* contentStart = data + offset;
    size_t contentSize = size - offset;

    // Use MarkedArgumentBuffer for GC-safe collection of parsed values
    MarkedArgumentBuffer args;

    // Check if content is ASCII-only (fast SIMD check)
    std::span<const uint8_t> contentSpan(contentStart, contentSize);
    bool isAllASCII = charactersAreAllASCII(contentSpan);

    if (isAllASCII) {
        // Fast path: ASCII-only data can be parsed directly as Latin1
        // UTF-8 ASCII bytes are identical to Latin1 encoding
        const Latin1Character* latin1Data = reinterpret_cast<const Latin1Character*>(contentStart);
        size_t pos = 0;

        while (pos < contentSize) {
            // Find newline
            size_t newlinePos = findNewline(contentStart, pos, contentSize);
            size_t lineEnd = (newlinePos == notFound) ? contentSize : newlinePos;

            // Handle CRLF
            if (lineEnd > pos && latin1Data[lineEnd - 1] == '\r') {
                lineEnd--;
            }

            size_t lineLen = lineEnd - pos;

            if (lineLen > 0 && !isWhitespaceOnlyLine8(latin1Data, pos, lineLen)) {
                // Use LiteralParser directly with Latin1 data (8-bit fast path)
                std::span<const Latin1Character> lineSpan(latin1Data + pos, lineLen);
                LiteralParser<Latin1Character, JSONReviverMode::Disabled> parser(globalObject, lineSpan, StrictJSON);
                JSValue parsed = parser.tryLiteralParse();

                if (scope.exception()) {
                    scope.clearException();
                } else if (parsed) {
                    args.append(parsed);
                }
            }

            pos = (newlinePos == notFound) ? contentSize : newlinePos + 1;
        }
    } else {
        // Slow path: Contains non-ASCII, need UTF-16 conversion
        JSValue jsStringValue = JSValue::decode(Bun__encoding__toStringUTF8(contentStart, contentSize, globalObject));

        if (!jsStringValue || !jsStringValue.isString()) {
            return JSValue::encode(constructEmptyArray(globalObject, nullptr));
        }

        JSString* jsString = jsCast<JSString*>(jsStringValue);
        auto fullString = jsString->value(globalObject);
        RETURN_IF_EXCEPTION(scope, {});

        StringView fullView = fullString;
        size_t pos = 0;
        size_t length = fullView.length();

        // Check if the converted string is 8-bit (Latin1)
        // Even with non-ASCII UTF-8, if all chars fit in Latin1, we can use 8-bit path
        bool use8BitPath = fullView.is8Bit();

        while (pos < length) {
            size_t newlinePos = fullView.find('\n', pos);
            size_t lineEnd = (newlinePos == notFound) ? length : newlinePos;

            if (lineEnd > pos && fullView[lineEnd - 1] == '\r') {
                lineEnd--;
            }

            size_t lineLen = lineEnd - pos;

            if (lineLen > 0) {
                bool isWhitespaceOnly = false;
                if (use8BitPath) {
                    Latin1Character firstChar = fullView.span8()[pos];
                    if (firstChar == ' ' || firstChar == '\t') {
                        isWhitespaceOnly = true;
                        for (size_t i = pos; i < pos + lineLen; i++) {
                            Latin1Character c = fullView.span8()[i];
                            if (c != ' ' && c != '\t') {
                                isWhitespaceOnly = false;
                                break;
                            }
                        }
                    }
                } else {
                    UChar firstChar = fullView[pos];
                    if (firstChar == ' ' || firstChar == '\t') {
                        isWhitespaceOnly = true;
                        for (size_t i = pos; i < pos + lineLen; i++) {
                            UChar c = fullView[i];
                            if (c != ' ' && c != '\t') {
                                isWhitespaceOnly = false;
                                break;
                            }
                        }
                    }
                }

                if (!isWhitespaceOnly) {
                    JSValue parsed;
                    if (use8BitPath) {
                        // Use LiteralParser directly with 8-bit data
                        std::span<const Latin1Character> lineSpan(fullView.span8().data() + pos, lineLen);
                        LiteralParser<Latin1Character, JSONReviverMode::Disabled> parser(globalObject, lineSpan, StrictJSON);
                        parsed = parser.tryLiteralParse();
                    } else {
                        // Use LiteralParser with 16-bit data
                        std::span<const char16_t> lineSpan(fullView.span16().data() + pos, lineLen);
                        LiteralParser<char16_t, JSONReviverMode::Disabled> parser(globalObject, lineSpan, StrictJSON);
                        parsed = parser.tryLiteralParse();
                    }

                    if (scope.exception()) {
                        scope.clearException();
                    } else if (parsed) {
                        args.append(parsed);
                    }
                }
            }

            pos = (newlinePos == notFound) ? length : newlinePos + 1;
        }
    }

    if (args.hasOverflowed()) [[unlikely]] {
        throwOutOfMemoryError(globalObject, scope);
        return {};
    }

    RELEASE_AND_RETURN(scope, JSValue::encode(constructArray(globalObject, static_cast<ArrayAllocationProfile*>(nullptr), args)));
}

// We used to just throw "Out of memory" as a regular Error with that string.
//
// But JSC has some different handling for out of memory errors. So we should
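For readers skipping the C++: the observable behavior of Bun__parseJSONLFromBlob above is (1) strip a UTF-8 BOM, (2) split on \n, (3) trim one trailing \r per line, (4) skip empty and whitespace-only lines, (5) parse each remaining line as strict JSON and silently drop lines that fail, (6) return everything as one array. A small TypeScript reference model of those semantics (a sketch for clarity, not the actual code path):

// Reference model only - mirrors the native parser's observable semantics
function jsonlSemantics(text: string): unknown[] {
  if (text.startsWith("\ufeff")) text = text.slice(1); // UTF-8 BOM
  const out: unknown[] = [];
  for (let line of text.split("\n")) {
    if (line.endsWith("\r")) line = line.slice(0, -1); // CRLF
    if (!line.trim()) continue; // empty / whitespace-only lines are skipped
    try {
      out.push(JSON.parse(line)); // invalid JSON lines are silently dropped
    } catch {}
  }
  return out;
}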
1  src/bun.js/bindings/headers.h  (generated)
@@ -61,6 +61,7 @@ CPP_DECL JSC::EncodedJSValue ZigString__toRangeErrorInstance(const ZigString* ar
CPP_DECL JSC::EncodedJSValue ZigString__toSyntaxErrorInstance(const ZigString* arg0, JSC::JSGlobalObject* arg1);
CPP_DECL JSC::EncodedJSValue ZigString__toTypeErrorInstance(const ZigString* arg0, JSC::JSGlobalObject* arg1);
CPP_DECL JSC::EncodedJSValue ZigString__toValueGC(const ZigString* arg0, JSC::JSGlobalObject* arg1);
CPP_DECL JSC::EncodedJSValue Bun__parseJSONLFromBlob(JSC::JSGlobalObject* arg0, const uint8_t* arg1, size_t arg2);
CPP_DECL WebCore::DOMURL* WebCore__DOMURL__cast_(JSC::EncodedJSValue JSValue0, JSC::VM* arg1);
CPP_DECL BunString WebCore__DOMURL__fileSystemPath(WebCore::DOMURL* arg0, int* errorCode);
CPP_DECL void WebCore__DOMURL__href_(WebCore::DOMURL* arg0, ZigString* arg1);
@@ -3602,6 +3602,62 @@ pub fn toJSONWithBytes(this: *Blob, global: *JSGlobalObject, raw_bytes: []const
    return ZigString.init(buf).toJSONObject(global);
}

// ===== JSONL Support =====

pub fn getJSONL(
    this: *Blob,
    globalThis: *jsc.JSGlobalObject,
    _: *jsc.CallFrame,
) bun.JSError!jsc.JSValue {
    return this.getJSONLShare(globalThis);
}

pub fn getJSONLShare(
    this: *Blob,
    globalObject: *jsc.JSGlobalObject,
) bun.JSTerminated!jsc.JSValue {
    const store = this.store;
    if (store) |st| st.ref();
    defer if (store) |st| st.deref();
    return jsc.JSPromise.wrap(globalObject, lifetimeWrap(toJSONL, .share), .{ this, globalObject });
}

pub fn toJSONL(this: *Blob, global: *JSGlobalObject, comptime lifetime: Lifetime) bun.JSError!JSValue {
    if (this.needsToReadFile()) {
        return this.doReadFile(toJSONLWithBytes, global);
    }
    if (this.isS3()) {
        return this.doReadFromS3(toJSONLWithBytes, global);
    }
    const view_ = this.sharedView();
    return toJSONLWithBytes(this, global, view_, lifetime);
}

// Pure C++ JSONL parsing - all processing happens in C++ for efficiency
extern fn Bun__parseJSONLFromBlob(
    globalObject: *JSGlobalObject,
    data: [*]const u8,
    size: usize,
) JSValue;

pub fn toJSONLWithBytes(_: *Blob, global: *JSGlobalObject, raw_bytes: []const u8, comptime lifetime: Lifetime) bun.JSError!JSValue {
    defer if (comptime lifetime == .temporary) bun.default_allocator.free(@constCast(raw_bytes));

    if (raw_bytes.len == 0) {
        return jsc.JSArray.createEmpty(global, 0);
    }

    // All processing (BOM handling, line scanning, JSON parsing) happens in C++
    const result = Bun__parseJSONLFromBlob(global, raw_bytes.ptr, raw_bytes.len);

    // C++ returns .zero on exception
    if (result == .zero) {
        return error.JSError;
    }

    return result;
}

pub fn toFormDataWithBytes(this: *Blob, global: *JSGlobalObject, buf: []u8, comptime _: Lifetime) JSValue {
    var encoder = this.getFormDataEncoding() orelse return {
        return ZigString.init("Invalid encoding").toErrorInstance(global);
@@ -152,6 +152,7 @@ export default [
    proto: {
      text: { fn: "getText", async: true },
      json: { fn: "getJSON", async: true },
      jsonl: { fn: "getJSONL", async: true },
      arrayBuffer: { fn: "getArrayBuffer", async: true },
      slice: { fn: "getSlice", length: 2 },
      stream: { fn: "getStream", length: 1 },
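With the binding registered in the class generator, jsonl() surfaces on Blob and on the BunFile returned by Bun.file(). Minimal usage, mirroring the tests below (the file path is illustrative):

const rows = await Bun.file("events.jsonl").jsonl(); // array of parsed JSON values
const inline = await new Blob(['{"a":1}\n{"b":2}\n']).jsonl(); // [{ a: 1 }, { b: 2 }]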
133  test/js/bun/io/file-jsonl.test.ts  (new file)
@@ -0,0 +1,133 @@
import { describe, expect, test } from "bun:test";
import { tempDir } from "harness";

describe("Bun.file().jsonl()", () => {
  test("parses basic JSONL file", async () => {
    using dir = tempDir("jsonl-basic", {
      "data.jsonl": '{"a":1}\n{"b":2}\n',
    });
    const result = await Bun.file(`${dir}/data.jsonl`).jsonl();
    expect(result).toEqual([{ a: 1 }, { b: 2 }]);
  });

  test("returns empty array for empty file", async () => {
    using dir = tempDir("jsonl-empty-file", {
      "data.jsonl": "",
    });
    const result = await Bun.file(`${dir}/data.jsonl`).jsonl();
    expect(result).toEqual([]);
  });

  test("handles CRLF line endings", async () => {
    using dir = tempDir("jsonl-crlf", {
      "data.jsonl": '{"a":1}\r\n{"b":2}\r\n',
    });
    const result = await Bun.file(`${dir}/data.jsonl`).jsonl();
    expect(result).toEqual([{ a: 1 }, { b: 2 }]);
  });

  test("handles last line without newline", async () => {
    using dir = tempDir("jsonl-no-trailing", {
      "data.jsonl": '{"a":1}\n{"b":2}',
    });
    const result = await Bun.file(`${dir}/data.jsonl`).jsonl();
    expect(result).toEqual([{ a: 1 }, { b: 2 }]);
  });

  test("skips empty lines", async () => {
    using dir = tempDir("jsonl-empty-lines", {
      "data.jsonl": '{"a":1}\n\n{"b":2}\n\n',
    });
    const result = await Bun.file(`${dir}/data.jsonl`).jsonl();
    expect(result).toEqual([{ a: 1 }, { b: 2 }]);
  });

  test("skips whitespace-only lines", async () => {
    using dir = tempDir("jsonl-whitespace-lines", {
      "data.jsonl": '{"a":1}\n \n{"b":2}\n\t\t\n',
    });
    const result = await Bun.file(`${dir}/data.jsonl`).jsonl();
    expect(result).toEqual([{ a: 1 }, { b: 2 }]);
  });

  test("skips invalid JSON lines", async () => {
    using dir = tempDir("jsonl-invalid", {
      "data.jsonl": '{"a":1}\ninvalid json\n{"b":2}\n',
    });
    const result = await Bun.file(`${dir}/data.jsonl`).jsonl();
    expect(result).toEqual([{ a: 1 }, { b: 2 }]);
  });

  test("handles BOM", async () => {
    using dir = tempDir("jsonl-bom", {
      "data.jsonl": '\ufeff{"a":1}\n{"b":2}\n',
    });
    const result = await Bun.file(`${dir}/data.jsonl`).jsonl();
    expect(result).toEqual([{ a: 1 }, { b: 2 }]);
  });

  test("handles arrays as JSON values", async () => {
    using dir = tempDir("jsonl-arrays", {
      "data.jsonl": '[1,2,3]\n["a","b"]\n',
    });
    const result = await Bun.file(`${dir}/data.jsonl`).jsonl();
    expect(result).toEqual([
      [1, 2, 3],
      ["a", "b"],
    ]);
  });

  test("handles strings as JSON values", async () => {
    using dir = tempDir("jsonl-strings", {
      "data.jsonl": '"hello"\n"world"\n',
    });
    const result = await Bun.file(`${dir}/data.jsonl`).jsonl();
    expect(result).toEqual(["hello", "world"]);
  });

  test("handles numbers as JSON values", async () => {
    using dir = tempDir("jsonl-numbers", {
      "data.jsonl": "42\n3.14\n-100\n",
    });
    const result = await Bun.file(`${dir}/data.jsonl`).jsonl();
    expect(result).toEqual([42, 3.14, -100]);
  });

  test("handles null and boolean values", async () => {
    using dir = tempDir("jsonl-primitives", {
      "data.jsonl": "null\ntrue\nfalse\n",
    });
    const result = await Bun.file(`${dir}/data.jsonl`).jsonl();
    expect(result).toEqual([null, true, false]);
  });

  test("handles nested objects", async () => {
    using dir = tempDir("jsonl-nested", {
      "data.jsonl": '{"user":{"name":"John","age":30}}\n{"data":[1,2,3]}\n',
    });
    const result = await Bun.file(`${dir}/data.jsonl`).jsonl();
    expect(result).toEqual([{ user: { name: "John", age: 30 } }, { data: [1, 2, 3] }]);
  });

  test("handles unicode content", async () => {
    using dir = tempDir("jsonl-unicode", {
      "data.jsonl": '{"emoji":"\\ud83d\\ude00"}\n{"japanese":"\\u3053\\u3093\\u306b\\u3061\\u306f"}\n',
    });
    const result = await Bun.file(`${dir}/data.jsonl`).jsonl();
    expect(result).toEqual([{ emoji: "\ud83d\ude00" }, { japanese: "\u3053\u3093\u306b\u3061\u306f" }]);
  });

  test("works with Blob directly", async () => {
    const blob = new Blob(['{"a":1}\n{"b":2}\n']);
    const result = await blob.jsonl();
    expect(result).toEqual([{ a: 1 }, { b: 2 }]);
  });

  test("handles single line without newline", async () => {
    using dir = tempDir("jsonl-single", {
      "data.jsonl": '{"only":"one"}',
    });
    const result = await Bun.file(`${dir}/data.jsonl`).jsonl();
    expect(result).toEqual([{ only: "one" }]);
  });
});