mirror of
https://github.com/oven-sh/bun
synced 2026-02-02 15:08:46 +00:00
feat: add Bun.JSONL.parse() for streaming newline-delimited JSON parsing (#26356)
Adds a built-in JSONL parser implemented in C++ using JavaScriptCore's
optimized JSON parser.
## API
### `Bun.JSONL.parse(input)`
Parse a complete JSONL string or `Uint8Array` and return an array of all
parsed values. Throws on invalid input.
```ts
const results = Bun.JSONL.parse('{"a":1}\n{"b":2}\n');
// [{ a: 1 }, { b: 2 }]
```
### `Bun.JSONL.parseChunk(input, start?, end?)`
Parse as many complete values as possible, returning `{ values, read,
done, error }`. Designed for streaming use cases where input arrives
incrementally.
```ts
const result = Bun.JSONL.parseChunk('{"id":1}\n{"id":2}\n{"id":3');
result.values; // [{ id: 1 }, { id: 2 }]
result.read; // 17
result.done; // false
result.error; // null
```
## Implementation Details
- C++ implementation in `BunObject.cpp` using JSC's `streamingJSONParse`
- ASCII fast path: zero-copy `StringView` for pure ASCII input
- Non-ASCII: uses `fromUTF8ReplacingInvalidSequences` with a
`utf16_length_from_utf8` size check to prevent overflow
- UTF-8 BOM automatically skipped for `Uint8Array` input
- Pre-built `Structure` with fixed property offsets for fast result
object creation
- `Symbol.toStringTag = "JSONL"` on the namespace object
- `parseChunk` returns errors in the `error` property instead of throwing,
preserving partial results
- Comprehensive boundary checks on start/end parameters
## Tests
234 tests covering:
- Complete and partial/streaming input scenarios
- Error handling and recovery
- UTF-8 multi-byte characters and BOM handling
- start/end boundary security (exhaustive combinations, clamping, OOB
prevention)
- 4 GB input rejection (both ASCII and non-ASCII paths)
- Edge cases (empty input, single values, whitespace, special numbers)
---------
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
@@ -25,15 +25,22 @@ describe("doesnt_crash", async () => {
|
||||
{ target: "browser", minify: false },
|
||||
{ target: "browser", minify: true },
|
||||
];
|
||||
let code = "";
|
||||
async function getCode() {
|
||||
if (code) return code;
|
||||
code = await Bun.file(absolute).text();
|
||||
return code;
|
||||
}
|
||||
|
||||
for (const { target, minify } of configs) {
|
||||
test(`${file} - ${minify ? "minify" : "not minify"}`, async () => {
|
||||
test(`${file} - ${minify ? "minify" : "not minify"} - ${target}`, async () => {
|
||||
const timeLog = `Transpiled ${file} - ${minify ? "minify" : "not minify"}`;
|
||||
console.time(timeLog);
|
||||
const { logs, outputs } = await Bun.build({
|
||||
entrypoints: [absolute],
|
||||
minify: minify,
|
||||
target,
|
||||
files: { [absolute]: await getCode() },
|
||||
});
|
||||
console.timeEnd(timeLog);
|
||||
|
||||
@@ -43,6 +50,7 @@ describe("doesnt_crash", async () => {
|
||||
|
||||
expect(outputs.length).toBe(1);
|
||||
const outfile1 = path.join(temp_dir, "file-1" + file).replaceAll("\\", "/");
|
||||
const content1 = await outputs[0].text();
|
||||
|
||||
await Bun.write(outfile1, outputs[0]);
|
||||
|
||||
@@ -53,6 +61,7 @@ describe("doesnt_crash", async () => {
|
||||
const { logs, outputs } = await Bun.build({
|
||||
entrypoints: [outfile1],
|
||||
target,
|
||||
files: { [outfile1]: content1 },
|
||||
minify: minify,
|
||||
});
|
||||
|
||||
|
||||
2112
test/js/bun/jsonl/jsonl-parse.test.ts
Normal file
2112
test/js/bun/jsonl/jsonl-parse.test.ts
Normal file
File diff suppressed because it is too large
Load Diff
@@ -157,4 +157,7 @@ vendor/elysia/test/ws/message.test.ts
|
||||
test/js/node/test/parallel/test-worker-abort-on-uncaught-exception.js
|
||||
|
||||
# TODO: WebCore fixes
|
||||
test/js/web/urlpattern/urlpattern.test.ts
|
||||
test/js/web/urlpattern/urlpattern.test.ts
|
||||
|
||||
# TODO: jsc
|
||||
test/js/bun/jsonl/jsonl-parse.test.ts
|
||||
Reference in New Issue
Block a user