mirror of
https://github.com/oven-sh/bun
synced 2026-02-12 20:09:04 +00:00
fix(yaml): double-quoted strings with '...' incorrectly trigger document end error (#23491)
### What does this PR do?
Fixes #23489
The YAML parser was incorrectly treating `...` inside double-quoted
strings as document end markers, causing parse errors for strings
containing ellipsis, particularly affecting internationalized text.
### Example of the bug:
```yaml
balance: "👛 لا تمتلك محفظة... !"
```
This would fail with: `error: Unexpected document end`
### Root cause:
The bug was introduced in commit fcbd57ac48, which attempted to optimize
document marker detection by using `self.line_indent == .none` instead
of tracking newlines with a local flag. However, this check was
incomplete: it didn't track whether we had just processed a newline
character, so a `...` followed by whitespace in the middle of an
unindented line (as in the example above) was also treated as a
document end marker.
### The fix:
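Restored the `nl` (newline) flag pattern from the single-quoted scanner
and combined it with the `line_indent` check. The scanner that already
tracked `nl` gains the same `line_indent` check, so both quoted-string
scanners now use an identical condition. Document markers `...` and
`---` are now only recognized when **all** of these conditions are met: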
1. We're after a newline (`nl == true`)
2. We're at column 0 (`self.line_indent == .none`)
3. Followed by whitespace or EOF
This allows `...` to appear freely in double-quoted strings while still
correctly recognizing actual document end markers at the start of lines.
### How did you verify your code works?
1. Reproduced the original issue from #23489
2. Applied the fix and verified all test cases pass:
   - Original Arabic text with emoji: `"👛 لا تمتلك محفظة... !"`
   - Various `...` positions: start, middle, end
   - Both single and double quotes
   - Multiline strings with indented `...` (issue #22392)
3. Created regression test in `test/regression/issue/23489.test.ts`
4. Verified existing YAML tests still pass (514 pass, up from 513)
cc @dylan-conway for review
---------
Co-authored-by: Claude Bot <claude-bot@bun.sh>
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Jarred Sumner <jarred@jarredsumner.com>
Co-authored-by: Dylan Conway <dylan.conway567@gmail.com>
This commit is contained in:
@@ -3117,7 +3117,7 @@ pub fn Parser(comptime enc: Encoding) type {
|
||||
0 => return error.UnexpectedCharacter,
|
||||
|
||||
'.' => {
|
||||
if (nl and self.remainStartsWith("...") and self.isSWhiteOrBCharAt(3)) {
|
||||
if (nl and self.line_indent == .none and self.remainStartsWith("...") and self.isSWhiteOrBCharAt(3)) {
|
||||
return error.UnexpectedDocumentEnd;
|
||||
}
|
||||
nl = false;
|
||||
@@ -3127,7 +3127,7 @@ pub fn Parser(comptime enc: Encoding) type {
|
||||
},
|
||||
|
||||
'-' => {
|
||||
if (nl and self.remainStartsWith("---") and self.isSWhiteOrBCharAt(3)) {
|
||||
if (nl and self.line_indent == .none and self.remainStartsWith("---") and self.isSWhiteOrBCharAt(3)) {
|
||||
return error.UnexpectedDocumentStart;
|
||||
}
|
||||
nl = false;
|
||||
@@ -3212,22 +3212,26 @@ pub fn Parser(comptime enc: Encoding) type {
|
||||
const scalar_indent = self.line_indent;
|
||||
var text: std.ArrayList(enc.unit()) = .init(self.allocator);
|
||||
|
||||
var nl = false;
|
||||
|
||||
next: switch (self.next()) {
|
||||
0 => return error.UnexpectedCharacter,
|
||||
|
||||
'.' => {
|
||||
if (self.line_indent == .none and self.remainStartsWith("...") and self.isSWhiteOrBCharAt(3)) {
|
||||
if (nl and self.line_indent == .none and self.remainStartsWith("...") and self.isSWhiteOrBCharAt(3)) {
|
||||
return error.UnexpectedDocumentEnd;
|
||||
}
|
||||
nl = false;
|
||||
try text.append('.');
|
||||
self.inc(1);
|
||||
continue :next self.next();
|
||||
},
|
||||
|
||||
'-' => {
|
||||
if (self.line_indent == .none and self.remainStartsWith("---") and self.isSWhiteOrBCharAt(3)) {
|
||||
if (nl and self.line_indent == .none and self.remainStartsWith("---") and self.isSWhiteOrBCharAt(3)) {
|
||||
return error.UnexpectedDocumentStart;
|
||||
}
|
||||
nl = false;
|
||||
try text.append('-');
|
||||
self.inc(1);
|
||||
continue :next self.next();
|
||||
@@ -3248,12 +3252,14 @@ pub fn Parser(comptime enc: Encoding) type {
|
||||
return error.UnexpectedCharacter;
|
||||
}
|
||||
}
|
||||
nl = true;
|
||||
continue :next self.next();
|
||||
},
|
||||
|
||||
' ',
|
||||
'\t',
|
||||
=> {
|
||||
nl = false;
|
||||
const off = self.pos;
|
||||
self.inc(1);
|
||||
self.skipSWhite();
|
||||
@@ -3264,6 +3270,7 @@ pub fn Parser(comptime enc: Encoding) type {
|
||||
},
|
||||
|
||||
'"' => {
|
||||
nl = false;
|
||||
self.inc(1);
|
||||
return .scalar(.{
|
||||
.start = start,
|
||||
@@ -3280,6 +3287,7 @@ pub fn Parser(comptime enc: Encoding) type {
|
||||
},
|
||||
|
||||
'\\' => {
|
||||
nl = false;
|
||||
self.inc(1);
|
||||
switch (self.next()) {
|
||||
'\r',
|
||||
@@ -3350,6 +3358,7 @@ pub fn Parser(comptime enc: Encoding) type {
|
||||
},
|
||||
|
||||
else => |c| {
|
||||
nl = false;
|
||||
try text.append(c);
|
||||
self.inc(1);
|
||||
continue :next self.next();
|
||||
|
||||
@@ -494,6 +494,58 @@ document: 2
|
||||
expect(YAML.parse(yaml)).toEqual([{ document: 1 }, { document: 2 }]);
|
||||
});
|
||||
|
||||
test("document markers in quoted strings", () => {
|
||||
const inputs = [
|
||||
{ expected: "hi ... hello", input: '"hi ... hello"' },
|
||||
{ expected: "hi ... hello", input: "'hi ... hello'" },
|
||||
{ expected: { foo: "hi ... hello" }, input: 'foo: "hi ... hello"' },
|
||||
{ expected: { foo: "hi ... hello" }, input: "foo: 'hi ... hello'" },
|
||||
{
|
||||
expected: "hi ... hello",
|
||||
input: `"hi
|
||||
...
|
||||
hello"`,
|
||||
},
|
||||
{
|
||||
expected: "hi ... hello",
|
||||
input: `'hi
|
||||
...
|
||||
hello'`,
|
||||
},
|
||||
{
|
||||
expected: { foo: "hi ... hello" },
|
||||
input: `foo: "hi
|
||||
...
|
||||
hello"`,
|
||||
},
|
||||
{
|
||||
expected: { foo: "hi ... hello" },
|
||||
input: `foo: 'hi
|
||||
...
|
||||
hello'`,
|
||||
},
|
||||
{
|
||||
expected: { foo: { bar: "hi ... hello" } },
|
||||
input: `foo:
|
||||
bar: "hi
|
||||
...
|
||||
hello"`,
|
||||
},
|
||||
{
|
||||
expected: { foo: { bar: "hi ... hello" } },
|
||||
input: `foo:
|
||||
bar: 'hi
|
||||
...
|
||||
hello'`,
|
||||
},
|
||||
];
|
||||
|
||||
for (const { input, expected } of inputs) {
|
||||
expect(YAML.parse(input)).toEqual(expected);
|
||||
expect(YAML.parse(YAML.stringify(YAML.parse(input)))).toEqual(expected);
|
||||
}
|
||||
});
|
||||
|
||||
test("handles multiline strings", () => {
|
||||
const yaml = `
|
||||
literal: |
|
||||
|
||||
58
test/regression/issue/23489.test.ts
Normal file
58
test/regression/issue/23489.test.ts
Normal file
@@ -0,0 +1,58 @@
|
||||
import { YAML } from "bun";
|
||||
import { expect, test } from "bun:test";
|
||||
import { bunEnv, bunExe, tempDir } from "harness";
|
||||
|
||||
test("YAML double-quoted strings with ... should not trigger document end error - issue #23489", () => {
|
||||
// Test the original failing case with Arabic text and emoji
|
||||
const yaml1 = 'balance_dont_have_wallet: "👛 لا تمتلك محفظة... !"';
|
||||
const result1 = YAML.parse(yaml1);
|
||||
expect(result1).toEqual({
|
||||
balance_dont_have_wallet: "👛 لا تمتلك محفظة... !",
|
||||
});
|
||||
|
||||
// Test various patterns of ... in double-quoted strings
|
||||
const yaml2 = `test1: "this has ... dots"
|
||||
test2: "... at start"
|
||||
test3: "at end ..."
|
||||
test4: "👛 ... with emoji"`;
|
||||
const result2 = YAML.parse(yaml2);
|
||||
expect(result2).toEqual({
|
||||
test1: "this has ... dots",
|
||||
test2: "... at start",
|
||||
test3: "at end ...",
|
||||
test4: "👛 ... with emoji",
|
||||
});
|
||||
|
||||
// Test that both single and double quotes work
|
||||
const yaml3 = `single: 'this has ... dots'
|
||||
double: "this has ... dots"`;
|
||||
const result3 = YAML.parse(yaml3);
|
||||
expect(result3).toEqual({
|
||||
single: "this has ... dots",
|
||||
double: "this has ... dots",
|
||||
});
|
||||
});
|
||||
|
||||
test("YAML import with double-quoted strings containing ... - issue #23489", async () => {
|
||||
using dir = tempDir("yaml-ellipsis", {
|
||||
"test.yml": 'balance: "👛 لا تمتلك محفظة... !"',
|
||||
"test.ts": `
|
||||
import yaml from "./test.yml";
|
||||
console.log(JSON.stringify(yaml));
|
||||
`,
|
||||
});
|
||||
|
||||
await using proc = Bun.spawn({
|
||||
cmd: [bunExe(), "test.ts"],
|
||||
env: bunEnv,
|
||||
cwd: String(dir),
|
||||
stdout: "pipe",
|
||||
stderr: "pipe",
|
||||
});
|
||||
|
||||
const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]);
|
||||
|
||||
expect(stderr).not.toContain("Unexpected document end");
|
||||
expect(exitCode).toBe(0);
|
||||
expect(stdout.trim()).toBe('{"balance":"👛 لا تمتلك محفظة... !"}');
|
||||
});
|
||||
Reference in New Issue
Block a user