fix(yaml): double-quoted strings with '...' incorrectly trigger document end error (#23491)

### What does this PR do?

Fixes #23489

The YAML parser was incorrectly treating `...` inside double-quoted
strings as document end markers, causing parse errors for strings
containing ellipsis, particularly affecting internationalized text.

### Example of the bug:
```yaml
balance: "👛 لا تمتلك محفظة... !"
```

This would fail with: `error: Unexpected document end`

### Root cause:

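The bug was introduced in commit fcbd57ac48, which attempted to optimize
document marker detection by using `self.line_indent == .none` instead
of tracking newlines with a local flag. However, this check was
incomplete: it only says that the current line is unindented, not that
the scanner is at the start of that line. On an unindented line such as
a top-level `key: "text ... more"`, any `...` followed by whitespace
inside the string therefore matched. The check did not track whether we
had just processed a newline character.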

### The fix:

Restored the `nl` (newline) flag pattern from the single-quoted scanner
and combined it with the `line_indent` check. Document markers `...` and
`---` are now only recognized when **all** of these conditions are met:

1. We have just consumed a line break (`nl == true`)
2. The current line has no indentation (`self.line_indent == .none`), so the marker starts at column 0
3. The marker is followed by whitespace, a line break, or EOF

This allows `...` to appear freely in double-quoted strings while still
correctly recognizing actual document end markers at the start of lines.

### How did you verify your code works?

1. Reproduced the original issue from #23489
2. Applied the fix and verified all test cases pass:
   - Original Arabic text with emoji: `"👛 لا تمتلك محفظة... !"`
   - Various `...` positions: start, middle, end
   - Both single and double quotes
   - Multiline strings with indented `...` (issue #22392)
3. Created regression test in `test/regression/issue/23489.test.ts`
4. Verified existing YAML tests still pass (514 pass, up from 513)

cc @dylan-conway for review

---------

Co-authored-by: Claude Bot <claude-bot@bun.sh>
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Jarred Sumner <jarred@jarredsumner.com>
Co-authored-by: Dylan Conway <dylan.conway567@gmail.com>
This commit is contained in:
robobun
2025-10-20 14:19:22 -07:00
committed by GitHub
parent abb85018df
commit ebc0cfeacd
3 changed files with 123 additions and 4 deletions

View File

@@ -3117,7 +3117,7 @@ pub fn Parser(comptime enc: Encoding) type {
0 => return error.UnexpectedCharacter,
'.' => {
if (nl and self.remainStartsWith("...") and self.isSWhiteOrBCharAt(3)) {
if (nl and self.line_indent == .none and self.remainStartsWith("...") and self.isSWhiteOrBCharAt(3)) {
return error.UnexpectedDocumentEnd;
}
nl = false;
@@ -3127,7 +3127,7 @@ pub fn Parser(comptime enc: Encoding) type {
},
'-' => {
if (nl and self.remainStartsWith("---") and self.isSWhiteOrBCharAt(3)) {
if (nl and self.line_indent == .none and self.remainStartsWith("---") and self.isSWhiteOrBCharAt(3)) {
return error.UnexpectedDocumentStart;
}
nl = false;
@@ -3212,22 +3212,26 @@ pub fn Parser(comptime enc: Encoding) type {
const scalar_indent = self.line_indent;
var text: std.ArrayList(enc.unit()) = .init(self.allocator);
var nl = false;
next: switch (self.next()) {
0 => return error.UnexpectedCharacter,
'.' => {
if (self.line_indent == .none and self.remainStartsWith("...") and self.isSWhiteOrBCharAt(3)) {
if (nl and self.line_indent == .none and self.remainStartsWith("...") and self.isSWhiteOrBCharAt(3)) {
return error.UnexpectedDocumentEnd;
}
nl = false;
try text.append('.');
self.inc(1);
continue :next self.next();
},
'-' => {
if (self.line_indent == .none and self.remainStartsWith("---") and self.isSWhiteOrBCharAt(3)) {
if (nl and self.line_indent == .none and self.remainStartsWith("---") and self.isSWhiteOrBCharAt(3)) {
return error.UnexpectedDocumentStart;
}
nl = false;
try text.append('-');
self.inc(1);
continue :next self.next();
@@ -3248,12 +3252,14 @@ pub fn Parser(comptime enc: Encoding) type {
return error.UnexpectedCharacter;
}
}
nl = true;
continue :next self.next();
},
' ',
'\t',
=> {
nl = false;
const off = self.pos;
self.inc(1);
self.skipSWhite();
@@ -3264,6 +3270,7 @@ pub fn Parser(comptime enc: Encoding) type {
},
'"' => {
nl = false;
self.inc(1);
return .scalar(.{
.start = start,
@@ -3280,6 +3287,7 @@ pub fn Parser(comptime enc: Encoding) type {
},
'\\' => {
nl = false;
self.inc(1);
switch (self.next()) {
'\r',
@@ -3350,6 +3358,7 @@ pub fn Parser(comptime enc: Encoding) type {
},
else => |c| {
nl = false;
try text.append(c);
self.inc(1);
continue :next self.next();

View File

@@ -494,6 +494,58 @@ document: 2
expect(YAML.parse(yaml)).toEqual([{ document: 1 }, { document: 2 }]);
});
// Checks that `...` inside single- and double-quoted scalars is parsed as
// ordinary text, both for bare scalars and for mapping values, and that every
// parsed value survives a stringify -> parse round trip.
// NOTE(review): in this view the continuation lines of the multi-line template
// literals below start at column 0 and the nested `bar:` key is unindented;
// confirm the leading whitespace of these inputs against the real test file.
test("document markers in quoted strings", () => {
const inputs = [
// Single-line scalars: bare and as a mapping value, in both quote styles.
{ expected: "hi ... hello", input: '"hi ... hello"' },
{ expected: "hi ... hello", input: "'hi ... hello'" },
{ expected: { foo: "hi ... hello" }, input: 'foo: "hi ... hello"' },
{ expected: { foo: "hi ... hello" }, input: "foo: 'hi ... hello'" },
// Multi-line scalars with `...` on its own line; the expected values show
// each line break folded into a single space.
{
expected: "hi ... hello",
input: `"hi
...
hello"`,
},
{
expected: "hi ... hello",
input: `'hi
...
hello'`,
},
// The same multi-line scalars used as a mapping value.
{
expected: { foo: "hi ... hello" },
input: `foo: "hi
...
hello"`,
},
{
expected: { foo: "hi ... hello" },
input: `foo: 'hi
...
hello'`,
},
// The same multi-line scalars as the value of a nested mapping key.
{
expected: { foo: { bar: "hi ... hello" } },
input: `foo:
bar: "hi
...
hello"`,
},
{
expected: { foo: { bar: "hi ... hello" } },
input: `foo:
bar: 'hi
...
hello'`,
},
];
for (const { input, expected } of inputs) {
// Direct parse must produce the expected value.
expect(YAML.parse(input)).toEqual(expected);
// Round trip: parse -> stringify -> parse must produce the same value.
expect(YAML.parse(YAML.stringify(YAML.parse(input)))).toEqual(expected);
}
});
test("handles multiline strings", () => {
const yaml = `
literal: |

View File

@@ -0,0 +1,58 @@
import { YAML } from "bun";
import { expect, test } from "bun:test";
import { bunEnv, bunExe, tempDir } from "harness";
/**
 * Regression coverage for issue #23489: an ellipsis (`...`) inside a quoted
 * scalar must be parsed as text and must not raise a document end error.
 */
test("YAML double-quoted strings with ... should not trigger document end error - issue #23489", () => {
  // The input from the original report: Arabic text with an emoji and an ellipsis.
  expect(YAML.parse('balance_dont_have_wallet: "👛 لا تمتلك محفظة... !"')).toEqual({
    balance_dont_have_wallet: "👛 لا تمتلك محفظة... !",
  });

  // Ellipsis in the middle, at the start, at the end, and after an emoji.
  const ellipsisPositions = [
    'test1: "this has ... dots"',
    'test2: "... at start"',
    'test3: "at end ..."',
    'test4: "👛 ... with emoji"',
  ].join("\n");
  expect(YAML.parse(ellipsisPositions)).toEqual({
    test1: "this has ... dots",
    test2: "... at start",
    test3: "at end ...",
    test4: "👛 ... with emoji",
  });

  // The same text in a single-quoted and a double-quoted scalar.
  const bothQuoteStyles = ["single: 'this has ... dots'", 'double: "this has ... dots"'].join("\n");
  expect(YAML.parse(bothQuoteStyles)).toEqual({
    single: "this has ... dots",
    double: "this has ... dots",
  });
});
/**
 * End-to-end variant of the #23489 regression: imports a `.yml` file whose
 * double-quoted value contains an ellipsis from a script run in a child
 * process, then checks the child's stderr, exit code and printed JSON.
 */
test("YAML import with double-quoted strings containing ... - issue #23489", async () => {
  // Fixture contents: the YAML document and a script that imports and prints it.
  const ymlSource = 'balance: "👛 لا تمتلك محفظة... !"';
  const entrySource = `
import yaml from "./test.yml";
console.log(JSON.stringify(yaml));
`;
  using fixture = tempDir("yaml-ellipsis", {
    "test.yml": ymlSource,
    "test.ts": entrySource,
  });

  await using child = Bun.spawn({
    cmd: [bunExe(), "test.ts"],
    env: bunEnv,
    cwd: String(fixture),
    stdout: "pipe",
    stderr: "pipe",
  });

  // Read both pipes and wait for exit together.
  const [out, err, code] = await Promise.all([child.stdout.text(), child.stderr.text(), child.exited]);

  expect(err).not.toContain("Unexpected document end");
  expect(code).toBe(0);
  expect(out.trim()).toBe('{"balance":"👛 لا تمتلك محفظة... !"}');
});