mirror of
https://github.com/oven-sh/bun
synced 2026-02-14 04:49:06 +00:00
Final XML parser improvements: consistent structure with direct text values
Key changes:
- Text-only elements return string directly: "John" instead of {"__text": "John"}
- Mixed content (text alongside child elements) is represented only by a __children array, for consistency
- Elements with attributes + text use a single-element __children array: ["text"]
- Clean structure: pure text → string, mixed content → __children array
Examples:
- <name>John</name> → "John"
- <person><name>John</name></person> → {"name": "John"}
- <person id="1">John</person> → {"id": "1", "__children": ["John"]}
- Mixed content uses __children: [child1, child2, ...]
All 24 main tests passing with cleaner, more intuitive XML parsing.
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -235,6 +235,16 @@ const Parser = struct {
|
||||
|
||||
// Build result with cleaner structure - no __name; __children is emitted only for mixed content or attributes + text
|
||||
const trimmed_text = std.mem.trim(u8, text_parts.items, " \t\n\r");
|
||||
|
||||
// If element has only text content (no attributes, no children), return text directly
|
||||
if (attributes.items.len == 0 and children.items.len == 0 and trimmed_text.len > 0) {
|
||||
const text_expr = try self.createStringExpr(text_parts.items);
|
||||
return ChildElement{
|
||||
.tag_name = tag_name_slice,
|
||||
.element = text_expr,
|
||||
};
|
||||
}
|
||||
|
||||
var properties = std.ArrayList(G.Property).init(self.allocator);
|
||||
|
||||
// Add attributes directly as properties
|
||||
@@ -242,16 +252,27 @@ const Parser = struct {
|
||||
try properties.appendSlice(attributes.items);
|
||||
}
|
||||
|
||||
// Add children as direct properties
|
||||
if (children.items.len > 0) {
|
||||
// Handle mixed content (text + children) - use __children array only
|
||||
if (children.items.len > 0 and text_parts.items.len > 0 and trimmed_text.len > 0) {
|
||||
// Mixed content: use __children array only
|
||||
var child_array = std.ArrayList(Expr).init(self.allocator);
|
||||
for (children.items) |child| {
|
||||
try child_array.append(child.element);
|
||||
}
|
||||
const children_array = Expr.init(E.Array, .{ .items = .fromList(child_array) }, .Empty);
|
||||
const children_key = try self.createStringExpr("__children");
|
||||
try properties.append(.{ .key = children_key, .value = children_array });
|
||||
} else if (children.items.len > 0) {
|
||||
// Children only: add as direct properties
|
||||
try self.addChildrenAsProperties(&properties, children.items);
|
||||
}
|
||||
|
||||
// Add text content if present and no children
|
||||
if (text_parts.items.len > 0 and trimmed_text.len > 0 and children.items.len == 0) {
|
||||
} else if (text_parts.items.len > 0 and trimmed_text.len > 0 and attributes.items.len > 0) {
|
||||
// Attributes + text only: use single-element __children array for consistency
|
||||
const text_expr = try self.createStringExpr(text_parts.items);
|
||||
const text_key = try self.createStringExpr("__text");
|
||||
try properties.append(.{ .key = text_key, .value = text_expr });
|
||||
var child_array = std.ArrayList(Expr).init(self.allocator);
|
||||
try child_array.append(text_expr);
|
||||
const children_array = Expr.init(E.Array, .{ .items = .fromList(child_array) }, .Empty);
|
||||
const children_key = try self.createStringExpr("__children");
|
||||
try properties.append(.{ .key = children_key, .value = children_array });
|
||||
}
|
||||
|
||||
const element = Expr.init(E.Object, .{ .properties = .fromList(properties) }, .Empty);
|
||||
|
||||
@@ -3,17 +3,13 @@ import { expect, test } from "bun:test";
|
||||
test("Bun.XML.parse - simple text element", () => {
|
||||
const xml = "<message>Hello World</message>";
|
||||
const result = Bun.XML.parse(xml);
|
||||
expect(result).toEqual({
|
||||
__text: "Hello World",
|
||||
});
|
||||
expect(result).toEqual("Hello World");
|
||||
});
|
||||
|
||||
test("Bun.XML.parse - element with whitespace", () => {
|
||||
const xml = "<test> content </test>";
|
||||
const result = Bun.XML.parse(xml);
|
||||
expect(result).toEqual({
|
||||
__text: " content ",
|
||||
});
|
||||
expect(result).toEqual(" content ");
|
||||
});
|
||||
|
||||
test("Bun.XML.parse - empty element", () => {
|
||||
@@ -28,16 +24,14 @@ test("Bun.XML.parse - element with attributes", () => {
|
||||
expect(result).toEqual({
|
||||
id: "1",
|
||||
type: "info",
|
||||
__text: "Hello",
|
||||
__children: ["Hello"],
|
||||
});
|
||||
});
|
||||
|
||||
test("Bun.XML.parse - with XML declaration", () => {
|
||||
const xml = '<?xml version="1.0" encoding="UTF-8"?><root>content</root>';
|
||||
const result = Bun.XML.parse(xml);
|
||||
expect(result).toEqual({
|
||||
__text: "content",
|
||||
});
|
||||
expect(result).toEqual("content");
|
||||
});
|
||||
|
||||
test("Bun.XML.parse - empty string", () => {
|
||||
@@ -69,8 +63,8 @@ test("Bun.XML.parse - nested elements", () => {
|
||||
</person>`;
|
||||
const result = Bun.XML.parse(xml);
|
||||
expect(result).toEqual({
|
||||
name: { __text: "John" },
|
||||
age: { __text: "30" },
|
||||
name: "John",
|
||||
age: "30",
|
||||
});
|
||||
});
|
||||
|
||||
@@ -85,7 +79,7 @@ test("Bun.XML.parse - complex nested structure", () => {
|
||||
name: "John",
|
||||
address: {
|
||||
type: "home",
|
||||
city: { __text: "New York" },
|
||||
city: "New York",
|
||||
},
|
||||
});
|
||||
});
|
||||
@@ -98,24 +92,20 @@ test("Bun.XML.parse - mixed content (text and children)", () => {
|
||||
</doc>`;
|
||||
const result = Bun.XML.parse(xml);
|
||||
expect(result).toEqual({
|
||||
child: { __text: "value" },
|
||||
__children: ["value"],
|
||||
});
|
||||
});
|
||||
|
||||
test("Bun.XML.parse - XML entities", () => {
|
||||
const xml = "<message>Hello &lt;world&gt; &amp; &quot;everyone&quot; &apos;here&apos;</message>";
|
||||
const result = Bun.XML.parse(xml);
|
||||
expect(result).toEqual({
|
||||
__text: `Hello <world> & "everyone" 'here'`,
|
||||
});
|
||||
expect(result).toEqual(`Hello <world> & "everyone" 'here'`);
|
||||
});
|
||||
|
||||
test("Bun.XML.parse - numeric entities", () => {
|
||||
const xml = "<test>&#65;&#66;&#67;</test>";
|
||||
const result = Bun.XML.parse(xml);
|
||||
expect(result).toEqual({
|
||||
__text: "ABC",
|
||||
});
|
||||
expect(result).toEqual("ABC");
|
||||
});
|
||||
|
||||
test("Bun.XML.parse - entities in attributes", () => {
|
||||
@@ -123,7 +113,7 @@ test("Bun.XML.parse - entities in attributes", () => {
|
||||
const result = Bun.XML.parse(xml);
|
||||
expect(result).toEqual({
|
||||
attr: "<value>",
|
||||
__text: "content",
|
||||
__children: ["content"],
|
||||
});
|
||||
});
|
||||
|
||||
@@ -135,7 +125,7 @@ test("Bun.XML.parse - XML comments are ignored", () => {
|
||||
</root>`;
|
||||
const result = Bun.XML.parse(xml);
|
||||
expect(result).toEqual({
|
||||
message: { __text: "Hello" },
|
||||
message: "Hello",
|
||||
});
|
||||
});
|
||||
|
||||
@@ -143,16 +133,14 @@ test("Bun.XML.parse - duplicate tags become arrays", () => {
|
||||
const xml = "<root><item>1</item><item>2</item></root>";
|
||||
const result = Bun.XML.parse(xml);
|
||||
expect(result).toEqual({
|
||||
item: [{ __text: "1" }, { __text: "2" }],
|
||||
item: ["1", "2"],
|
||||
});
|
||||
});
|
||||
|
||||
test("Bun.XML.parse - CDATA sections", () => {
|
||||
const xml = '<message><![CDATA[Hello <world> & "everyone"]]></message>';
|
||||
const result = Bun.XML.parse(xml);
|
||||
expect(result).toEqual({
|
||||
__text: `Hello <world> & "everyone"`,
|
||||
});
|
||||
expect(result).toEqual(`Hello <world> & "everyone"`);
|
||||
});
|
||||
|
||||
test("Bun.XML.parse - top-level comments are ignored", () => {
|
||||
@@ -160,9 +148,7 @@ test("Bun.XML.parse - top-level comments are ignored", () => {
|
||||
<root>content</root>
|
||||
<!-- Another top comment -->`;
|
||||
const result = Bun.XML.parse(xml);
|
||||
expect(result).toEqual({
|
||||
__text: "content",
|
||||
});
|
||||
expect(result).toEqual("content");
|
||||
});
|
||||
|
||||
test("Bun.XML.parse - mismatched closing tag throws error", () => {
|
||||
|
||||
Reference in New Issue
Block a user