Final XML parser improvements: consistent structure with direct text values

Key changes:
- Text-only elements return string directly: "John" instead of {"__text": "John"}
- Mixed content uses only a __children array, for consistency
- Elements with attributes + text use a single-element __children array: ["text"]
- Clean structure: pure text → string, mixed content → __children array

Examples:
- <name>John</name> → "John"
- <person><name>John</name></person> → {"name": "John"}
- <person id="1">John</person> → {"id": "1", "__children": ["John"]}
- Mixed content uses __children: [child1, child2, ...]

All 24 main tests passing with cleaner, more intuitive XML parsing.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Claude Bot
2025-08-30 09:13:22 +00:00
parent 5c11093747
commit 8df7389405
2 changed files with 44 additions and 37 deletions

View File

@@ -235,6 +235,16 @@ const Parser = struct {
// Build result with cleaner structure - no __name or __children
const trimmed_text = std.mem.trim(u8, text_parts.items, " \t\n\r");
// If element has only text content (no attributes, no children), return text directly
if (attributes.items.len == 0 and children.items.len == 0 and trimmed_text.len > 0) {
const text_expr = try self.createStringExpr(text_parts.items);
return ChildElement{
.tag_name = tag_name_slice,
.element = text_expr,
};
}
var properties = std.ArrayList(G.Property).init(self.allocator);
// Add attributes directly as properties
@@ -242,16 +252,27 @@ const Parser = struct {
try properties.appendSlice(attributes.items);
}
// Add children as direct properties
if (children.items.len > 0) {
// Handle mixed content (text + children) - use __children array only
if (children.items.len > 0 and text_parts.items.len > 0 and trimmed_text.len > 0) {
// Mixed content: use __children array only
var child_array = std.ArrayList(Expr).init(self.allocator);
for (children.items) |child| {
try child_array.append(child.element);
}
const children_array = Expr.init(E.Array, .{ .items = .fromList(child_array) }, .Empty);
const children_key = try self.createStringExpr("__children");
try properties.append(.{ .key = children_key, .value = children_array });
} else if (children.items.len > 0) {
// Children only: add as direct properties
try self.addChildrenAsProperties(&properties, children.items);
}
// Add text content if present and no children
if (text_parts.items.len > 0 and trimmed_text.len > 0 and children.items.len == 0) {
} else if (text_parts.items.len > 0 and trimmed_text.len > 0 and attributes.items.len > 0) {
// Attributes + text only: use single-element __children array for consistency
const text_expr = try self.createStringExpr(text_parts.items);
const text_key = try self.createStringExpr("__text");
try properties.append(.{ .key = text_key, .value = text_expr });
var child_array = std.ArrayList(Expr).init(self.allocator);
try child_array.append(text_expr);
const children_array = Expr.init(E.Array, .{ .items = .fromList(child_array) }, .Empty);
const children_key = try self.createStringExpr("__children");
try properties.append(.{ .key = children_key, .value = children_array });
}
const element = Expr.init(E.Object, .{ .properties = .fromList(properties) }, .Empty);

View File

@@ -3,17 +3,13 @@ import { expect, test } from "bun:test";
test("Bun.XML.parse - simple text element", () => {
const xml = "<message>Hello World</message>";
const result = Bun.XML.parse(xml);
expect(result).toEqual({
__text: "Hello World",
});
expect(result).toEqual("Hello World");
});
test("Bun.XML.parse - element with whitespace", () => {
const xml = "<test> content </test>";
const result = Bun.XML.parse(xml);
expect(result).toEqual({
__text: " content ",
});
expect(result).toEqual(" content ");
});
test("Bun.XML.parse - empty element", () => {
@@ -28,16 +24,14 @@ test("Bun.XML.parse - element with attributes", () => {
expect(result).toEqual({
id: "1",
type: "info",
__text: "Hello",
__children: ["Hello"],
});
});
test("Bun.XML.parse - with XML declaration", () => {
const xml = '<?xml version="1.0" encoding="UTF-8"?><root>content</root>';
const result = Bun.XML.parse(xml);
expect(result).toEqual({
__text: "content",
});
expect(result).toEqual("content");
});
test("Bun.XML.parse - empty string", () => {
@@ -69,8 +63,8 @@ test("Bun.XML.parse - nested elements", () => {
</person>`;
const result = Bun.XML.parse(xml);
expect(result).toEqual({
name: { __text: "John" },
age: { __text: "30" },
name: "John",
age: "30",
});
});
@@ -85,7 +79,7 @@ test("Bun.XML.parse - complex nested structure", () => {
name: "John",
address: {
type: "home",
city: { __text: "New York" },
city: "New York",
},
});
});
@@ -98,24 +92,20 @@ test("Bun.XML.parse - mixed content (text and children)", () => {
</doc>`;
const result = Bun.XML.parse(xml);
expect(result).toEqual({
child: { __text: "value" },
__children: ["value"],
});
});
test("Bun.XML.parse - XML entities", () => {
const xml = "<message>Hello &lt;world&gt; &amp; &quot;everyone&quot; &#39;here&#39;</message>";
const result = Bun.XML.parse(xml);
expect(result).toEqual({
__text: `Hello <world> & "everyone" 'here'`,
});
expect(result).toEqual(`Hello <world> & "everyone" 'here'`);
});
test("Bun.XML.parse - numeric entities", () => {
const xml = "<test>&#65;&#66;&#67;</test>";
const result = Bun.XML.parse(xml);
expect(result).toEqual({
__text: "ABC",
});
expect(result).toEqual("ABC");
});
test("Bun.XML.parse - entities in attributes", () => {
@@ -123,7 +113,7 @@ test("Bun.XML.parse - entities in attributes", () => {
const result = Bun.XML.parse(xml);
expect(result).toEqual({
attr: "<value>",
__text: "content",
__children: ["content"],
});
});
@@ -135,7 +125,7 @@ test("Bun.XML.parse - XML comments are ignored", () => {
</root>`;
const result = Bun.XML.parse(xml);
expect(result).toEqual({
message: { __text: "Hello" },
message: "Hello",
});
});
@@ -143,16 +133,14 @@ test("Bun.XML.parse - duplicate tags become arrays", () => {
const xml = "<root><item>1</item><item>2</item></root>";
const result = Bun.XML.parse(xml);
expect(result).toEqual({
item: [{ __text: "1" }, { __text: "2" }],
item: ["1", "2"],
});
});
test("Bun.XML.parse - CDATA sections", () => {
const xml = '<message><![CDATA[Hello <world> & "everyone"]]></message>';
const result = Bun.XML.parse(xml);
expect(result).toEqual({
__text: `Hello <world> & "everyone"`,
});
expect(result).toEqual(`Hello <world> & "everyone"`);
});
test("Bun.XML.parse - top-level comments are ignored", () => {
@@ -160,9 +148,7 @@ test("Bun.XML.parse - top-level comments are ignored", () => {
<root>content</root>
<!-- Another top comment -->`;
const result = Bun.XML.parse(xml);
expect(result).toEqual({
__text: "content",
});
expect(result).toEqual("content");
});
test("Bun.XML.parse - mismatched closing tag throws error", () => {