From 0480d55a676e8f4c960042e5485e92aa079434a9 Mon Sep 17 00:00:00 2001 From: Dylan Conway Date: Wed, 19 Nov 2025 21:20:55 -0800 Subject: [PATCH] fix(YAML): handle exponential merge keys (#24729) ### What does this PR do? Fixes ENG-21490 ### How did you verify your code works? Added a test that would previously fail due to timeout. It also confirms the parsed result is correct. --------- Co-authored-by: taylor.fish --- src/collections/baby_list.zig | 21 ++-- src/interchange/yaml.zig | 187 ++++++++++++++++------------------ test/js/bun/yaml/yaml.test.ts | 125 +++++++++++++++++++++++ 3 files changed, 223 insertions(+), 110 deletions(-) diff --git a/src/collections/baby_list.zig b/src/collections/baby_list.zig index 80237c0c3f..bac768a269 100644 --- a/src/collections/baby_list.zig +++ b/src/collections/baby_list.zig @@ -53,13 +53,6 @@ pub fn BabyList(comptime Type: type) type { const unsupported_arg_msg = "unsupported argument to `moveFromList`: *" ++ @typeName(ListType); - const items = if (comptime @hasField(ListType, "items")) - list_ptr.items - else if (comptime std.meta.hasFn(ListType, "slice")) - list_ptr.slice() - else - @compileError(unsupported_arg_msg); - const capacity = if (comptime @hasField(ListType, "capacity")) list_ptr.capacity else if (comptime @hasField(ListType, "cap")) @@ -69,6 +62,15 @@ pub fn BabyList(comptime Type: type) type { else @compileError(unsupported_arg_msg); + const items = if (comptime std.meta.hasFn(ListType, "moveToUnmanaged")) + list_ptr.moveToUnmanaged().items + else if (comptime @hasField(ListType, "items")) + list_ptr.items + else if (comptime std.meta.hasFn(ListType, "slice")) + list_ptr.slice() + else + @compileError(unsupported_arg_msg); + if (comptime Environment.allow_assert) { bun.assert(items.len <= capacity); } @@ -88,7 +90,10 @@ pub fn BabyList(comptime Type: type) type { list_ptr.* = .empty; } else { this.#allocator.set(bun.allocators.asStd(allocator)); - list_ptr.* = .init(allocator); + // `moveToUnmanaged` already cleared the old list. + if (comptime !std.meta.hasFn(ListType, "moveToUnmanaged")) { + list_ptr.* = .init(allocator); + } } return this; } diff --git a/src/interchange/yaml.zig b/src/interchange/yaml.zig index 7aab24b11b..048fc86d53 100644 --- a/src/interchange/yaml.zig +++ b/src/interchange/yaml.zig @@ -780,7 +780,7 @@ pub fn Parser(comptime enc: Encoding) type { const mapping_line = self.token.line; _ = mapping_line; - var props: std.array_list.Managed(G.Property) = .init(self.allocator); + var props: MappingProps = .init(self.allocator); { try self.context.set(.flow_in); @@ -833,39 +833,7 @@ pub fn Parser(comptime enc: Encoding) type { }); } else { const value = try self.parseNode(.{}); - - append: { - switch (key.data) { - .e_string => |key_string| { - if (key_string.eqlComptime("<<")) { - switch (value.data) { - .e_object => |value_obj| { - try props.appendSlice(value_obj.properties.slice()); - break :append; - }, - .e_array => |value_arr| { - for (value_arr.slice()) |item| { - switch (item.data) { - .e_object => |item_obj| { - try props.appendSlice(item_obj.properties.slice()); - }, - else => {}, - } - } - break :append; - }, - else => {}, - } - } - }, - else => {}, - } - - try props.append(.{ - .key = key, - .value = value, - }); - } + try props.appendMaybeMerge(key, value); } if (self.token.data == .collect_entry) { @@ -880,7 +848,7 @@ pub fn Parser(comptime enc: Encoding) type { try self.scan(.{}); - return .init(E.Object, .{ .properties = .moveFromList(&props) }, mapping_start.loc()); + return .init(E.Object, .{ .properties = props.moveList() }, mapping_start.loc()); } fn parseBlockSequence(self: *@This()) ParseError!Expr { @@ -994,6 +962,83 @@ pub fn Parser(comptime enc: Encoding) type { return .init(E.Array, .{ .items = .moveFromList(&seq) }, sequence_start.loc()); } + /// Should only be used with expressions created with the YAML parser. It assumes + /// only null, boolean, number, string, array, object are possible. It also only + /// does pointer comparison with arrays and objects (so exponential merges are avoided) + fn yamlMergeKeyExprEql(l: Expr, r: Expr) bool { + if (std.meta.activeTag(l.data) != std.meta.activeTag(r.data)) { + return false; + } + + return switch (l.data) { + .e_null => true, + .e_boolean => |l_boolean| l_boolean.value == r.data.e_boolean.value, + .e_number => |l_number| l_number.value == r.data.e_number.value, + .e_string => |l_string| l_string.eql(E.String, r.data.e_string), + + .e_array => |l_array| l_array == r.data.e_array, + .e_object => |l_object| l_object == r.data.e_object, + + else => false, + }; + } + + const MappingProps = struct { + #list: bun.collections.ArrayList(G.Property), + + pub fn init(allocator: std.mem.Allocator) MappingProps { + return .{ .#list = .initIn(allocator) }; + } + + pub fn merge(self: *MappingProps, merge_props: []const G.Property) OOM!void { + try self.#list.ensureUnusedCapacity(merge_props.len); + var iter = std.mem.reverseIterator(merge_props); + next_merge_prop: while (iter.next()) |merge_prop| { + const merge_key = merge_prop.key.?; + for (self.#list.items()) |existing_prop| { + const existing_key = existing_prop.key.?; + if (yamlMergeKeyExprEql(existing_key, merge_key)) { + continue :next_merge_prop; + } + } + self.#list.appendAssumeCapacity(merge_prop); + } + } + + pub fn append(self: *MappingProps, prop: G.Property) OOM!void { + try self.#list.append(prop); + } + + pub fn appendMaybeMerge(self: *MappingProps, key: Expr, value: Expr) OOM!void { + if (switch (key.data) { + .e_string => |key_str| !key_str.eqlComptime("<<"), + else => true, + }) { + return self.#list.append(.{ .key = key, .value = value }); + } + + return switch (value.data) { + .e_object => |value_obj| self.merge(value_obj.properties.slice()), + .e_array => |value_arr| { + for (value_arr.items.slice()) |item| { + const item_obj = switch (item.data) { + .e_object => |obj| obj, + else => continue, + }; + + try self.merge(item_obj.properties.slice()); + } + }, + + else => self.#list.append(.{ .key = key, .value = value }), + }; + } + + pub fn moveList(self: *MappingProps) G.Property.List { + return .moveFromList(&self.#list); + } + }; + fn parseBlockMapping( self: *@This(), first_key: Expr, @@ -1011,7 +1056,7 @@ pub fn Parser(comptime enc: Encoding) type { try self.block_indents.push(mapping_indent); defer self.block_indents.pop(); - var props: std.array_list.Managed(G.Property) = .init(self.allocator); + var props: MappingProps = .init(self.allocator); { // try self.context.set(.block_in); @@ -1056,42 +1101,11 @@ pub fn Parser(comptime enc: Encoding) type { }, }; - append: { - switch (first_key.data) { - .e_string => |key_string| { - if (key_string.eqlComptime("<<")) { - switch (value.data) { - .e_object => |value_obj| { - try props.appendSlice(value_obj.properties.slice()); - break :append; - }, - .e_array => |value_arr| { - for (value_arr.slice()) |item| { - switch (item.data) { - .e_object => |item_obj| { - try props.appendSlice(item_obj.properties.slice()); - }, - else => {}, - } - } - break :append; - }, - else => {}, - } - } - }, - else => {}, - } - - try props.append(.{ - .key = first_key, - .value = value, - }); - } + try props.appendMaybeMerge(first_key, value); } if (self.context.get() == .flow_in) { - return .init(E.Object, .{ .properties = .moveFromList(&props) }, mapping_start.loc()); + return .init(E.Object, .{ .properties = props.moveList() }, mapping_start.loc()); } try self.context.set(.block_in); @@ -1173,41 +1187,10 @@ pub fn Parser(comptime enc: Encoding) type { }, }; - append: { - switch (key.data) { - .e_string => |key_string| { - if (key_string.eqlComptime("<<")) { - switch (value.data) { - .e_object => |value_obj| { - try props.appendSlice(value_obj.properties.slice()); - break :append; - }, - .e_array => |value_arr| { - for (value_arr.slice()) |item| { - switch (item.data) { - .e_object => |item_obj| { - try props.appendSlice(item_obj.properties.slice()); - }, - else => {}, - } - } - break :append; - }, - else => {}, - } - } - }, - else => {}, - } - - try props.append(.{ - .key = key, - .value = value, - }); - } + try props.appendMaybeMerge(key, value); } - return .init(E.Object, .{ .properties = .moveFromList(&props) }, mapping_start.loc()); + return .init(E.Object, .{ .properties = props.moveList() }, mapping_start.loc()); } const NodeProperties = struct { diff --git a/test/js/bun/yaml/yaml.test.ts b/test/js/bun/yaml/yaml.test.ts index 59276fc20e..cecb864cd3 100644 --- a/test/js/bun/yaml/yaml.test.ts +++ b/test/js/bun/yaml/yaml.test.ts @@ -813,6 +813,131 @@ my_config: const input2 = await file(join(import.meta.dir, "fixtures", "AHatInTime.yaml")).text(); expect(YAML.parse(input2)).toMatchSnapshot(); }); + + test("handles YAML bombs", () => { + function buildTest(depth) { + const lines: string[] = []; + lines.push(`a0: &a0\n k0: 0`); + for (let i = 1; i <= depth; i++) { + const refs = Array.from({ length: i }, (_, j) => `*a${j}`).join(", "); + lines.push(`a${i}: &a${i}\n <<: [${refs}]\n k${i}: ${i}`); + } + lines.push(`root:\n <<: *a${depth}`); + const input = lines.join("\n"); + + const expected: any = {}; + for (let i = 0; i <= depth; i++) { + const record = {}; + for (let j = 0; j <= i; j++) record[`k${j}`] = j; + expected[`a${i}`] = record; + } + expected.root = { ...expected[`a${depth}`] }; + + return { input, expected }; + } + + const { input, expected } = buildTest(24); + + expect(YAML.parse(input)).toEqual(expected); + }, 100); + + describe("merge keys", () => { + test("merge overrides", () => { + const input = ` +--- +- &CENTER { x: 1, 'y': 2 } +- &LEFT { x: 0, 'y': 2 } +- &BIG { r: 10 } +- &SMALL { r: 1 } + +# All the following maps are equal: + +- # Explicit keys + x: 1 + 'y': 2 + r: 10 + label: center/big + +- # Merge one map + << : *CENTER + r: 10 + label: center/big + +- # Merge multiple maps + << : [ *CENTER, *BIG ] + label: center/big + +- # Override + << : [ *BIG, *LEFT, *SMALL ] + x: 1 + label: center/big + `; + + const expected = [ + { x: 1, y: 2 }, + { x: 0, y: 2 }, + { r: 10 }, + { r: 1 }, + { x: 1, y: 2, r: 10, label: "center/big" }, + { x: 1, y: 2, r: 10, label: "center/big" }, + { x: 1, y: 2, r: 10, label: "center/big" }, + { x: 1, y: 2, r: 10, label: "center/big" }, + ]; + + expect(YAML.parse(input)).toEqual(expected); + }); + + test("duplicate merge key", () => { + const input = ` +--- +<<: {x: 1, y: 2} +foo: bar +<<: {z: 3, t: 4} +`; + + expect(YAML.parse(input)).toEqual({ + x: 1, + y: 2, + z: 3, + t: 4, + foo: "bar", + }); + }); + + test("duplicate keys from the same anchor", () => { + let input = ` +defaults: &d + foo: 1 + foo: 2 +config: + <<: *d`; + expect(YAML.parse(input)).toEqual({ + defaults: { + foo: 2, + }, + config: { + foo: 2, + }, + }); + + // Can still override + input = ` +defaults: &d + foo: 1 + foo: 2 +config: + <<: *d + foo: 3`; + expect(YAML.parse(input)).toEqual({ + defaults: { + foo: 2, + }, + config: { + foo: 3, + }, + }); + }); + }); }); describe("stringify", () => {