From 7a801fcf930bd8082bafaa975d1709693f9b2b0d Mon Sep 17 00:00:00 2001 From: robobun Date: Thu, 12 Feb 2026 00:28:44 -0800 Subject: [PATCH] fix(ini): prevent OOB read and UB on truncated/invalid UTF-8 in INI parser (#26947) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary - Fix out-of-bounds read in the INI parser's `prepareStr` function when a multi-byte UTF-8 lead byte appears at the end of a value with insufficient continuation bytes - Fix undefined behavior when bare continuation bytes (0x80-0xBF) cause `utf8ByteSequenceLength` to return 0, hitting an `unreachable` branch (UB in ReleaseFast builds) - Add bounds checking before accessing `val[i+1]`, `val[i+2]`, `val[i+3]` in both escaped and non-escaped code paths The vulnerability could be triggered by a crafted `.npmrc` file containing truncated UTF-8 sequences. In release builds, this could cause OOB heap reads (potential info leak) or undefined behavior. ## Test plan - [x] Added 9 tests covering truncated 2/3/4-byte sequences, bare continuation bytes, and escaped contexts - [x] All 52 INI parser tests pass (`bun bd test test/js/bun/ini/ini.test.ts`) - [x] No regressions in npmrc tests (failures are pre-existing Verdaccio connectivity issues) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Bot Co-authored-by: Claude --- src/ini.zig | 72 +++++++++++++++++++++---------------- test/js/bun/ini/ini.test.ts | 55 ++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+), 30 deletions(-) diff --git a/src/ini.zig b/src/ini.zig index 23e8b57380..e15573b082 100644 --- a/src/ini.zig +++ b/src/ini.zig @@ -291,25 +291,32 @@ pub const Parser = struct { } }, else => { - try unesc.appendSlice(switch (bun.strings.utf8ByteSequenceLength(c)) { - 1 => brk: { - break :brk &[_]u8{ '\\', c }; + switch (bun.strings.utf8ByteSequenceLength(c)) { + 0, 1 => try unesc.appendSlice(&[_]u8{ '\\', c }), + 2 => if (val.len - i >= 2) { + try unesc.appendSlice(&[_]u8{ '\\', c, val[i + 1] }); + i += 1; + } else { + try unesc.appendSlice(&[_]u8{ '\\', c }); }, - 2 => brk: { - defer i += 1; - break :brk &[_]u8{ '\\', c, val[i + 1] }; + 3 => if (val.len - i >= 3) { + try unesc.appendSlice(&[_]u8{ '\\', c, val[i + 1], val[i + 2] }); + i += 2; + } else { + try unesc.append('\\'); + try unesc.appendSlice(val[i..val.len]); + i = val.len - 1; }, - 3 => brk: { - defer i += 2; - break :brk &[_]u8{ '\\', c, val[i + 1], val[i + 2] }; + 4 => if (val.len - i >= 4) { + try unesc.appendSlice(&[_]u8{ '\\', c, val[i + 1], val[i + 2], val[i + 3] }); + i += 3; + } else { + try unesc.append('\\'); + try unesc.appendSlice(val[i..val.len]); + i = val.len - 1; }, - 4 => brk: { - defer i += 3; - break :brk &[_]u8{ '\\', c, val[i + 1], val[i + 2], val[i + 3] }; - }, - // this means invalid utf8 else => unreachable, - }); + } }, } @@ -342,25 +349,30 @@ pub const Parser = struct { try unesc.append('.'); } }, - else => try unesc.appendSlice(switch (bun.strings.utf8ByteSequenceLength(c)) { - 1 => brk: { - break :brk &[_]u8{c}; + else => switch (bun.strings.utf8ByteSequenceLength(c)) { + 0, 1 => try unesc.append(c), + 2 => if (val.len - i >= 2) { + try unesc.appendSlice(&[_]u8{ c, val[i + 1] }); + i += 1; + } else { + try unesc.append(c); }, - 2 => brk: { - defer i += 1; - break :brk &[_]u8{ c, val[i + 1] }; + 3 => if (val.len - i >= 3) { + try unesc.appendSlice(&[_]u8{ c, val[i + 1], val[i + 2] }); + i += 2; + } else { + try unesc.appendSlice(val[i..val.len]); + i = val.len - 1; }, - 3 => brk: { - defer i += 2; - break :brk &[_]u8{ c, val[i + 1], val[i + 2] }; + 4 => if (val.len - i >= 4) { + try unesc.appendSlice(&[_]u8{ c, val[i + 1], val[i + 2], val[i + 3] }); + i += 3; + } else { + try unesc.appendSlice(val[i..val.len]); + i = val.len - 1; }, - 4 => brk: { - defer i += 3; - break :brk &[_]u8{ c, val[i + 1], val[i + 2], val[i + 3] }; - }, - // this means invalid utf8 else => unreachable, - }), + }, } } diff --git a/test/js/bun/ini/ini.test.ts b/test/js/bun/ini/ini.test.ts index 8212316b7f..00ce8b4395 100644 --- a/test/js/bun/ini/ini.test.ts +++ b/test/js/bun/ini/ini.test.ts @@ -489,6 +489,61 @@ brr = 3 "zr": ["deedee"], }); }); + + describe("truncated/invalid utf-8", () => { + test("bare continuation byte (0x80) should not crash", () => { + // 0x80 is a continuation byte without a leading byte + // utf8ByteSequenceLength returns 0, which must not hit unreachable + const ini = Buffer.concat([Buffer.from("key = "), Buffer.from([0x80])]).toString("latin1"); + // Should not crash - just parse gracefully + expect(() => parse(ini)).not.toThrow(); + }); + + test("truncated 2-byte sequence at end of value", () => { + // 0xC0 is a 2-byte lead byte, but there's no continuation byte following + const ini = Buffer.concat([Buffer.from("key = "), Buffer.from([0xc0])]).toString("latin1"); + expect(() => parse(ini)).not.toThrow(); + }); + + test("truncated 3-byte sequence at end of value", () => { + // 0xE0 is a 3-byte lead byte, but only 0 continuation bytes follow + const ini = Buffer.concat([Buffer.from("key = "), Buffer.from([0xe0])]).toString("latin1"); + expect(() => parse(ini)).not.toThrow(); + }); + + test("truncated 3-byte sequence with 1 continuation byte at end", () => { + // 0xE0 is a 3-byte lead byte, but only 1 continuation byte follows + const ini = Buffer.concat([Buffer.from("key = "), Buffer.from([0xe0, 0x80])]).toString("latin1"); + expect(() => parse(ini)).not.toThrow(); + }); + + test("truncated 4-byte sequence at end of value", () => { + // 0xF0 is a 4-byte lead byte, but only 0 continuation bytes follow + const ini = Buffer.concat([Buffer.from("key = "), Buffer.from([0xf0])]).toString("latin1"); + expect(() => parse(ini)).not.toThrow(); + }); + + test("truncated 4-byte sequence with 1 continuation byte at end", () => { + const ini = Buffer.concat([Buffer.from("key = "), Buffer.from([0xf0, 0x80])]).toString("latin1"); + expect(() => parse(ini)).not.toThrow(); + }); + + test("truncated 4-byte sequence with 2 continuation bytes at end", () => { + const ini = Buffer.concat([Buffer.from("key = "), Buffer.from([0xf0, 0x80, 0x80])]).toString("latin1"); + expect(() => parse(ini)).not.toThrow(); + }); + + test("truncated 2-byte sequence in escaped context", () => { + // Backslash followed by a 2-byte lead byte at end of value + const ini = Buffer.concat([Buffer.from("key = \\"), Buffer.from([0xc0])]).toString("latin1"); + expect(() => parse(ini)).not.toThrow(); + }); + + test("bare continuation byte in escaped context", () => { + const ini = Buffer.concat([Buffer.from("key = \\"), Buffer.from([0x80])]).toString("latin1"); + expect(() => parse(ini)).not.toThrow(); + }); + }); }); const wtf = {