mirror of
https://github.com/oven-sh/bun
synced 2026-02-14 04:49:06 +00:00
fix(ini): prevent OOB read and UB on truncated/invalid UTF-8 in INI parser (#26947)
## Summary - Fix out-of-bounds read in the INI parser's `prepareStr` function when a multi-byte UTF-8 lead byte appears at the end of a value with insufficient continuation bytes - Fix undefined behavior when bare continuation bytes (0x80-0xBF) cause `utf8ByteSequenceLength` to return 0, hitting an `unreachable` branch (UB in ReleaseFast builds) - Add bounds checking before accessing `val[i+1]`, `val[i+2]`, `val[i+3]` in both escaped and non-escaped code paths The vulnerability could be triggered by a crafted `.npmrc` file containing truncated UTF-8 sequences. In release builds, this could cause OOB heap reads (potential info leak) or undefined behavior. ## Test plan - [x] Added 9 tests covering truncated 2/3/4-byte sequences, bare continuation bytes, and escaped contexts - [x] All 52 INI parser tests pass (`bun bd test test/js/bun/ini/ini.test.ts`) - [x] No regressions in npmrc tests (failures are pre-existing Verdaccio connectivity issues) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Bot <claude-bot@bun.sh> Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
72
src/ini.zig
72
src/ini.zig
@@ -291,25 +291,32 @@ pub const Parser = struct {
|
||||
}
|
||||
},
|
||||
else => {
|
||||
try unesc.appendSlice(switch (bun.strings.utf8ByteSequenceLength(c)) {
|
||||
1 => brk: {
|
||||
break :brk &[_]u8{ '\\', c };
|
||||
switch (bun.strings.utf8ByteSequenceLength(c)) {
|
||||
0, 1 => try unesc.appendSlice(&[_]u8{ '\\', c }),
|
||||
2 => if (val.len - i >= 2) {
|
||||
try unesc.appendSlice(&[_]u8{ '\\', c, val[i + 1] });
|
||||
i += 1;
|
||||
} else {
|
||||
try unesc.appendSlice(&[_]u8{ '\\', c });
|
||||
},
|
||||
2 => brk: {
|
||||
defer i += 1;
|
||||
break :brk &[_]u8{ '\\', c, val[i + 1] };
|
||||
3 => if (val.len - i >= 3) {
|
||||
try unesc.appendSlice(&[_]u8{ '\\', c, val[i + 1], val[i + 2] });
|
||||
i += 2;
|
||||
} else {
|
||||
try unesc.append('\\');
|
||||
try unesc.appendSlice(val[i..val.len]);
|
||||
i = val.len - 1;
|
||||
},
|
||||
3 => brk: {
|
||||
defer i += 2;
|
||||
break :brk &[_]u8{ '\\', c, val[i + 1], val[i + 2] };
|
||||
4 => if (val.len - i >= 4) {
|
||||
try unesc.appendSlice(&[_]u8{ '\\', c, val[i + 1], val[i + 2], val[i + 3] });
|
||||
i += 3;
|
||||
} else {
|
||||
try unesc.append('\\');
|
||||
try unesc.appendSlice(val[i..val.len]);
|
||||
i = val.len - 1;
|
||||
},
|
||||
4 => brk: {
|
||||
defer i += 3;
|
||||
break :brk &[_]u8{ '\\', c, val[i + 1], val[i + 2], val[i + 3] };
|
||||
},
|
||||
// this means invalid utf8
|
||||
else => unreachable,
|
||||
});
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
@@ -342,25 +349,30 @@ pub const Parser = struct {
|
||||
try unesc.append('.');
|
||||
}
|
||||
},
|
||||
else => try unesc.appendSlice(switch (bun.strings.utf8ByteSequenceLength(c)) {
|
||||
1 => brk: {
|
||||
break :brk &[_]u8{c};
|
||||
else => switch (bun.strings.utf8ByteSequenceLength(c)) {
|
||||
0, 1 => try unesc.append(c),
|
||||
2 => if (val.len - i >= 2) {
|
||||
try unesc.appendSlice(&[_]u8{ c, val[i + 1] });
|
||||
i += 1;
|
||||
} else {
|
||||
try unesc.append(c);
|
||||
},
|
||||
2 => brk: {
|
||||
defer i += 1;
|
||||
break :brk &[_]u8{ c, val[i + 1] };
|
||||
3 => if (val.len - i >= 3) {
|
||||
try unesc.appendSlice(&[_]u8{ c, val[i + 1], val[i + 2] });
|
||||
i += 2;
|
||||
} else {
|
||||
try unesc.appendSlice(val[i..val.len]);
|
||||
i = val.len - 1;
|
||||
},
|
||||
3 => brk: {
|
||||
defer i += 2;
|
||||
break :brk &[_]u8{ c, val[i + 1], val[i + 2] };
|
||||
4 => if (val.len - i >= 4) {
|
||||
try unesc.appendSlice(&[_]u8{ c, val[i + 1], val[i + 2], val[i + 3] });
|
||||
i += 3;
|
||||
} else {
|
||||
try unesc.appendSlice(val[i..val.len]);
|
||||
i = val.len - 1;
|
||||
},
|
||||
4 => brk: {
|
||||
defer i += 3;
|
||||
break :brk &[_]u8{ c, val[i + 1], val[i + 2], val[i + 3] };
|
||||
},
|
||||
// this means invalid utf8
|
||||
else => unreachable,
|
||||
}),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -489,6 +489,61 @@ brr = 3
|
||||
"zr": ["deedee"],
|
||||
});
|
||||
});
|
||||
|
||||
describe("truncated/invalid utf-8", () => {
|
||||
test("bare continuation byte (0x80) should not crash", () => {
|
||||
// 0x80 is a continuation byte without a leading byte
|
||||
// utf8ByteSequenceLength returns 0, which must not hit unreachable
|
||||
const ini = Buffer.concat([Buffer.from("key = "), Buffer.from([0x80])]).toString("latin1");
|
||||
// Should not crash - just parse gracefully
|
||||
expect(() => parse(ini)).not.toThrow();
|
||||
});
|
||||
|
||||
test("truncated 2-byte sequence at end of value", () => {
|
||||
// 0xC0 is a 2-byte lead byte, but there's no continuation byte following
|
||||
const ini = Buffer.concat([Buffer.from("key = "), Buffer.from([0xc0])]).toString("latin1");
|
||||
expect(() => parse(ini)).not.toThrow();
|
||||
});
|
||||
|
||||
test("truncated 3-byte sequence at end of value", () => {
|
||||
// 0xE0 is a 3-byte lead byte, but only 0 continuation bytes follow
|
||||
const ini = Buffer.concat([Buffer.from("key = "), Buffer.from([0xe0])]).toString("latin1");
|
||||
expect(() => parse(ini)).not.toThrow();
|
||||
});
|
||||
|
||||
test("truncated 3-byte sequence with 1 continuation byte at end", () => {
|
||||
// 0xE0 is a 3-byte lead byte, but only 1 continuation byte follows
|
||||
const ini = Buffer.concat([Buffer.from("key = "), Buffer.from([0xe0, 0x80])]).toString("latin1");
|
||||
expect(() => parse(ini)).not.toThrow();
|
||||
});
|
||||
|
||||
test("truncated 4-byte sequence at end of value", () => {
|
||||
// 0xF0 is a 4-byte lead byte, but only 0 continuation bytes follow
|
||||
const ini = Buffer.concat([Buffer.from("key = "), Buffer.from([0xf0])]).toString("latin1");
|
||||
expect(() => parse(ini)).not.toThrow();
|
||||
});
|
||||
|
||||
test("truncated 4-byte sequence with 1 continuation byte at end", () => {
|
||||
const ini = Buffer.concat([Buffer.from("key = "), Buffer.from([0xf0, 0x80])]).toString("latin1");
|
||||
expect(() => parse(ini)).not.toThrow();
|
||||
});
|
||||
|
||||
test("truncated 4-byte sequence with 2 continuation bytes at end", () => {
|
||||
const ini = Buffer.concat([Buffer.from("key = "), Buffer.from([0xf0, 0x80, 0x80])]).toString("latin1");
|
||||
expect(() => parse(ini)).not.toThrow();
|
||||
});
|
||||
|
||||
test("truncated 2-byte sequence in escaped context", () => {
|
||||
// Backslash followed by a 2-byte lead byte at end of value
|
||||
const ini = Buffer.concat([Buffer.from("key = \\"), Buffer.from([0xc0])]).toString("latin1");
|
||||
expect(() => parse(ini)).not.toThrow();
|
||||
});
|
||||
|
||||
test("bare continuation byte in escaped context", () => {
|
||||
const ini = Buffer.concat([Buffer.from("key = \\"), Buffer.from([0x80])]).toString("latin1");
|
||||
expect(() => parse(ini)).not.toThrow();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
const wtf = {
|
||||
|
||||
Reference in New Issue
Block a user