From c0cf7b4501efe579f0cbd6c2118c00df2ebc6a13 Mon Sep 17 00:00:00 2001
From: Ai Hoshino
Date: Sat, 8 Jul 2023 06:10:49 +0800
Subject: [PATCH] fix decoding invalid UTF-8 input (#3563)

* fix decoding invalid UTF-8 input

Close: https://github.com/oven-sh/bun/issues/3562

* add unittest
---
 src/bun.js/bindings/JSStringDecoder.cpp            | 4 ++--
 test/js/node/string_decoder/string-decoder.test.js | 9 +++++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/bun.js/bindings/JSStringDecoder.cpp b/src/bun.js/bindings/JSStringDecoder.cpp
index 5ec2585223..b8c2dd50cd 100644
--- a/src/bun.js/bindings/JSStringDecoder.cpp
+++ b/src/bun.js/bindings/JSStringDecoder.cpp
@@ -129,7 +129,7 @@ uint8_t JSStringDecoder::utf8CheckIncomplete(uint8_t* bufPtr, uint32_t length, u
             m_lastNeed = nb - 1;
         return nb;
     }
-    if (--j < i || nb == -2)
+    if (j == 0 || --j < i || nb == -2)
         return 0;
     nb = utf8CheckByte(bufPtr[j]);
     if (nb >= 0) {
@@ -137,7 +137,7 @@ uint8_t JSStringDecoder::utf8CheckIncomplete(uint8_t* bufPtr, uint32_t length, u
             m_lastNeed = nb - 2;
         return nb;
     }
-    if (--j < i || nb == -2)
+    if (j == 0 || --j < i || nb == -2)
         return 0;
     nb = utf8CheckByte(bufPtr[j]);
     if (nb >= 0) {
diff --git a/test/js/node/string_decoder/string-decoder.test.js b/test/js/node/string_decoder/string-decoder.test.js
index f373266782..aba73401ab 100644
--- a/test/js/node/string_decoder/string-decoder.test.js
+++ b/test/js/node/string_decoder/string-decoder.test.js
@@ -241,3 +241,12 @@ for (const StringDecoder of [FakeStringDecoderCall, RealStringDecoder]) {
     });
   });
 }
+
+it("invalid utf-8 input, pr #3562", () => {
+  const decoder = new RealStringDecoder("utf-8");
+  let output = "";
+  output += decoder.write(Buffer.from("B9", "hex"));
+  output += decoder.write(Buffer.from("A9", "hex"));
+  output += decoder.end();
+  expect(output).toStrictEqual("\uFFFD\uFFFD");
+});