mirror of
https://github.com/oven-sh/bun
synced 2026-03-08 00:10:00 +01:00
Compare commits
1 Commits
claude/imp
...
claude/fix
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
df6a1da3dc |
@@ -3,7 +3,6 @@
|
||||
#include "root.h"
|
||||
|
||||
#include "../bindings/JSBuffer.h"
|
||||
#include "../bindings/JSBufferEncodingType.h"
|
||||
#include "ErrorCode.h"
|
||||
#include "JavaScriptCore/PageCount.h"
|
||||
#include "NodeValidator.h"
|
||||
@@ -126,432 +125,6 @@ JSC_DEFINE_HOST_FUNCTION(jsBufferConstructorFunction_isAscii,
|
||||
|
||||
BUN_DECLARE_HOST_FUNCTION(jsFunctionResolveObjectURL);
|
||||
|
||||
// The four encodings that transcode() in this file can convert between.
// BufferEncodingType values outside this set are rejected by parseTranscodeEncoding().
enum class TranscodeEncoding : uint8_t {
    ASCII, // single byte, values above 0x7F are not representable
    LATIN1, // single byte, values above 0xFF are not representable
    UTF8,
    UCS2, // UTF-16LE; also used for the "utf16le" encoding name
};
|
||||
|
||||
// Converts a JS encoding argument into a TranscodeEncoding.
// Returns std::nullopt when the value does not parse as a BufferEncodingType, or when it
// parses to an encoding that transcode() does not handle. The caller is expected to check
// for a pending exception after this returns.
static std::optional<TranscodeEncoding> parseTranscodeEncoding(JSC::JSGlobalObject& globalObject, JSValue value)
{
    const auto parsed = parseEnumeration<BufferEncodingType>(globalObject, value);
    if (!parsed.has_value())
        return std::nullopt;

    const BufferEncodingType type = parsed.value();
    if (type == BufferEncodingType::ascii)
        return TranscodeEncoding::ASCII;
    if (type == BufferEncodingType::latin1)
        return TranscodeEncoding::LATIN1;
    if (type == BufferEncodingType::utf8)
        return TranscodeEncoding::UTF8;
    // "ucs2" and "utf16le" are handled as the same encoding.
    if (type == BufferEncodingType::ucs2 || type == BufferEncodingType::utf16le)
        return TranscodeEncoding::UCS2;

    return std::nullopt;
}
|
||||
|
||||
// Checks the trailing bytes of a multi-byte UTF-8 sequence.
// Returns true when source[srcIdx + 1] .. source[srcIdx + seqLen - 1] all have the 10xxxxxx
// bit pattern. The lead byte at srcIdx is not inspected, and no bounds checking is done here:
// the caller must make sure srcIdx + seqLen does not run past the end of the buffer.
static inline bool validateUtf8Continuations(const char* source, size_t srcIdx, size_t seqLen)
{
    const char* sequence = source + srcIdx;
    size_t offset = 1;
    while (offset < seqLen) {
        const auto unit = static_cast<uint8_t>(sequence[offset]);
        if ((unit & 0xC0) != 0x80)
            return false;
        ++offset;
    }
    return true;
}
|
||||
|
||||
// Computes how many output bytes a UTF-8 → single-byte conversion of `source` produces.
// Accounting rules:
//   - a well-formed sequence (1 to 4 bytes, judged by lead byte and continuation pattern) → 1
//   - a byte that cannot start a sequence → 1
//   - a lead byte followed by a bad continuation byte → 1, and scanning resumes at the next byte
//   - a sequence cut off by the end of the input → 1 per remaining byte, then scanning stops
static size_t countUtf8Codepoints(const char* source, size_t sourceLength)
{
    size_t outputs = 0;
    size_t pos = 0;
    while (pos < sourceLength) {
        const auto lead = static_cast<uint8_t>(source[pos]);

        // Sequence length announced by the lead byte; 0 means "not a valid lead byte".
        size_t announced = 0;
        if (lead < 0x80)
            announced = 1;
        else if ((lead & 0xE0) == 0xC0)
            announced = 2;
        else if ((lead & 0xF0) == 0xE0)
            announced = 3;
        else if ((lead & 0xF8) == 0xF0)
            announced = 4;

        if (announced == 0) {
            outputs += 1;
            pos += 1;
            continue;
        }

        if (pos + announced > sourceLength) {
            // Truncated tail: every remaining byte is counted individually.
            outputs += sourceLength - pos;
            break;
        }

        bool continuationsOk = true;
        for (size_t k = 1; k < announced; ++k) {
            if ((static_cast<uint8_t>(source[pos + k]) & 0xC0) != 0x80) {
                continuationsOk = false;
                break;
            }
        }

        // Either way exactly one output is produced; only the number of consumed bytes differs.
        outputs += 1;
        pos += continuationsOk ? announced : 1;
    }
    return outputs;
}
|
||||
|
||||
// Transcode UTF-8 to single-byte encoding: codepoints > threshold become '?'.
|
||||
// Invalid/truncated UTF-8 sequences emit '?' for each bad byte.
|
||||
static JSC::JSUint8Array* transcodeUtf8ToSingleByte(JSC::JSGlobalObject* globalObject, const char* source, size_t sourceLength, uint32_t threshold)
|
||||
{
|
||||
size_t outputLength;
|
||||
if (simdutf::validate_utf8(source, sourceLength)) {
|
||||
outputLength = simdutf::utf32_length_from_utf8(source, sourceLength);
|
||||
} else {
|
||||
outputLength = countUtf8Codepoints(source, sourceLength);
|
||||
}
|
||||
|
||||
auto* result = WebCore::createUninitializedBuffer(globalObject, outputLength);
|
||||
if (!result)
|
||||
return nullptr;
|
||||
|
||||
auto* out = result->typedVector();
|
||||
size_t srcIdx = 0;
|
||||
size_t dstIdx = 0;
|
||||
while (srcIdx < sourceLength && dstIdx < outputLength) {
|
||||
uint8_t byte = static_cast<uint8_t>(source[srcIdx]);
|
||||
uint32_t codepoint;
|
||||
size_t seqLen;
|
||||
|
||||
if (byte < 0x80) {
|
||||
codepoint = byte;
|
||||
seqLen = 1;
|
||||
} else if ((byte & 0xE0) == 0xC0) {
|
||||
seqLen = 2;
|
||||
if (srcIdx + seqLen > sourceLength) {
|
||||
while (srcIdx < sourceLength && dstIdx < outputLength) {
|
||||
out[dstIdx++] = '?';
|
||||
srcIdx++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (!validateUtf8Continuations(source, srcIdx, seqLen)) {
|
||||
out[dstIdx++] = '?';
|
||||
srcIdx++;
|
||||
continue;
|
||||
}
|
||||
codepoint = (byte & 0x1F) << 6;
|
||||
codepoint |= (static_cast<uint8_t>(source[srcIdx + 1]) & 0x3F);
|
||||
} else if ((byte & 0xF0) == 0xE0) {
|
||||
seqLen = 3;
|
||||
if (srcIdx + seqLen > sourceLength) {
|
||||
while (srcIdx < sourceLength && dstIdx < outputLength) {
|
||||
out[dstIdx++] = '?';
|
||||
srcIdx++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (!validateUtf8Continuations(source, srcIdx, seqLen)) {
|
||||
out[dstIdx++] = '?';
|
||||
srcIdx++;
|
||||
continue;
|
||||
}
|
||||
codepoint = (byte & 0x0F) << 12;
|
||||
codepoint |= (static_cast<uint8_t>(source[srcIdx + 1]) & 0x3F) << 6;
|
||||
codepoint |= (static_cast<uint8_t>(source[srcIdx + 2]) & 0x3F);
|
||||
} else if ((byte & 0xF8) == 0xF0) {
|
||||
seqLen = 4;
|
||||
if (srcIdx + seqLen > sourceLength) {
|
||||
while (srcIdx < sourceLength && dstIdx < outputLength) {
|
||||
out[dstIdx++] = '?';
|
||||
srcIdx++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (!validateUtf8Continuations(source, srcIdx, seqLen)) {
|
||||
out[dstIdx++] = '?';
|
||||
srcIdx++;
|
||||
continue;
|
||||
}
|
||||
codepoint = (byte & 0x07) << 18;
|
||||
codepoint |= (static_cast<uint8_t>(source[srcIdx + 1]) & 0x3F) << 12;
|
||||
codepoint |= (static_cast<uint8_t>(source[srcIdx + 2]) & 0x3F) << 6;
|
||||
codepoint |= (static_cast<uint8_t>(source[srcIdx + 3]) & 0x3F);
|
||||
} else {
|
||||
// Invalid UTF-8 start byte
|
||||
out[dstIdx++] = '?';
|
||||
srcIdx++;
|
||||
continue;
|
||||
}
|
||||
|
||||
out[dstIdx++] = (codepoint <= threshold) ? static_cast<uint8_t>(codepoint) : '?';
|
||||
srcIdx += seqLen;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// UTF-8 → ASCII: forwards to transcodeUtf8ToSingleByte with 0x7F as the highest code point
// that is kept.
static JSC::JSUint8Array* transcodeUtf8ToAscii(JSC::JSGlobalObject* globalObject, const char* source, size_t sourceLength)
{
    constexpr uint32_t highestAsciiCodepoint = 0x7F;
    return transcodeUtf8ToSingleByte(globalObject, source, sourceLength, highestAsciiCodepoint);
}
|
||||
|
||||
// UTF-8 → Latin-1: forwards to transcodeUtf8ToSingleByte with 0xFF as the highest code point
// that is kept.
static JSC::JSUint8Array* transcodeUtf8ToLatin1(JSC::JSGlobalObject* globalObject, const char* source, size_t sourceLength)
{
    constexpr uint32_t highestLatin1Codepoint = 0xFF;
    return transcodeUtf8ToSingleByte(globalObject, source, sourceLength, highestLatin1Codepoint);
}
|
||||
|
||||
// Transcode UCS-2 to ASCII: each char16_t > 0x7F becomes '?'
|
||||
static JSC::JSUint8Array* transcodeUcs2ToAscii(JSC::JSGlobalObject* globalObject, const char16_t* source, size_t charLength)
|
||||
{
|
||||
auto* result = WebCore::createUninitializedBuffer(globalObject, charLength);
|
||||
if (!result)
|
||||
return nullptr;
|
||||
|
||||
auto* out = result->typedVector();
|
||||
for (size_t i = 0; i < charLength; i++) {
|
||||
out[i] = (source[i] <= 0x7F) ? static_cast<uint8_t>(source[i]) : '?';
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Transcode UCS-2 to Latin-1: each char16_t > 0xFF becomes '?'
|
||||
static JSC::JSUint8Array* transcodeUcs2ToLatin1(JSC::JSGlobalObject* globalObject, const char16_t* source, size_t charLength)
|
||||
{
|
||||
auto* result = WebCore::createUninitializedBuffer(globalObject, charLength);
|
||||
if (!result)
|
||||
return nullptr;
|
||||
|
||||
auto* out = result->typedVector();
|
||||
for (size_t i = 0; i < charLength; i++) {
|
||||
out[i] = (source[i] <= 0xFF) ? static_cast<uint8_t>(source[i]) : '?';
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Host function behind buffer.transcode(source, fromEncoding, toEncoding).
//
// Arguments (read from the call frame):
//   0: source       - must be a Buffer / Uint8Array, otherwise INVALID_ARG_TYPE is thrown
//   1: fromEncoding - encoding of `source`
//   2: toEncoding   - encoding of the returned Buffer
// Both encodings must map to ASCII, Latin-1, UTF-8 or UCS-2/UTF-16LE (see
// parseTranscodeEncoding); anything else throws
// "Unable to transcode Buffer [U_ILLEGAL_ARGUMENT_ERROR]".
//
// Returns a new buffer holding the converted bytes. An empty source yields an empty Buffer
// before the encodings are examined. Characters that do not fit a single-byte target are
// written as '?'. Invalid input on the UTF-8 <-> UCS-2 paths throws
// "Unable to transcode Buffer [U_INVALID_CHAR_FOUND]".
JSC_DEFINE_HOST_FUNCTION(jsFunction_transcode,
    (JSGlobalObject * globalObject,
        CallFrame* callFrame))
{
    VM& vm = globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);

    JSValue sourceValue = callFrame->argument(0);

    // Validate source is Buffer or Uint8Array. Casting to JSUint8Array (rather than to the
    // JSArrayBufferView base) rejects other typed arrays and DataView, which is what the
    // error message below states.
    auto* sourceView = JSC::jsDynamicCast<JSC::JSUint8Array*>(sourceValue);
    if (!sourceView) {
        Bun::ERR::INVALID_ARG_TYPE_INSTANCE(scope, globalObject,
            "source"_s, "Buffer"_s, "Uint8Array"_s, sourceValue);
        return {};
    }

    if (sourceView->isDetached()) [[unlikely]] {
        Bun::ERR::INVALID_STATE(scope, globalObject,
            "Cannot transcode a detached buffer"_s);
        return {};
    }

    // Empty input → empty Buffer (decided before the encodings are looked at)
    if (sourceView->byteLength() == 0) {
        return JSValue::encode(WebCore::createEmptyBuffer(globalObject));
    }

    // Parse encodings
    auto fromEncoding = parseTranscodeEncoding(*globalObject, callFrame->argument(1));
    RETURN_IF_EXCEPTION(scope, {});
    auto toEncoding = parseTranscodeEncoding(*globalObject, callFrame->argument(2));
    RETURN_IF_EXCEPTION(scope, {});

    if (!fromEncoding.has_value() || !toEncoding.has_value()) {
        throwException(globalObject, scope,
            createError(globalObject, "Unable to transcode Buffer [U_ILLEGAL_ARGUMENT_ERROR]"_s));
        return {};
    }

    // Parsing the encoding arguments can raise an exception, so it presumably can run user
    // code (e.g. a toString() on a non-string argument) that detaches or resizes the source.
    // The data pointer and length are therefore read only now, after re-checking the view.
    if (sourceView->isDetached()) [[unlikely]] {
        Bun::ERR::INVALID_STATE(scope, globalObject,
            "Cannot transcode a detached buffer"_s);
        return {};
    }

    const char* sourceData = reinterpret_cast<const char*>(sourceView->vector());
    size_t sourceLength = sourceView->byteLength();
    if (sourceLength == 0) {
        return JSValue::encode(WebCore::createEmptyBuffer(globalObject));
    }

    auto from = fromEncoding.value();
    auto to = toEncoding.value();

    JSC::JSUint8Array* resultBuffer = nullptr;

    // Same encoding → copy (except ASCII which needs 0x7F masking)
    if (from == to && from != TranscodeEncoding::ASCII) {
        resultBuffer = WebCore::createBuffer(globalObject, reinterpret_cast<const uint8_t*>(sourceData), sourceLength);
        RETURN_IF_EXCEPTION(scope, {});
        return JSValue::encode(resultBuffer);
    }

    switch (from) {
    case TranscodeEncoding::ASCII: {
        // Node.js ASCII encoding masks bytes > 0x7F with & 0x7F
        Vector<char> masked(sourceLength);
        for (size_t i = 0; i < sourceLength; i++)
            masked[i] = static_cast<char>(static_cast<uint8_t>(sourceData[i]) & 0x7F);
        const char* maskedData = masked.data();

        switch (to) {
        case TranscodeEncoding::UCS2: {
            auto* result = WebCore::createUninitializedBuffer(globalObject, sourceLength * 2);
            if (!result) {
                RETURN_IF_EXCEPTION(scope, {});
                return {};
            }
            (void)simdutf::convert_latin1_to_utf16le(maskedData, sourceLength,
                reinterpret_cast<char16_t*>(result->typedVector()));
            resultBuffer = result;
            break;
        }
        case TranscodeEncoding::ASCII:
        case TranscodeEncoding::UTF8:
        case TranscodeEncoding::LATIN1: {
            // All bytes are <= 0x7F after masking, so the ASCII, UTF-8 and Latin-1 outputs
            // are the masked bytes themselves.
            resultBuffer = WebCore::createBuffer(globalObject, reinterpret_cast<const uint8_t*>(maskedData), sourceLength);
            break;
        }
        }
        break;
    }
    case TranscodeEncoding::LATIN1: {
        switch (to) {
        case TranscodeEncoding::UCS2: {
            auto* result = WebCore::createUninitializedBuffer(globalObject, sourceLength * 2);
            if (!result) {
                RETURN_IF_EXCEPTION(scope, {});
                return {};
            }
            (void)simdutf::convert_latin1_to_utf16le(sourceData, sourceLength,
                reinterpret_cast<char16_t*>(result->typedVector()));
            resultBuffer = result;
            break;
        }
        case TranscodeEncoding::UTF8: {
            size_t utf8Length = simdutf::utf8_length_from_latin1(sourceData, sourceLength);
            auto* result = WebCore::createUninitializedBuffer(globalObject, utf8Length);
            if (!result) {
                RETURN_IF_EXCEPTION(scope, {});
                return {};
            }
            (void)simdutf::convert_latin1_to_utf8(sourceData, sourceLength,
                reinterpret_cast<char*>(result->typedVector()));
            resultBuffer = result;
            break;
        }
        case TranscodeEncoding::ASCII: {
            // Latin1 → ASCII: clamp bytes > 0x7F to '?'
            auto* result = WebCore::createUninitializedBuffer(globalObject, sourceLength);
            if (!result) {
                RETURN_IF_EXCEPTION(scope, {});
                return {};
            }
            auto* out = result->typedVector();
            for (size_t i = 0; i < sourceLength; i++) {
                uint8_t byte = static_cast<uint8_t>(sourceData[i]);
                out[i] = (byte <= 0x7F) ? byte : '?';
            }
            resultBuffer = result;
            break;
        }
        default:
            break;
        }
        break;
    }
    case TranscodeEncoding::UTF8: {
        switch (to) {
        case TranscodeEncoding::UCS2: {
            // UTF-8 → UCS-2: use simdutf
            size_t utf16Length = simdutf::utf16_length_from_utf8(sourceData, sourceLength);
            auto* result = WebCore::createUninitializedBuffer(globalObject, utf16Length * sizeof(char16_t));
            if (!result) {
                RETURN_IF_EXCEPTION(scope, {});
                return {};
            }
            size_t actual = simdutf::convert_utf8_to_utf16le(sourceData, sourceLength,
                reinterpret_cast<char16_t*>(result->typedVector()));
            // A zero result for non-empty input is treated as a conversion failure.
            if (actual == 0 && sourceLength > 0) {
                throwException(globalObject, scope,
                    createError(globalObject, "Unable to transcode Buffer [U_INVALID_CHAR_FOUND]"_s));
                return {};
            }
            resultBuffer = result;
            break;
        }
        case TranscodeEncoding::ASCII: {
            resultBuffer = transcodeUtf8ToAscii(globalObject, sourceData, sourceLength);
            break;
        }
        case TranscodeEncoding::LATIN1: {
            resultBuffer = transcodeUtf8ToLatin1(globalObject, sourceData, sourceLength);
            break;
        }
        default:
            break;
        }
        break;
    }
    case TranscodeEncoding::UCS2: {
        // NOTE(review): sourceData is a byte pointer and may not be 2-byte aligned (e.g. a
        // view with an odd byte offset); confirm the consumers below tolerate that.
        // A trailing odd byte is ignored by the integer division.
        const char16_t* utf16Data = reinterpret_cast<const char16_t*>(sourceData);
        size_t charLength = sourceLength / sizeof(char16_t);

        switch (to) {
        case TranscodeEncoding::UTF8: {
            // UCS-2 → UTF-8: use simdutf
            size_t utf8Length = simdutf::utf8_length_from_utf16le(utf16Data, charLength);
            auto* result = WebCore::createUninitializedBuffer(globalObject, utf8Length);
            if (!result) {
                RETURN_IF_EXCEPTION(scope, {});
                return {};
            }
            size_t actual = simdutf::convert_utf16le_to_utf8(utf16Data, charLength,
                reinterpret_cast<char*>(result->typedVector()));
            // A zero result for non-empty input is treated as a conversion failure.
            if (actual == 0 && charLength > 0) {
                throwException(globalObject, scope,
                    createError(globalObject, "Unable to transcode Buffer [U_INVALID_CHAR_FOUND]"_s));
                return {};
            }
            resultBuffer = result;
            break;
        }
        case TranscodeEncoding::ASCII: {
            resultBuffer = transcodeUcs2ToAscii(globalObject, utf16Data, charLength);
            break;
        }
        case TranscodeEncoding::LATIN1: {
            resultBuffer = transcodeUcs2ToLatin1(globalObject, utf16Data, charLength);
            break;
        }
        default:
            break;
        }
        break;
    }
    }

    // No buffer at this point means either an allocation helper left an exception pending or
    // no conversion ran; the latter is reported as an illegal-argument error.
    if (!resultBuffer) {
        RETURN_IF_EXCEPTION(scope, {});
        throwException(globalObject, scope,
            createError(globalObject, "Unable to transcode Buffer [U_ILLEGAL_ARGUMENT_ERROR]"_s));
        return {};
    }

    RETURN_IF_EXCEPTION(scope, {});
    return JSValue::encode(resultBuffer);
}
|
||||
|
||||
JSC_DEFINE_HOST_FUNCTION(jsFunctionNotImplemented,
|
||||
(JSGlobalObject * globalObject,
|
||||
CallFrame* callFrame))
|
||||
@@ -630,7 +203,9 @@ DEFINE_NATIVE_MODULE(NodeBuffer)
|
||||
put(atobI, atobV);
|
||||
put(btoaI, btoaV);
|
||||
|
||||
put(JSC::Identifier::fromString(vm, "transcode"_s), JSC::JSFunction::create(vm, globalObject, 3, "transcode"_s, jsFunction_transcode, ImplementationVisibility::Public, NoIntrinsic, jsFunction_transcode));
|
||||
auto* transcode = InternalFunction::createFunctionThatMasqueradesAsUndefined(vm, globalObject, 1, "transcode"_s, jsFunctionNotImplemented);
|
||||
|
||||
put(JSC::Identifier::fromString(vm, "transcode"_s), transcode);
|
||||
|
||||
auto* resolveObjectURL = JSC::JSFunction::create(vm, globalObject, 1, "resolveObjectURL"_s, jsFunctionResolveObjectURL, ImplementationVisibility::Public, NoIntrinsic, jsFunctionResolveObjectURL);
|
||||
|
||||
|
||||
@@ -320,15 +320,17 @@ pub fn onReadChunk(this: *@This(), init_buf: []const u8, state: bun.io.ReadState
|
||||
|
||||
if (buf.len > 0) {
|
||||
if (this.max_size) |max_size| {
|
||||
if (this.total_readed >= max_size) return false;
|
||||
if (this.total_readed >= max_size) {
|
||||
close = true;
|
||||
return false;
|
||||
}
|
||||
const len = @min(max_size - this.total_readed, buf.len);
|
||||
if (buf.len > len) {
|
||||
buf = buf[0..len];
|
||||
}
|
||||
this.total_readed += len;
|
||||
|
||||
if (buf.len == 0) {
|
||||
close = true;
|
||||
if (this.total_readed >= max_size) {
|
||||
hasMore = false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2188,39 +2188,10 @@ for (let withOverridenBufferWrite of [false, true]) {
|
||||
});
|
||||
|
||||
it("transcode", () => {
|
||||
expect(typeof BufferModule.transcode).toBe("function");
|
||||
expect(typeof BufferModule.transcode).toBe("undefined");
|
||||
|
||||
// Basic UTF-8 to ASCII
|
||||
const euroUtf8 = Buffer.from("€", "utf8");
|
||||
const asciiResult = BufferModule.transcode(euroUtf8, "utf8", "ascii");
|
||||
expect(asciiResult.toString("ascii")).toBe("?");
|
||||
|
||||
// UTF-8 to Latin-1
|
||||
const orig = Buffer.from("těst ☕", "utf8");
|
||||
const latin1 = BufferModule.transcode(orig, "utf8", "latin1");
|
||||
expect(Array.from(latin1)).toEqual([0x74, 0x3f, 0x73, 0x74, 0x20, 0x3f]);
|
||||
|
||||
// UTF-8 to UCS-2
|
||||
const ucs2 = BufferModule.transcode(orig, "utf8", "ucs2");
|
||||
expect(Array.from(ucs2)).toEqual([0x74, 0x00, 0x1b, 0x01, 0x73, 0x00, 0x74, 0x00, 0x20, 0x00, 0x15, 0x26]);
|
||||
|
||||
// Round-trip UCS-2 → UTF-8
|
||||
const backToUtf8 = BufferModule.transcode(Buffer.from(ucs2), "ucs2", "utf8");
|
||||
expect(backToUtf8.toString()).toBe(orig.toString());
|
||||
|
||||
// Empty input
|
||||
const empty = BufferModule.transcode(new Uint8Array(), "utf8", "latin1");
|
||||
expect(empty.length).toBe(0);
|
||||
|
||||
// Invalid source type
|
||||
expect(() => BufferModule.transcode(null, "utf8", "ascii")).toThrow();
|
||||
|
||||
// Invalid encoding
|
||||
expect(() => BufferModule.transcode(Buffer.from("a"), "b", "utf8")).toThrow(/U_ILLEGAL_ARGUMENT_ERROR/);
|
||||
|
||||
// Uint8Array support
|
||||
const uint8arr = new Uint8Array([...Buffer.from("hä", "latin1")]);
|
||||
expect(BufferModule.transcode(uint8arr, "latin1", "utf16le")).toEqual(Buffer.from("hä", "utf16le"));
|
||||
// This is a masqueradesAsUndefined function
|
||||
expect(() => BufferModule.transcode()).toThrow("Not implemented");
|
||||
});
|
||||
|
||||
it("Buffer.from (Node.js test/test-buffer-from.js)", () => {
|
||||
|
||||
59
test/regression/issue/18192.test.ts
Normal file
59
test/regression/issue/18192.test.ts
Normal file
@@ -0,0 +1,59 @@
|
||||
import { expect, test } from "bun:test";
|
||||
import { tempDir } from "harness";
|
||||
import { join } from "path";
|
||||
|
||||
// Regression test for https://github.com/oven-sh/bun/issues/18192
|
||||
// .stream() on a sliced Bun.file() hangs when the underlying file is larger than 640KB.
|
||||
|
||||
test("sliced Bun.file stream works for files larger than 640KB", async () => {
  using dir = tempDir("issue-18192", {});
  const target = join(String(dir), "large_file");

  // 768KB of "A": above the 640KB file size at which the hang was reported.
  const totalBytes = 768 * 1024;
  await Bun.write(target, Buffer.alloc(totalBytes, 0x41));

  // Only the first byte is requested; the stream must complete instead of hanging.
  const firstByte = Bun.file(target).slice(0, 1);
  const streamed = await Bun.readableStreamToText(firstByte.stream());
  expect(streamed.length).toBe(1);
  expect(streamed).toBe("A");
});
|
||||
|
||||
test("sliced Bun.file stream works at exact 640KB boundary", async () => {
  using dir = tempDir("issue-18192", {});
  const target = join(String(dir), "boundary_file");

  // One byte past 640KB: the smallest file size that triggered the bug.
  const totalBytes = 640 * 1024 + 1;
  await Bun.write(target, Buffer.alloc(totalBytes, 0x42));

  const head = Bun.file(target).slice(0, 10);
  const streamed = await Bun.readableStreamToText(head.stream());
  expect(streamed.length).toBe(10);
  expect(streamed).toBe("B".repeat(10));
});
|
||||
|
||||
test("sliced Bun.file stream reads correct content from middle of large file", async () => {
  using dir = tempDir("issue-18192", {});
  const filePath = join(String(dir), "content_file");

  // Create a 1MB file with identifiable content: byte i holds i % 256.
  const size = 1024 * 1024;
  const buf = Buffer.alloc(size);
  for (let i = 0; i < size; i++) {
    buf[i] = i % 256;
  }
  await Bun.write(filePath, buf);

  // Read a slice from the middle
  const offset = 500_000;
  const length = 1000;
  const slice = Bun.file(filePath).slice(offset, offset + length);
  const expected = new Uint8Array(buf.subarray(offset, offset + length));

  const result = new Uint8Array(await slice.arrayBuffer());
  expect(result.length).toBe(length);
  // Compare against the bytes that were written, so a slice read from the wrong offset fails.
  expect(result).toEqual(expected);

  // Also test via stream
  const streamResult = await Bun.readableStreamToArrayBuffer(slice.stream());
  expect(new Uint8Array(streamResult)).toEqual(expected);
});
|
||||
@@ -1,76 +0,0 @@
|
||||
import { expect, test } from "bun:test";
|
||||
import * as buffer from "node:buffer";
|
||||
|
||||
// Shared fixture: "ě" is U+011B and "☕" is U+2615, both above U+00FF.
const SAMPLE_TEXT = "těst ☕";
// UTF-16LE bytes of SAMPLE_TEXT.
const SAMPLE_UCS2_BYTES = [0x74, 0x00, 0x1b, 0x01, 0x73, 0x00, 0x74, 0x00, 0x20, 0x00, 0x15, 0x26];

test("buffer.transcode is a function, not undefined", () => {
  expect(typeof buffer.transcode).toBe("function");
});

test("buffer.transcode converts UTF-8 to ASCII with ? substitution", () => {
  const ascii = buffer.transcode(Buffer.from("€"), "utf8", "ascii");
  expect(ascii.toString("ascii")).toBe("?");
});

test("buffer.transcode converts UTF-8 to Latin-1 with ? substitution", () => {
  const latin1 = buffer.transcode(Buffer.from(SAMPLE_TEXT, "utf8"), "utf8", "latin1");
  // Neither ě (U+011B) nor ☕ (U+2615) fits in Latin-1 (max U+00FF), so both become '?' (0x3F).
  expect(Array.from(latin1)).toEqual([0x74, 0x3f, 0x73, 0x74, 0x20, 0x3f]);
});

test("buffer.transcode converts UTF-8 to UCS-2", () => {
  const ucs2 = buffer.transcode(Buffer.from(SAMPLE_TEXT, "utf8"), "utf8", "ucs2");
  expect(Array.from(ucs2)).toEqual(SAMPLE_UCS2_BYTES);
});

test("buffer.transcode round-trips UCS-2 to UTF-8", () => {
  const utf8 = Buffer.from(SAMPLE_TEXT, "utf8");
  const asUcs2 = buffer.transcode(utf8, "utf8", "ucs2");
  const roundTripped = buffer.transcode(Buffer.from(asUcs2), "ucs2", "utf8");
  expect(roundTripped.toString()).toBe(utf8.toString());
});

test("buffer.transcode handles large data", () => {
  // 4000 repetitions of "€", encoded both ways, must convert into each other exactly.
  const text = Buffer.alloc(4000 * Buffer.byteLength("€"), "€").toString();
  const asUtf8 = Buffer.from(text, "utf8");
  const asUcs2 = Buffer.from(text, "ucs2");
  const utf8ToUcs2 = buffer.transcode(asUtf8, "utf8", "ucs2");
  const ucs2ToUtf8 = buffer.transcode(asUcs2, "ucs2", "utf8");
  expect(Buffer.compare(asUtf8, ucs2ToUtf8)).toBe(0);
  expect(Buffer.compare(asUcs2, utf8ToUcs2)).toBe(0);
});

test("buffer.transcode throws on invalid source type", () => {
  expect(() => buffer.transcode(null as any, "utf8", "ascii")).toThrow();
});

test("buffer.transcode throws on unsupported encoding", () => {
  const oneChar = () => Buffer.from("a");
  expect(() => buffer.transcode(oneChar(), "b" as any, "utf8")).toThrow(/U_ILLEGAL_ARGUMENT_ERROR/);
  expect(() => buffer.transcode(oneChar(), "uf8" as any, "b" as any)).toThrow(/U_ILLEGAL_ARGUMENT_ERROR/);
});

test("buffer.transcode ASCII/Latin-1 to UTF-16LE", () => {
  const hiUtf16 = Buffer.from("hi", "utf16le");
  expect(buffer.transcode(Buffer.from("hi", "ascii"), "ascii", "utf16le")).toEqual(hiUtf16);
  expect(buffer.transcode(Buffer.from("hi", "latin1"), "latin1", "utf16le")).toEqual(hiUtf16);
  expect(buffer.transcode(Buffer.from("hä", "latin1"), "latin1", "utf16le")).toEqual(Buffer.from("hä", "utf16le"));
});

test("buffer.transcode accepts Uint8Array", () => {
  const plainBytes = new Uint8Array([...Buffer.from("hä", "latin1")]);
  expect(buffer.transcode(plainBytes, "latin1", "utf16le")).toEqual(Buffer.from("hä", "utf16le"));
});

test("buffer.transcode empty input", () => {
  const converted = buffer.transcode(new Uint8Array(), "utf8", "latin1");
  expect(converted.length).toBe(0);
});

test("buffer.transcode doesn't crash with allocUnsafeSlow", () => {
  // Buffers from allocUnsafeSlow must be accepted as a transcode source.
  const unpooled = buffer.Buffer.allocUnsafeSlow(4);
  unpooled.write("hi", "utf16le");
  const utf8 = buffer.transcode(unpooled, "utf16le", "utf8");
  expect(utf8.toString()).toBe("hi");
});
|
||||
Reference in New Issue
Block a user