mirror of
https://github.com/oven-sh/bun
synced 2026-02-16 22:01:47 +00:00
Fix bun.String.toOwnedSliceReturningAllASCII (#23925)
`bun.String.toOwnedSliceReturningAllASCII` is supposed to return a boolean indicating whether or not the string is entirely composed of ASCII characters. However, the current implementation frequently produces incorrect results: * If the string is a `ZigString`, it always returns true, even though `ZigString`s can be UTF-16 or Latin-1. * If the string is a `StaticZigString`, it always returns false, even though `StaticZigStrings` can be all ASCII. * If the string is a 16-bit `WTFStringImpl`, it always returns false, even though 16-bit `WTFString`s can be all ASCII. * If the string is empty, it always returns false, even though empty strings are valid ASCII strings. `toOwnedSliceReturningAllASCII` is currently used in two places, both of which assume its answer is accurate: * `bun.webcore.Blob.fromJSWithoutDeferGC` * `bun.api.ServerConfig.fromJS` (For internal tracking: fixes ENG-21249)
This commit is contained in:
@@ -74,27 +74,48 @@ pub const String = extern struct {
|
||||
return BunString__transferToJS(this, globalThis);
|
||||
}
|
||||
|
||||
pub fn toOwnedSlice(this: String, allocator: std.mem.Allocator) ![]u8 {
|
||||
const bytes, _ = try this.toOwnedSliceReturningAllASCII(allocator);
|
||||
pub fn toOwnedSlice(this: String, allocator: std.mem.Allocator) OOM![]u8 {
|
||||
const bytes, _ = try this.toOwnedSliceImpl(allocator);
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/// Returns `.{ utf8_bytes, is_all_ascii }`.
|
||||
///
|
||||
/// `false` means the string contains at least one non-ASCII character.
|
||||
pub fn toOwnedSliceReturningAllASCII(this: String, allocator: std.mem.Allocator) OOM!struct { []u8, bool } {
|
||||
switch (this.tag) {
|
||||
.ZigString => return .{ try this.value.ZigString.toOwnedSlice(allocator), true },
|
||||
.WTFStringImpl => {
|
||||
var utf8_slice = this.value.WTFStringImpl.toUTF8WithoutRef(allocator);
|
||||
if (utf8_slice.allocator.get()) |alloc| {
|
||||
if (!isWTFAllocator(alloc)) {
|
||||
return .{ @constCast(utf8_slice.slice()), false };
|
||||
}
|
||||
}
|
||||
const bytes, const ascii_status = try this.toOwnedSliceImpl(allocator);
|
||||
const is_ascii = switch (ascii_status) {
|
||||
.all_ascii => true,
|
||||
.non_ascii => false,
|
||||
.unknown => bun.strings.isAllASCII(bytes),
|
||||
};
|
||||
return .{ bytes, is_ascii };
|
||||
}
|
||||
|
||||
return .{ @constCast((try utf8_slice.cloneIfNeeded(allocator)).slice()), true };
|
||||
fn toOwnedSliceImpl(this: String, allocator: std.mem.Allocator) !struct { []u8, AsciiStatus } {
|
||||
return switch (this.tag) {
|
||||
.ZigString => .{ try this.value.ZigString.toOwnedSlice(allocator), .unknown },
|
||||
.WTFStringImpl => blk: {
|
||||
const utf8_slice = this.value.WTFStringImpl.toUTF8WithoutRef(allocator);
|
||||
// `utf8_slice.allocator` is either null, or `allocator`.
|
||||
errdefer utf8_slice.deinit();
|
||||
|
||||
const ascii_status: AsciiStatus = if (utf8_slice.allocator.isNull())
|
||||
.all_ascii // no allocation means the string was 8-bit and all ascii
|
||||
else if (this.value.WTFStringImpl.is8Bit())
|
||||
.non_ascii // otherwise the allocator would be null for an 8-bit string
|
||||
else
|
||||
.unknown; // string was 16-bit; may or may not be all ascii
|
||||
|
||||
const owned_slice = try utf8_slice.cloneIfNeeded(allocator);
|
||||
// `owned_slice.allocator` is guaranteed to be `allocator`.
|
||||
break :blk .{ owned_slice.mut(), ascii_status };
|
||||
},
|
||||
.StaticZigString => return .{ try this.value.StaticZigString.toOwnedSlice(allocator), false },
|
||||
else => return .{ &[_]u8{}, false },
|
||||
}
|
||||
.StaticZigString => .{
|
||||
try this.value.StaticZigString.toOwnedSlice(allocator), .unknown,
|
||||
},
|
||||
else => return .{ &.{}, .all_ascii }, // trivially all ascii
|
||||
};
|
||||
}
|
||||
|
||||
pub fn createIfDifferent(other: String, utf8_slice: []const u8) String {
|
||||
@@ -1237,6 +1258,7 @@ const std = @import("std");
|
||||
const bun = @import("bun");
|
||||
const JSError = bun.JSError;
|
||||
const OOM = bun.OOM;
|
||||
const AsciiStatus = bun.strings.AsciiStatus;
|
||||
|
||||
const jsc = bun.jsc;
|
||||
const JSValue = bun.jsc.JSValue;
|
||||
|
||||
Reference in New Issue
Block a user