Compare commits

...

4 Commits

Author SHA1 Message Date
Jarred-Sumner
16d53247cb bun run clang-format 2025-07-05 13:16:12 +00:00
Jarred-Sumner
353cb3e918 bun run prettier 2025-07-05 13:15:15 +00:00
Jarred-Sumner
affcac54dd bun scripts/glob-sources.mjs 2025-07-05 13:13:33 +00:00
Jarred Sumner
697eeb12f3 feat: add HTML entity decoder 2025-07-05 06:12:22 -07:00
5 changed files with 145 additions and 0 deletions

View File

@@ -50,6 +50,7 @@ src/bun.js/bindings/ExposeNodeModuleGlobals.cpp
src/bun.js/bindings/ffi.cpp
src/bun.js/bindings/helpers.cpp
src/bun.js/bindings/highway_strings.cpp
src/bun.js/bindings/HTMLEntityDecoder.cpp
src/bun.js/bindings/HTMLEntryPoint.cpp
src/bun.js/bindings/ImportMetaObject.cpp
src/bun.js/bindings/inlines.cpp

View File

@@ -33,6 +33,7 @@
#include "wtf/Compiler.h"
#include "PathInlines.h"
#include "wtf/text/ASCIILiteral.h"
#include <wtf/text/StringBuilder.h>
#include "BunObject+exports.h"
#include "ErrorCode.h"
#include "GeneratedBunObject.h"
@@ -489,6 +490,62 @@ JSC_DEFINE_HOST_FUNCTION(functionBunEscapeHTML, (JSC::JSGlobalObject * lexicalGl
}
}
extern "C" bool Bun__decodeEntity(const BunString* in, BunString* out);
// Bun.decodeHTMLEntity(value)
//
// Converts the first argument to a string and returns a copy in which every
// `&name;` sequence recognised by Bun__decodeEntity is replaced by the text
// that function produces. Anything that is not recognised (unknown names,
// `&;`, a '&' with no later ';') is copied through unchanged.
//
// Returns an empty EncodedJSValue if converting the argument to a string throws.
JSC_DEFINE_HOST_FUNCTION(functionBunDecodeHTMLEntity,
    (JSGlobalObject * lexicalGlobalObject, CallFrame* callFrame))
{
    auto& vm = JSC::getVM(lexicalGlobalObject);
    auto scope = DECLARE_THROW_SCOPE(vm);

    JSValue argument = callFrame->argument(0);
    auto string = argument.toWTFString(lexicalGlobalObject);
    RETURN_IF_EXCEPTION(scope, {});

    WTF::StringBuilder builder;
    builder.reserveCapacity(string.length());

    // `index` is the first character that has not been copied to `builder` yet.
    size_t index = 0;
    // Position of the next ';' after the current '&'. It is kept across
    // iterations while it is still ahead of the cursor so that a run of bare
    // ampersands does not rescan the same text for ';' over and over.
    size_t semi = WTF::notFound;
    bool semiIsCurrent = false;

    while (true) {
        size_t amp = string.find('&', index);
        if (amp == WTF::notFound) {
            builder.append(string.substring(index));
            break;
        }
        builder.append(string.substring(index, amp - index));

        if (!semiIsCurrent) {
            semi = string.find(';', amp + 1);
            semiIsCurrent = true;
        }
        if (semi == WTF::notFound) {
            // No ';' anywhere after this '&': nothing further can be an entity.
            builder.append(string.substring(amp));
            break;
        }

        // If another '&' shows up before the ';', this '&' was a bare ampersand
        // (e.g. "a & b &amp; c"). Emit it literally and restart at the later '&'
        // instead of swallowing the real entity as part of a bogus name.
        size_t nextAmp = string.find('&', amp + 1);
        if (nextAmp != WTF::notFound && nextAmp < semi) {
            builder.append(string.substring(amp, nextAmp - amp));
            index = nextAmp;
            continue; // `semi` is still ahead of the cursor, keep it.
        }

        // From here on the cursor moves past `semi`, so it must be recomputed.
        semiIsCurrent = false;

        size_t len = semi - amp - 1;
        bool decoded = false;
        if (len > 0) {
            // Hand only the name (without '&' and ';') to the Zig lookup.
            BunString bunIn;
            if (string.is8Bit()) {
                auto span = string.span8().subspan(amp + 1, len);
                bunIn = BunString__fromLatin1(reinterpret_cast<const char*>(span.data()), span.size());
            } else {
                auto span = string.span16().subspan(amp + 1, len);
                bunIn = BunString__fromUTF16(span.data(), span.size());
            }

            BunString bunOut;
            decoded = Bun__decodeEntity(&bunIn, &bunOut);

            // NOTE(review): assumes BunString__fromLatin1/fromUTF16 hand back a
            // reference the caller owns; without this deref every candidate
            // name would be leaked.
            bunIn.deref();

            if (decoded) {
                builder.append(bunOut.toWTFString(BunString::NonNull));
                // toWTFString() takes its own reference, so release the one
                // that came back from Bun__decodeEntity.
                bunOut.deref();
            }
        }

        if (!decoded) {
            // Unknown or empty name: keep the original "&...;" text.
            builder.append(string.substring(amp, semi - amp + 1));
        }
        index = semi + 1;
    }

    return JSValue::encode(jsString(vm, builder.toString()));
}
JSC_DEFINE_HOST_FUNCTION(functionBunDeepEquals, (JSGlobalObject * globalObject, JSC::CallFrame* callFrame))
{
auto* global = reinterpret_cast<GlobalObject*>(globalObject);
@@ -727,6 +784,7 @@ JSC_DEFINE_HOST_FUNCTION(functionFileURLToPath, (JSC::JSGlobalObject * globalObj
color BunObject_callback_color DontDelete|Function 2
deepEquals functionBunDeepEquals DontDelete|Function 2
deepMatch functionBunDeepMatch DontDelete|Function 2
decodeHTMLEntity functionBunDecodeHTMLEntity DontDelete|Function 1
deflateSync BunObject_callback_deflateSync DontDelete|Function 1
dns constructDNSObject ReadOnly|DontDelete|PropertyCallback
enableANSIColors BunObject_getter_wrap_enableANSIColors DontDelete|PropertyCallback

View File

@@ -0,0 +1,61 @@
#include "root.h"
#include <wtf/text/WTFString.h>
#include <wtf/text/StringBuilder.h>
extern "C" bool Bun__decodeEntity(const BunString* in, BunString* out);
namespace Bun {

using namespace JSC;
using namespace WTF;

// Host function that decodes `&name;` sequences in its first argument.
//
// The argument is converted to a string; every `&name;` whose name is
// recognised by Bun__decodeEntity is replaced by the text that function
// produces, and everything else (unknown names, `&;`, a '&' with no later ';')
// is copied through unchanged.
//
// Returns an empty EncodedJSValue if converting the argument to a string throws.
JSC_DEFINE_HOST_FUNCTION(jsFunctionDecodeHTMLEntity, (JSGlobalObject * globalObject, CallFrame* callFrame))
{
    auto& vm = globalObject->vm();
    auto scope = DECLARE_THROW_SCOPE(vm);

    JSValue arg = callFrame->argument(0);
    String input = arg.toWTFString(globalObject);
    RETURN_IF_EXCEPTION(scope, {});

    StringBuilder builder;
    builder.reserveCapacity(input.length());

    // `index` is the first character that has not been copied to `builder` yet.
    size_t index = 0;
    // Position of the next ';' after the current '&'. It is kept across
    // iterations while it is still ahead of the cursor so that a run of bare
    // ampersands does not rescan the same text for ';' over and over.
    size_t semi = WTF::notFound;
    bool semiIsCurrent = false;

    while (true) {
        size_t amp = input.find('&', index);
        if (amp == WTF::notFound) {
            builder.append(input.substring(index));
            break;
        }
        builder.append(input.substring(index, amp - index));

        if (!semiIsCurrent) {
            semi = input.find(';', amp + 1);
            semiIsCurrent = true;
        }
        if (semi == WTF::notFound) {
            // No ';' anywhere after this '&': nothing further can be an entity.
            builder.append(input.substring(amp));
            break;
        }

        // If another '&' shows up before the ';', this '&' was a bare ampersand
        // (e.g. "a & b &amp; c"). Emit it literally and restart at the later '&'
        // instead of swallowing the real entity as part of a bogus name.
        size_t nextAmp = input.find('&', amp + 1);
        if (nextAmp != WTF::notFound && nextAmp < semi) {
            builder.append(input.substring(amp, nextAmp - amp));
            index = nextAmp;
            continue; // `semi` is still ahead of the cursor, keep it.
        }

        // From here on the cursor moves past `semi`, so it must be recomputed.
        semiIsCurrent = false;

        size_t len = semi - amp - 1;
        bool decoded = false;
        if (len > 0) {
            // Hand only the name (without '&' and ';') to the Zig lookup.
            BunString bunIn;
            if (input.is8Bit()) {
                auto span = input.span8().subspan(amp + 1, len);
                bunIn = BunString__fromLatin1(reinterpret_cast<const char*>(span.data()), span.size());
            } else {
                auto span = input.span16().subspan(amp + 1, len);
                bunIn = BunString__fromUTF16(span.data(), span.size());
            }

            BunString bunOut;
            decoded = Bun__decodeEntity(&bunIn, &bunOut);

            // NOTE(review): assumes BunString__fromLatin1/fromUTF16 hand back a
            // reference the caller owns; without this deref every candidate
            // name would be leaked.
            bunIn.deref();

            if (decoded) {
                builder.append(bunOut.toWTFString(BunString::NonNull));
                // toWTFString() takes its own reference, so release the one
                // that came back from Bun__decodeEntity.
                bunOut.deref();
            }
        }

        if (!decoded) {
            // Unknown or empty name: keep the original "&...;" text.
            builder.append(input.substring(amp, semi - amp + 1));
        }
        index = semi + 1;
    }

    return JSValue::encode(jsString(vm, builder.toString()));
}

} // namespace Bun

View File

@@ -194,6 +194,18 @@ pub const Keywords = ComptimeStringMap(T, .{
.{ "with", .t_with },
});
/// Looks up `input` (an entity name without the surrounding `&` and `;`) in
/// `jsxEntity`. On a hit, the matching code point is UTF-8 encoded into a new
/// `bun.String` stored in `out` and `true` is returned. On a miss, `out` is
/// left untouched and `false` is returned.
///
/// NOTE(review): the caller presumably owns the string written to `out` and is
/// responsible for releasing it — confirm against `bun.String.createUTF8`.
export fn Bun__decodeEntity(input: *bun.String, out: *bun.String) bool {
    const name = input.toUTF8(bun.default_allocator);
    defer name.deinit();

    // Unknown entity name: report failure without writing to `out`.
    const codepoint = jsxEntity.get(name.slice()) orelse return false;

    // Four bytes is the maximum length of a single UTF-8 encoded code point.
    var encoded: [4]u8 = undefined;
    const encoded_len = std.unicode.utf8Encode(@as(u21, @intCast(codepoint)), &encoded) catch unreachable;

    out.* = bun.String.createUTF8(encoded[0..encoded_len]);
    return true;
}
pub const StrictModeReservedWords = ComptimeStringMap(void, .{
.{ "implements", {} },
.{ "interface", {} },

View File

@@ -0,0 +1,13 @@
import { describe, expect, it } from "bun:test";
// Behavioural tests for Bun.decodeHTMLEntity.
describe("decodeHTMLEntity", () => {
  it("decodes named entities", () => {
    // Each row is [input, expected output].
    const cases = [
      ["&amp;", "&"],
      ["Tom &amp; Jerry", "Tom & Jerry"],
      ["&lt;div&gt;", "<div>"],
    ];

    for (const [input, expected] of cases) {
      expect(Bun.decodeHTMLEntity(input)).toBe(expected);
    }
  });

  it("returns input when entity unknown", () => {
    // An unrecognised name must come back exactly as it went in.
    const unknown = "&notanentity;";
    expect(Bun.decodeHTMLEntity(unknown)).toBe(unknown);
  });
});