mirror of
https://github.com/oven-sh/bun
synced 2026-02-13 12:29:07 +00:00
impl #1
This commit is contained in:
86
bench/snippets/escapeHTML.js
Normal file
86
bench/snippets/escapeHTML.js
Normal file
@@ -0,0 +1,86 @@
|
||||
import { group } from "mitata";
|
||||
import { bench, run } from "mitata";
|
||||
|
||||
// Cache the native implementation once so each benchmark iteration measures
// the call itself rather than a property lookup on the `Bun` global.
const bunEscapeHTML = Bun.escapeHTML;
|
||||
|
||||
// Matches the first character that needs escaping. Deliberately not global:
// it is only used to find where the slow path has to start.
const matchHtmlRegExp = /["'&<>]/;

/**
 * Escapes special characters and HTML entities in a given html string.
 *
 * This is the JavaScript baseline the native `Bun.escapeHTML` is benchmarked
 * against, so the algorithm is kept as-is: find the first special character
 * with a regex, then walk the rest of the string by char code, copying the
 * untouched runs between escapes with `substring`.
 *
 * @param {string} string HTML string to escape for later insertion
 * @return {string} The escaped string, or the stringified input when nothing
 *   needed escaping.
 * @public
 */
function reactEscapeHtml(string) {
  // `"" + x` is kept (rather than String(x)) to match the reference
  // implementation's coercion rules exactly.
  const str = "" + string;
  const match = matchHtmlRegExp.exec(str);

  if (!match) {
    return str;
  }

  let escape;
  let html = "";
  let index;
  // Start of the pending run of characters that do not need escaping.
  let lastIndex = 0;

  for (index = match.index; index < str.length; index++) {
    switch (str.charCodeAt(index)) {
      case 34: // "
        escape = "&quot;";
        break;
      case 38: // &
        escape = "&amp;";
        break;
      case 39: // '
        escape = "&#x27;"; // modified from escape-html; used to be '&#39'
        break;
      case 60: // <
        escape = "&lt;";
        break;
      case 62: // >
        escape = "&gt;";
        break;
      default:
        continue;
    }

    // Flush the unescaped run that precedes this special character.
    if (lastIndex !== index) {
      html += str.substring(lastIndex, index);
    }

    lastIndex = index + 1;
    html += escape;
  }

  // `index` equals `str.length` here; append whatever trails the last escape.
  return lastIndex !== index ? html + str.substring(lastIndex, index) : html;
}
|
||||
|
||||
// Benchmark inputs. Each one stresses a different path of the escaper:
//   short   - a single small string that needs escaping
//   long    - the short string repeated, so escapes occur throughout
//   middle  - one escapable region surrounded by long clean runs
//   nothing - a long string with nothing to escape (pure scan)
const filler = "lalala";
const payload = "<script>alert(1)</script>";

const short = filler + payload + filler;
const long = short.repeat(9000);
const middle = filler.repeat(2000) + payload + filler.repeat(2000);
const nothing = filler.repeat(9999);

// Groups are registered in the same order as before: long, short, middle, nothing.
const scenarios = [
  ["long", long],
  ["short", short],
  ["middle", middle],
  ["nothing", nothing],
];

for (const [label, input] of scenarios) {
  group(`${label} (${input.length})`, () => {
    bench("react's escapeHTML", () => reactEscapeHtml(input));
    bench("bun's escapeHTML", () => bunEscapeHTML(input));
  });
}

await run();
|
||||
54
integration/bunjs-only-snippets/escapeHTML.test.js
Normal file
54
integration/bunjs-only-snippets/escapeHTML.test.js
Normal file
@@ -0,0 +1,54 @@
|
||||
import { describe, it, expect } from "bun:test";
|
||||
import { gcTick } from "./gc";
|
||||
|
||||
// Each expectation compares against the entity-encoded form of the input.
// The five characters that must be rewritten are  "  &  '  <  >.
describe("Bun.escapeHTML", () => {
  it("works", () => {
    expect(Bun.escapeHTML("<script>alert(1)</script>")).toBe(
      "&lt;script&gt;alert(1)&lt;/script&gt;"
    );
    expect(Bun.escapeHTML("<")).toBe("&lt;");
    expect(Bun.escapeHTML(">")).toBe("&gt;");
    expect(Bun.escapeHTML("&")).toBe("&amp;");
    expect(Bun.escapeHTML("'")).toBe("&#x27;");
    expect(Bun.escapeHTML('"')).toBe("&quot;");

    // Whitespace, control characters and non-ASCII Latin-1 pass through untouched.
    expect(Bun.escapeHTML("\n")).toBe("\n");
    expect(Bun.escapeHTML("\r")).toBe("\r");
    expect(Bun.escapeHTML("\t")).toBe("\t");
    expect(Bun.escapeHTML("\f")).toBe("\f");
    expect(Bun.escapeHTML("\v")).toBe("\v");
    expect(Bun.escapeHTML("\b")).toBe("\b");
    expect(Bun.escapeHTML("\u00A0")).toBe("\u00A0");

    // The matrix of cases we need to test for:
    // 1. Works with short strings
    // 2. Works with long strings
    // 3. Works with latin1 strings
    // 4. Works with utf16 strings
    // 5. Works when the text to escape is somewhere in the middle
    // 6. Works when the text to escape is in the beginning
    // 7. Works when the text to escape is in the end
    // 8. Returns the same string when there's no need to escape
    expect(
      Bun.escapeHTML("lalala" + "<script>alert(1)</script>" + "lalala")
    ).toBe("lalala&lt;script&gt;alert(1)&lt;/script&gt;lalala");

    expect(Bun.escapeHTML("<script>alert(1)</script>" + "lalala")).toBe(
      "&lt;script&gt;alert(1)&lt;/script&gt;lalala"
    );
    expect(Bun.escapeHTML("lalala" + "<script>alert(1)</script>")).toBe(
      "lalala" + "&lt;script&gt;alert(1)&lt;/script&gt;"
    );

    // Same three placements, repeated so the input is long.
    expect(
      Bun.escapeHTML(
        ("lalala" + "<script>alert(1)</script>" + "lalala").repeat(900)
      )
    ).toBe("lalala&lt;script&gt;alert(1)&lt;/script&gt;lalala".repeat(900));
    expect(
      Bun.escapeHTML(("<script>alert(1)</script>" + "lalala").repeat(900))
    ).toBe("&lt;script&gt;alert(1)&lt;/script&gt;lalala".repeat(900));
    expect(
      Bun.escapeHTML(("lalala" + "<script>alert(1)</script>").repeat(900))
    ).toBe(("lalala" + "&lt;script&gt;alert(1)&lt;/script&gt;").repeat(900));
  });
});
|
||||
@@ -1150,6 +1150,9 @@ pub const Class = NewClass(
|
||||
.inflateSync = .{
|
||||
.rfn = JSC.wrapWithHasContainer(JSZlib, "inflateSync", false, false, true),
|
||||
},
|
||||
.escapeHTML = .{
|
||||
.rfn = Bun.escapeHTML,
|
||||
},
|
||||
},
|
||||
.{
|
||||
.main = .{
|
||||
@@ -1612,6 +1615,42 @@ pub fn serve(
|
||||
unreachable;
|
||||
}
|
||||
|
||||
/// JS binding for `Bun.escapeHTML(input)`.
///
/// Returns:
/// - an empty string when called without arguments or with an empty Latin-1 string,
/// - the input value itself when nothing had to be escaped,
/// - a new string holding the escaped text otherwise,
/// - `null` after reporting "Out of memory" through `exception` when the
///   escape buffer could not be allocated.
pub fn escapeHTML(
    _: void,
    ctx: js.JSContextRef,
    _: js.JSObjectRef,
    _: js.JSObjectRef,
    arguments: []const js.JSValueRef,
    exception: js.ExceptionRef,
) js.JSValueRef {
    if (arguments.len < 1) {
        return ZigString.init("").toValue(ctx).asObjectRef();
    }

    const input_value = arguments[0].?.value();
    const zig_str = input_value.getZigString(ctx);

    if (zig_str.is16Bit()) {
        // NOTE(review): 16-bit strings are handed back WITHOUT escaping.
        // Callers that rely on this for untrusted input are not protected
        // until a UTF-16 path exists.
        return input_value.asObjectRef();
    }

    const input_slice = zig_str.slice();

    // escapeHTMLForLatin1Input returns a static "" for empty input. That slice
    // is neither the input buffer nor heap memory, so it must not reach
    // toExternalValue below (presumed to take ownership of the bytes — confirm).
    if (input_slice.len == 0) {
        return ZigString.init("").toValue(ctx).asObjectRef();
    }

    const escaped_html = strings.escapeHTMLForLatin1Input(ctx.bunVM().allocator, input_slice) catch {
        JSC.JSError(undefined, "Out of memory", .{}, ctx, exception);
        return null;
    };

    // The helper returns the input slice itself when nothing needed escaping.
    if (escaped_html.ptr == input_slice.ptr and escaped_html.len == input_slice.len) {
        return input_value.asObjectRef();
    }

    if (input_slice.len == 1) {
        // single character escaped strings are statically allocated
        return ZigString.init(escaped_html).toValue(ctx).asObjectRef();
    }

    return ZigString.init(escaped_html).toExternalValue(ctx).asObjectRef();
}
|
||||
|
||||
pub fn allocUnsafe(
|
||||
_: void,
|
||||
ctx: js.JSContextRef,
|
||||
|
||||
@@ -1316,6 +1316,230 @@ pub fn elementLengthLatin1IntoUTF16(comptime Type: type, latin1_: Type) usize {
|
||||
return count;
|
||||
}
|
||||
|
||||
/// Appends `c` to `buf`, replacing the five HTML-significant characters
/// (`"`, `&`, `'`, `<`, `>`) with their entity. Grows `buf` as needed.
fn appendEscapedLatin1Char(buf: *std.ArrayList(u8), c: u8) !void {
    switch (c) {
        '"' => try buf.appendSlice("&quot;"),
        '&' => try buf.appendSlice("&amp;"),
        '\'' => try buf.appendSlice("&#x27;"), // modified from escape-html; used to be '&#39'
        '<' => try buf.appendSlice("&lt;"),
        '>' => try buf.appendSlice("&gt;"),
        else => try buf.append(c),
    }
}

/// Escapes `"`, `&`, `'`, `<` and `>` in a Latin-1 string for insertion into HTML.
///
/// What is returned, and who owns it:
/// - empty input: a static `""`;
/// - one-byte input: a static entity literal, or `latin1` itself;
/// - longer input with nothing to escape: `latin1` itself (same pointer and
///   length, no allocation);
/// - otherwise: a new slice allocated with `allocator`, owned by the caller.
///
/// Returns an allocator error if the output buffer cannot be allocated or grown.
pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8) ![]const u8 {
    switch (latin1.len) {
        0 => return "",
        1 => return switch (latin1[0]) {
            '"' => "&quot;",
            '&' => "&amp;",
            '\'' => "&#x27;",
            '<' => "&lt;",
            '>' => "&gt;",
            else => latin1,
        },
        else => {
            var remaining = latin1;

            // One splatted vector per special character, built at comptime,
            // so a whole chunk can be tested with five vector compares.
            const vec_chars = "\"&'<>";
            const vecs: [vec_chars.len]AsciiVector = comptime brk: {
                var _vecs: [vec_chars.len]AsciiVector = undefined;
                for (vec_chars) |c, i| {
                    _vecs[i] = @splat(ascii_vector_size, c);
                }
                break :brk _vecs;
            };

            // `buf` stays undefined until the first special character is seen;
            // `any_needs_escape` is the flag that says it has been initialized.
            var buf: std.ArrayList(u8) = undefined;
            var any_needs_escape = false;
            errdefer if (any_needs_escape) buf.deinit();

            if (comptime Environment.isAarch64 or Environment.isX64) {

                // pass #1: scan for any characters that need escaping
                // assume most strings won't need any escaping, so don't actually allocate the buffer
                scan_and_allocate_lazily: while (remaining.len >= ascii_vector_size) {
                    if (comptime Environment.allow_assert) {
                        std.debug.assert(!any_needs_escape);
                    }

                    const vec: AsciiVector = remaining[0..ascii_vector_size].*;
                    if (@reduce(
                        .Or,
                        @bitCast(AsciiVectorU1, (vec == vecs[0])) |
                            @bitCast(AsciiVectorU1, (vec == vecs[1])) |
                            @bitCast(AsciiVectorU1, (vec == vecs[2])) |
                            @bitCast(AsciiVectorU1, (vec == vecs[3])) |
                            @bitCast(AsciiVectorU1, (vec == vecs[4])),
                    ) == 1) {
                        buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
                        any_needs_escape = true;

                        // Everything before this chunk is known to be clean: bulk copy it.
                        const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr);
                        @memcpy(buf.items.ptr, latin1.ptr, copy_len);
                        buf.items.len = copy_len;

                        // The `+ 6` headroom only covers a single entity. A chunk
                        // with several special characters can outgrow it, so every
                        // append here must be capacity-checked.
                        comptime var i: usize = 0;
                        inline while (i < ascii_vector_size) : (i += 1) {
                            try appendEscapedLatin1Char(&buf, vec[i]);
                        }

                        remaining = remaining[ascii_vector_size..];
                        break :scan_and_allocate_lazily;
                    }

                    remaining = remaining[ascii_vector_size..];
                }

                if (any_needs_escape) {
                    // pass #2: we found something that needed an escape
                    // so we'll go ahead and copy the buffer into a new buffer
                    while (remaining.len >= ascii_vector_size) {
                        const vec: AsciiVector = remaining[0..ascii_vector_size].*;
                        if (@reduce(
                            .Or,
                            @bitCast(AsciiVectorU1, (vec == vecs[0])) |
                                @bitCast(AsciiVectorU1, (vec == vecs[1])) |
                                @bitCast(AsciiVectorU1, (vec == vecs[2])) |
                                @bitCast(AsciiVectorU1, (vec == vecs[3])) |
                                @bitCast(AsciiVectorU1, (vec == vecs[4])),
                        ) == 1) {
                            try buf.ensureUnusedCapacity(ascii_vector_size);
                            comptime var i: usize = 0;
                            inline while (i < ascii_vector_size) : (i += 1) {
                                try appendEscapedLatin1Char(&buf, vec[i]);
                            }

                            remaining = remaining[ascii_vector_size..];
                            continue;
                        }

                        // Clean chunk: copy it across in one store.
                        try buf.ensureUnusedCapacity(ascii_vector_size);
                        buf.items.ptr[buf.items.len .. buf.items.len + ascii_vector_size][0..ascii_vector_size].* = remaining[0..ascii_vector_size].*;
                        buf.items.len += ascii_vector_size;
                        remaining = remaining[ascii_vector_size..];
                    }
                }
            }

            if (!any_needs_escape) {
                // Scalar scan over whatever the vector pass left (or the whole
                // input on other architectures). Still allocation-free until
                // the first special character shows up.
                scan_and_allocate_lazily: while (remaining.len > 0) {
                    const c = remaining[0];
                    switch (c) {
                        '"', '&', '\'', '<', '>' => {
                            buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
                            any_needs_escape = true;

                            const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr);
                            @memcpy(buf.items.ptr, latin1.ptr, copy_len);
                            buf.items.len = copy_len;

                            try appendEscapedLatin1Char(&buf, c);
                            remaining = remaining[1..];
                            break :scan_and_allocate_lazily;
                        },
                        else => {
                            remaining = remaining[1..];
                        },
                    }
                }
            }

            if (remaining.len > 0) {
                // Bytes can only be left over once a buffer exists: the scalar
                // scan above otherwise consumes the input completely.
                std.debug.assert(any_needs_escape);
                for (remaining) |c| {
                    try appendEscapedLatin1Char(&buf, c);
                }
            }

            if (any_needs_escape) {
                return buf.toOwnedSlice();
            } else {
                return latin1;
            }
        },
    }
}
|
||||
|
||||
test "copyLatin1IntoUTF8" {
|
||||
var input: string = "hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!";
|
||||
var output = std.mem.zeroes([500]u8);
|
||||
|
||||
Reference in New Issue
Block a user