aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> 2022-06-03 18:49:12 -0700
committer: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> 2022-06-03 18:49:12 -0700
commit: 9f640ffb51dc216e78af6ea5fa0eb8bc782e446b (patch)
tree: 19279f2f1b0d12ec3f2df651807201a76285cfd7 /src
parent: af6859acc27265e5a0cbb3107953547c74de281b (diff)
download: bun-9f640ffb51dc216e78af6ea5fa0eb8bc782e446b.tar.gz
bun-9f640ffb51dc216e78af6ea5fa0eb8bc782e446b.tar.zst
bun-9f640ffb51dc216e78af6ea5fa0eb8bc782e446b.zip
impl #1
Diffstat (limited to 'src')
-rw-r--r-- src/javascript/jsc/api/bun.zig 39
-rw-r--r-- src/string_immutable.zig 224
2 files changed, 263 insertions, 0 deletions
diff --git a/src/javascript/jsc/api/bun.zig b/src/javascript/jsc/api/bun.zig
index 8bbcccfb5..1ee9cb96d 100644
--- a/src/javascript/jsc/api/bun.zig
+++ b/src/javascript/jsc/api/bun.zig
@@ -1150,6 +1150,9 @@ pub const Class = NewClass(
.inflateSync = .{
.rfn = JSC.wrapWithHasContainer(JSZlib, "inflateSync", false, false, true),
},
+ .escapeHTML = .{
+ .rfn = Bun.escapeHTML,
+ },
},
.{
.main = .{
@@ -1612,6 +1615,42 @@ pub fn serve(
unreachable;
}
+/// JS-facing binding that HTML-escapes its first argument, replacing
+/// `"`, `&`, `'`, `<` and `>` with entities.
+///
+/// - Called with no arguments: returns an empty string value.
+/// - 16-bit strings are returned unchanged; only the non-16-bit path
+///   escapes anything.
+/// - Otherwise the string bytes go through `strings.escapeHTMLForLatin1Input`.
+///   When that hands back the very same slice, the original JS value is
+///   returned instead of creating a new string.
+///
+/// On allocation failure an "Out of memory" error is reported through
+/// `exception` and `null` is returned.
+pub fn escapeHTML(
+    _: void,
+    ctx: js.JSContextRef,
+    _: js.JSObjectRef,
+    _: js.JSObjectRef,
+    arguments: []const js.JSValueRef,
+    exception: js.ExceptionRef,
+) js.JSValueRef {
+    if (arguments.len < 1) {
+        return ZigString.init("").toValue(ctx).asObjectRef();
+    }
+
+    const input_value = arguments[0].?.value();
+    const zig_str = input_value.getZigString(ctx);
+    if (zig_str.is16Bit()) {
+        // 16-bit input is passed through untouched.
+        return input_value.asObjectRef();
+    }
+
+    const input_slice = zig_str.slice();
+    const escaped_html = strings.escapeHTMLForLatin1Input(ctx.bunVM().allocator, input_slice) catch {
+        JSC.JSError(undefined, "Out of memory", .{}, ctx, exception);
+        return null;
+    };
+
+    // Same pointer and length means nothing needed escaping: reuse the input value.
+    if (escaped_html.ptr == input_slice.ptr and escaped_html.len == input_slice.len) {
+        return input_value.asObjectRef();
+    }
+
+    if (input_slice.len <= 1) {
+        // Inputs of at most one byte never produce allocator-owned output:
+        // the helper answers them with string literals or the input itself.
+        // That includes the empty string, which used to fall through to
+        // `toExternalValue` below with a static literal.
+        // NOTE(review): presumably `toExternalValue` lets the JS engine take
+        // ownership of (and later free) the buffer - confirm.
+        return ZigString.init(escaped_html).toValue(ctx).asObjectRef();
+    }
+
+    return ZigString.init(escaped_html).toExternalValue(ctx).asObjectRef();
+}
+
pub fn allocUnsafe(
_: void,
ctx: js.JSContextRef,
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 094d63f91..367e6300d 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -1316,6 +1316,230 @@ pub fn elementLengthLatin1IntoUTF16(comptime Type: type, latin1_: Type) usize {
return count;
}
+/// Appends `c` to `buf`, substituting the matching HTML entity when `c` is
+/// one of `"`, `&`, `'`, `<` or `>`. Allocation failures are propagated.
+fn appendLatin1ByteEscapedForHTML(buf: *std.ArrayList(u8), c: u8) !void {
+    switch (c) {
+        '"' => try buf.appendSlice("&quot;"),
+        '&' => try buf.appendSlice("&amp;"),
+        '\'' => try buf.appendSlice("&#x27;"), // modified from escape-html; used to be '&#39'
+        '<' => try buf.appendSlice("&lt;"),
+        '>' => try buf.appendSlice("&gt;"),
+        else => try buf.append(c),
+    }
+}
+
+/// Escapes `"`, `&`, `'`, `<` and `>` in `latin1` as HTML entities.
+///
+/// Ownership of the returned slice depends on the input:
+/// - nothing needed escaping (including empty input): `latin1` itself is
+///   returned, same pointer and length, and nothing is allocated;
+/// - `latin1.len == 1` and the byte needs escaping: a string literal is
+///   returned, which must not be freed;
+/// - otherwise: a new buffer allocated with `allocator`, owned by the caller.
+///
+/// Returns an error only when allocation fails; no memory is leaked in that case.
+pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8) ![]const u8 {
+    switch (latin1.len) {
+        // Return the input itself so callers that compare pointers see "unchanged".
+        0 => return latin1,
+        1 => return switch (latin1[0]) {
+            '"' => "&quot;",
+            '&' => "&amp;",
+            '\'' => "&#x27;",
+            '<' => "&lt;",
+            '>' => "&gt;",
+            else => latin1,
+        },
+        else => {},
+    }
+
+    // `remaining` is always a suffix of `latin1`; `latin1.len - remaining.len`
+    // is therefore the number of bytes already consumed.
+    var remaining = latin1;
+
+    // Starts empty (no allocation). Assume most strings won't need any
+    // escaping, so the real buffer is only allocated on the first hit.
+    var buf = std.ArrayList(u8).init(allocator);
+    errdefer buf.deinit();
+    var any_needs_escape = false;
+
+    if (comptime Environment.isAarch64 or Environment.isX64) {
+        const vec_chars = "\"&'<>";
+        const vecs: [vec_chars.len]AsciiVector = comptime brk: {
+            var _vecs: [vec_chars.len]AsciiVector = undefined;
+            for (vec_chars) |c, i| {
+                _vecs[i] = @splat(ascii_vector_size, c);
+            }
+            break :brk _vecs;
+        };
+
+        // Walk the input one vector-sized chunk at a time.
+        while (remaining.len >= ascii_vector_size) : (remaining = remaining[ascii_vector_size..]) {
+            const vec: AsciiVector = remaining[0..ascii_vector_size].*;
+            const chunk_needs_escape = @reduce(
+                .Or,
+                @bitCast(AsciiVectorU1, (vec == vecs[0])) |
+                    @bitCast(AsciiVectorU1, (vec == vecs[1])) |
+                    @bitCast(AsciiVectorU1, (vec == vecs[2])) |
+                    @bitCast(AsciiVectorU1, (vec == vecs[3])) |
+                    @bitCast(AsciiVectorU1, (vec == vecs[4])),
+            ) == 1;
+
+            if (!chunk_needs_escape) {
+                // Clean chunk: only copy it once an output buffer exists.
+                if (any_needs_escape) {
+                    try buf.appendSlice(remaining[0..ascii_vector_size]);
+                }
+                continue;
+            }
+
+            if (!any_needs_escape) {
+                // First hit: allocate and copy over the clean prefix seen so far.
+                buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
+                buf.appendSliceAssumeCapacity(latin1[0 .. latin1.len - remaining.len]);
+                any_needs_escape = true;
+            }
+
+            // Every append is capacity-checked: several entities in one chunk
+            // can exhaust the initial `len + 6` reservation.
+            comptime var i: usize = 0;
+            inline while (i < ascii_vector_size) : (i += 1) {
+                try appendLatin1ByteEscapedForHTML(&buf, vec[i]);
+            }
+        }
+    }
+
+    if (!any_needs_escape) {
+        // Scalar scan of whatever the vector loop did not cover (the whole
+        // input on other architectures), stopping at the first byte to escape.
+        scan: while (remaining.len > 0) : (remaining = remaining[1..]) {
+            switch (remaining[0]) {
+                '"', '&', '\'', '<', '>' => break :scan,
+                else => {},
+            }
+        }
+
+        // Nothing to escape anywhere: hand the input back untouched.
+        if (remaining.len == 0) return latin1;
+
+        buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
+        buf.appendSliceAssumeCapacity(latin1[0 .. latin1.len - remaining.len]);
+    }
+
+    // Escape the tail byte by byte.
+    for (remaining) |c| {
+        try appendLatin1ByteEscapedForHTML(&buf, c);
+    }
+
+    return buf.toOwnedSlice();
+}
+
test "copyLatin1IntoUTF8" {
var input: string = "hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!";
var output = std.mem.zeroes([500]u8);