impl #1

author: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> 2022-06-03 18:49:12 -0700
committer: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> 2022-06-03 18:49:12 -0700
commit: 9f640ffb51dc216e78af6ea5fa0eb8bc782e446b (patch)
tree: 19279f2f1b0d12ec3f2df651807201a76285cfd7 /src
parent: af6859acc27265e5a0cbb3107953547c74de281b (diff)
download: bun-9f640ffb51dc216e78af6ea5fa0eb8bc782e446b.tar.gz
bun-9f640ffb51dc216e78af6ea5fa0eb8bc782e446b.tar.zst
bun-9f640ffb51dc216e78af6ea5fa0eb8bc782e446b.zip
2 files changed, 263 insertions, 0 deletions
diff --git a/src/javascript/jsc/api/bun.zig b/src/javascript/jsc/api/bun.zig
index 8bbcccfb5..1ee9cb96d 100644
--- a/src/javascript/jsc/api/bun.zig
+++ b/src/javascript/jsc/api/bun.zig
@@ -1150,6 +1150,9 @@ pub const Class = NewClass(
         .inflateSync = .{
             .rfn = JSC.wrapWithHasContainer(JSZlib, "inflateSync", false, false, true),
         },
+        .escapeHTML = .{
+            .rfn = Bun.escapeHTML,
+        },
     },
     .{
         .main = .{
@@ -1612,6 +1615,42 @@ pub fn serve(
     unreachable;
 }
 
+/// Native function registered as the `rfn` of the `escapeHTML` entry on `Class`.
+///
+/// Behavior, by input:
+/// - no arguments: returns an empty string value
+/// - first argument's string is 16-bit: returns the argument unchanged
+/// - first argument's string is empty, or contains none of `"`, `&`, `'`,
+///   `<`, `>`: returns the argument unchanged
+/// - otherwise: returns a new string value holding the escaped text
+///
+/// If escaping fails to allocate, an "Out of memory" error is reported through
+/// `exception` and `null` is returned.
+pub fn escapeHTML(
+    _: void,
+    ctx: js.JSContextRef,
+    _: js.JSObjectRef,
+    _: js.JSObjectRef,
+    arguments: []const js.JSValueRef,
+    exception: js.ExceptionRef,
+) js.JSValueRef {
+    if (arguments.len < 1) {
+        return ZigString.init("").toValue(ctx).asObjectRef();
+    }
+
+    const input_value = arguments[0].?.value();
+    const zig_str = input_value.getZigString(ctx);
+    if (zig_str.is16Bit()) {
+        // NOTE(review): 16-bit strings are handed back WITHOUT escaping.
+        // Callers cannot rely on the result being HTML-safe for such input
+        // until a 16-bit escaper exists.
+        return input_value.asObjectRef();
+    }
+
+    const input_slice = zig_str.slice();
+
+    // An empty string escapes to itself. Returning early keeps a zero-length
+    // result from ever reaching toExternalValue() below: the escaper may hand
+    // back a static empty slice whose pointer differs from the input's.
+    // NOTE(review): presumably toExternalValue() takes ownership of the buffer
+    // it is given, so static memory must not be passed to it — confirm.
+    if (input_slice.len == 0) {
+        return input_value.asObjectRef();
+    }
+
+    const escaped_html = strings.escapeHTMLForLatin1Input(ctx.bunVM().allocator, input_slice) catch {
+        JSC.JSError(undefined, "Out of memory", .{}, ctx, exception);
+        return null;
+    };
+
+    // The escaper returns the very same slice when nothing needed escaping.
+    if (escaped_html.ptr == input_slice.ptr and escaped_html.len == input_slice.len) {
+        return input_value.asObjectRef();
+    }
+
+    if (input_slice.len == 1) {
+        // single character escaped strings are statically allocated
+        return ZigString.init(escaped_html).toValue(ctx).asObjectRef();
+    }
+
+    // Anything else was allocated by the escaper with the VM's allocator.
+    return ZigString.init(escaped_html).toExternalValue(ctx).asObjectRef();
+}
+
 pub fn allocUnsafe(
     _: void,
     ctx: js.JSContextRef,
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 094d63f91..367e6300d 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -1316,6 +1316,230 @@ pub fn elementLengthLatin1IntoUTF16(comptime Type: type, latin1_: Type) usize {
     return count;
 }
 
+/// Escapes `"`, `&`, `'`, `<` and `>` in a Latin-1 (one byte per character)
+/// string so it can be embedded in HTML.
+///
+/// What is returned depends on the input:
+/// - nothing needed escaping (including empty input): `latin1` itself, same
+///   pointer and length, so callers can detect "unchanged" by identity
+/// - exactly one byte, and it needs escaping: a statically allocated entity
+///   string, which must not be freed
+/// - otherwise: a buffer allocated with `allocator`; the caller owns it
+///
+/// Fails only when `allocator` cannot provide memory; the partially built
+/// output is released before the error is returned.
+pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8) ![]const u8 {
+    switch (latin1.len) {
+        // Return the input rather than a literal "" so the "unchanged means
+        // same pointer" contract also holds for empty strings.
+        0 => return latin1,
+        1 => return switch (latin1[0]) {
+            '"' => "&quot;",
+            '&' => "&amp;",
+            '\'' => "&#x27;", // modified from escape-html; used to be '&#39'
+            '<' => "&lt;",
+            '>' => "&gt;",
+            else => latin1,
+        },
+        else => {
+            var remaining = latin1;
+
+            const vec_chars = "\"&'<>";
+            const vecs: [vec_chars.len]AsciiVector = comptime brk: {
+                var _vecs: [vec_chars.len]AsciiVector = undefined;
+                for (vec_chars) |c, i| {
+                    _vecs[i] = @splat(ascii_vector_size, c);
+                }
+                break :brk _vecs;
+            };
+
+            // Starts out empty (no allocation); it only receives storage once
+            // the first character that needs escaping has been found.
+            var buf = std.ArrayList(u8).init(allocator);
+            // Release the partial output if a later allocation fails.
+            errdefer buf.deinit();
+            var any_needs_escape = false;
+
+            if (comptime Environment.isAarch64 or Environment.isX64) {
+                // pass #1: scan whole vectors for any character that needs escaping
+                // assume most strings won't need any escaping, so don't allocate until one does
+                while (remaining.len >= ascii_vector_size) {
+                    const vec: AsciiVector = remaining[0..ascii_vector_size].*;
+                    if (@reduce(
+                        .Or,
+                        @bitCast(AsciiVectorU1, (vec == vecs[0])) |
+                            @bitCast(AsciiVectorU1, (vec == vecs[1])) |
+                            @bitCast(AsciiVectorU1, (vec == vecs[2])) |
+                            @bitCast(AsciiVectorU1, (vec == vecs[3])) |
+                            @bitCast(AsciiVectorU1, (vec == vecs[4])),
+                    ) == 1) {
+                        buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
+                        // Everything before this vector is known to be clean: copy it verbatim.
+                        buf.appendSliceAssumeCapacity(latin1[0 .. latin1.len - remaining.len]);
+                        any_needs_escape = true;
+                        // The matching vector stays in `remaining`; pass #2 escapes it.
+                        break;
+                    }
+
+                    remaining = remaining[ascii_vector_size..];
+                }
+
+                if (any_needs_escape) {
+                    // pass #2: copy the rest vector by vector, going byte by byte
+                    // only inside vectors that contain something to escape
+                    while (remaining.len >= ascii_vector_size) {
+                        const vec: AsciiVector = remaining[0..ascii_vector_size].*;
+                        if (@reduce(
+                            .Or,
+                            @bitCast(AsciiVectorU1, (vec == vecs[0])) |
+                                @bitCast(AsciiVectorU1, (vec == vecs[1])) |
+                                @bitCast(AsciiVectorU1, (vec == vecs[2])) |
+                                @bitCast(AsciiVectorU1, (vec == vecs[3])) |
+                                @bitCast(AsciiVectorU1, (vec == vecs[4])),
+                        ) == 1) {
+                            comptime var i: usize = 0;
+                            inline while (i < ascii_vector_size) : (i += 1) {
+                                try appendEscapedHTMLLatin1Byte(&buf, vec[i]);
+                            }
+                        } else {
+                            try buf.appendSlice(remaining[0..ascii_vector_size]);
+                        }
+
+                        remaining = remaining[ascii_vector_size..];
+                    }
+                }
+            }
+
+            if (!any_needs_escape) {
+                // No full vector matched (or vectors are not used on this target):
+                // scan what is left one byte at a time, still without allocating.
+                while (remaining.len > 0) {
+                    switch (remaining[0]) {
+                        '"', '&', '\'', '<', '>' => {
+                            buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
+                            buf.appendSliceAssumeCapacity(latin1[0 .. latin1.len - remaining.len]);
+                            any_needs_escape = true;
+                            // Leave the byte in `remaining`; the loop below escapes it.
+                            break;
+                        },
+                        else => {
+                            remaining = remaining[1..];
+                        },
+                    }
+                }
+            }
+
+            if (!any_needs_escape) {
+                return latin1;
+            }
+
+            // Whatever is left (less than one vector after the vector passes, or
+            // everything from the first match onward after the scalar scan).
+            for (remaining) |c| {
+                try appendEscapedHTMLLatin1Byte(&buf, c);
+            }
+
+            return buf.toOwnedSlice();
+        },
+    }
+}
+
+/// Appends `c` to `buf`, replacing it with its HTML entity when it is one of
+/// `"`, `&`, `'`, `<`, `>`. Grows `buf` as needed and reports allocation failure.
+fn appendEscapedHTMLLatin1Byte(buf: *std.ArrayList(u8), c: u8) !void {
+    switch (c) {
+        '"' => try buf.appendSlice("&quot;"),
+        '&' => try buf.appendSlice("&amp;"),
+        '\'' => try buf.appendSlice("&#x27;"), // modified from escape-html; used to be '&#39'
+        '<' => try buf.appendSlice("&lt;"),
+        '>' => try buf.appendSlice("&gt;"),
+        else => try buf.append(c),
+    }
+}
+
 test "copyLatin1IntoUTF8" {
     var input: string = "hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!";
     var output = std.mem.zeroes([500]u8);
author	Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>	2022-06-03 18:49:12 -0700
committer	Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>	2022-06-03 18:49:12 -0700
commit	9f640ffb51dc216e78af6ea5fa0eb8bc782e446b (patch)
tree	19279f2f1b0d12ec3f2df651807201a76285cfd7 /src
parent	af6859acc27265e5a0cbb3107953547c74de281b (diff)
download	bun-9f640ffb51dc216e78af6ea5fa0eb8bc782e446b.tar.gz bun-9f640ffb51dc216e78af6ea5fa0eb8bc782e446b.tar.zst bun-9f640ffb51dc216e78af6ea5fa0eb8bc782e446b.zip