diff options
author | 2022-06-03 18:49:12 -0700 | |
---|---|---|
committer | 2022-06-03 18:49:12 -0700 | |
commit | 9f640ffb51dc216e78af6ea5fa0eb8bc782e446b (patch) | |
tree | 19279f2f1b0d12ec3f2df651807201a76285cfd7 /src | |
parent | af6859acc27265e5a0cbb3107953547c74de281b (diff) | |
download | bun-9f640ffb51dc216e78af6ea5fa0eb8bc782e446b.tar.gz bun-9f640ffb51dc216e78af6ea5fa0eb8bc782e446b.tar.zst bun-9f640ffb51dc216e78af6ea5fa0eb8bc782e446b.zip |
impl #1
Diffstat (limited to 'src')
-rw-r--r-- | src/javascript/jsc/api/bun.zig | 39 | ||||
-rw-r--r-- | src/string_immutable.zig | 224 |
2 files changed, 263 insertions, 0 deletions
// -----------------------------------------------------------------------------
// src/javascript/jsc/api/bun.zig
//
// The commit also registers the function on the `Bun` class (inside
// `pub const Class = NewClass(...)`, right after `.inflateSync`):
//
//     .escapeHTML = .{
//         .rfn = Bun.escapeHTML,
//     },
// -----------------------------------------------------------------------------

/// JS binding for `Bun.escapeHTML(input)`.
///
/// - With no arguments, returns an empty string.
/// - When the first argument's string representation is 16-bit, the input
///   value is returned as-is.
/// - Otherwise the Latin-1 bytes are passed to `strings.escapeHTMLForLatin1Input`.
///   When nothing needed escaping the original JS value is returned, so no new
///   string is created.
///
/// On allocation failure an "Out of memory" error is reported through
/// `exception` and `null` is returned.
pub fn escapeHTML(
    _: void,
    ctx: js.JSContextRef,
    _: js.JSObjectRef,
    _: js.JSObjectRef,
    arguments: []const js.JSValueRef,
    exception: js.ExceptionRef,
) js.JSValueRef {
    if (arguments.len < 1) {
        return ZigString.init("").toValue(ctx).asObjectRef();
    }

    const input_value = arguments[0].?.value();
    const zig_str = input_value.getZigString(ctx);

    // NOTE(review): 16-bit strings are returned WITHOUT escaping. Callers that
    // rely on this for untrusted input get no protection on this path until a
    // UTF-16 implementation exists.
    if (zig_str.is16Bit()) {
        return input_value.asObjectRef();
    }

    const input_slice = zig_str.slice();
    const escaped_html = strings.escapeHTMLForLatin1Input(ctx.bunVM().allocator, input_slice) catch {
        JSC.JSError(undefined, "Out of memory", .{}, ctx, exception);
        return null;
    };

    // Same pointer and length means nothing was escaped: reuse the JS value.
    if (escaped_html.ptr == input_slice.ptr and escaped_html.len == input_slice.len) {
        return input_value.asObjectRef();
    }

    // For inputs of length 0 or 1 the callee returns a string literal (static
    // memory), so it must not go down the external-value path below.
    if (input_slice.len <= 1) {
        return ZigString.init(escaped_html).toValue(ctx).asObjectRef();
    }

    // Heap-allocated result.
    // NOTE(review): presumably `toExternalValue` hands ownership of the buffer
    // to the JS engine — confirm against ZigString.
    return ZigString.init(escaped_html).toExternalValue(ctx).asObjectRef();
}

// -----------------------------------------------------------------------------
// src/string_immutable.zig
//
// (In the original file these declarations are followed by the pre-existing
// `test "copyLatin1IntoUTF8"`, which this commit does not modify.)
// -----------------------------------------------------------------------------

/// Returns the HTML entity that replaces `c`, or `null` when `c` is emitted
/// unchanged.
fn htmlEntityForLatin1Byte(c: u8) ?[]const u8 {
    switch (c) {
        '"' => return "&quot;",
        '&' => return "&amp;",
        '\'' => return "&#x27;", // modified from escape-html; used to be '&#39;'
        '<' => return "&lt;",
        '>' => return "&gt;",
        else => return null,
    }
}

/// Appends `c` to `buf`, substituting its HTML entity when it has one.
fn appendHTMLEscapedLatin1Byte(buf: *std.ArrayList(u8), c: u8) !void {
    if (htmlEntityForLatin1Byte(c)) |entity| {
        try buf.appendSlice(entity);
    } else {
        try buf.append(c);
    }
}

/// Returns true when any lane of `vec` is one of `"`, `&`, `'`, `<`, `>`.
fn latin1VectorNeedsHTMLEscape(vec: AsciiVector) bool {
    const needles = "\"&'<>";
    const splats: [needles.len]AsciiVector = comptime brk: {
        var out: [needles.len]AsciiVector = undefined;
        for (needles) |c, i| {
            out[i] = @splat(ascii_vector_size, c);
        }
        break :brk out;
    };

    return @reduce(
        .Or,
        @bitCast(AsciiVectorU1, (vec == splats[0])) |
            @bitCast(AsciiVectorU1, (vec == splats[1])) |
            @bitCast(AsciiVectorU1, (vec == splats[2])) |
            @bitCast(AsciiVectorU1, (vec == splats[3])) |
            @bitCast(AsciiVectorU1, (vec == splats[4])),
    ) == 1;
}

/// Escapes `"`, `&`, `'`, `<` and `>` in `latin1` as HTML entities.
///
/// Return value / ownership:
/// - empty input: returns the literal `""` (static memory);
/// - one-byte input: returns either a static entity literal or `latin1` itself;
/// - nothing to escape: returns `latin1` itself (same pointer and length), so
///   callers can detect the no-op case by comparing pointers;
/// - otherwise: returns a new slice allocated with `allocator`, owned by the
///   caller.
///
/// Fails only when the allocator does. The buffer is allocated lazily, so input
/// that needs no escaping never allocates.
pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8) ![]const u8 {
    switch (latin1.len) {
        0 => return "",
        1 => return htmlEntityForLatin1Byte(latin1[0]) orelse latin1,
        else => {},
    }

    // Pass #1: find the length of the prefix that needs no escaping.
    // Most strings need none, so nothing is allocated during this scan.
    var clean_len: usize = 0;

    if (comptime Environment.isAarch64 or Environment.isX64) {
        // Skip whole vector-sized chunks that contain nothing to escape.
        while (latin1.len - clean_len >= ascii_vector_size) {
            const vec: AsciiVector = latin1[clean_len..][0..ascii_vector_size].*;
            if (latin1VectorNeedsHTMLEscape(vec)) break;
            clean_len += ascii_vector_size;
        }
    }

    // Byte-wise scan: pins down the exact offset inside a flagged chunk, and
    // also covers the tail that is shorter than one vector.
    while (clean_len < latin1.len and htmlEntityForLatin1Byte(latin1[clean_len]) == null) {
        clean_len += 1;
    }

    if (clean_len == latin1.len) {
        return latin1;
    }

    // Pass #2: something needs escaping, so build the output in a new buffer.
    var buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
    errdefer buf.deinit();

    // Capacity is at least latin1.len, so the clean prefix always fits.
    buf.appendSliceAssumeCapacity(latin1[0..clean_len]);
    var remaining = latin1[clean_len..];

    if (comptime Environment.isAarch64 or Environment.isX64) {
        while (remaining.len >= ascii_vector_size) {
            const chunk = remaining[0..ascii_vector_size];
            const vec: AsciiVector = chunk.*;

            if (latin1VectorNeedsHTMLEscape(vec)) {
                for (chunk) |c| {
                    try appendHTMLEscapedLatin1Byte(&buf, c);
                }
            } else {
                // Clean chunk: bulk copy.
                try buf.appendSlice(chunk);
            }

            remaining = remaining[ascii_vector_size..];
        }
    }

    for (remaining) |c| {
        try appendHTMLEscapedLatin1Byte(&buf, c);
    }

    return buf.toOwnedSlice();
}