aboutsummaryrefslogtreecommitdiff
path: root/src/string_immutable.zig
diff options
context:
space:
mode:
Diffstat (limited to 'src/string_immutable.zig')
-rw-r--r--src/string_immutable.zig224
1 files changed, 224 insertions, 0 deletions
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 094d63f91..367e6300d 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -1316,6 +1316,230 @@ pub fn elementLengthLatin1IntoUTF16(comptime Type: type, latin1_: Type) usize {
return count;
}
+pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8) ![]const u8 {
+ switch (latin1.len) {
+ 0 => return "",
+ 1 => return switch (latin1[0]) {
+ '"' => """,
+ '&' => "&",
+ '\'' => "'",
+ '<' => "&lt;",
+ '>' => "&gt;",
+ else => latin1,
+ },
+ else => {
+ var remaining = latin1;
+
+ const vec_chars = "\"&'<>";
+ const vecs: [vec_chars.len]AsciiVector = comptime brk: {
+ var _vecs: [vec_chars.len]AsciiVector = undefined;
+ for (vec_chars) |c, i| {
+ _vecs[i] = @splat(ascii_vector_size, c);
+ }
+ break :brk _vecs;
+ };
+
+ var buf: std.ArrayList(u8) = undefined;
+ var any_needs_escape = false;
+
+ if (comptime Environment.isAarch64 or Environment.isX64) {
+
+ // pass #1: scan for any characters that need escaping
+ // assume most strings won't need any escaping, so don't actually allocate the buffer
+ scan_and_allocate_lazily: while (remaining.len >= ascii_vector_size) {
+ if (comptime Environment.allow_assert) {
+ std.debug.assert(!any_needs_escape);
+ }
+
+ const vec: AsciiVector = remaining[0..ascii_vector_size].*;
+ if (@reduce(
+ .Or,
+ @bitCast(AsciiVectorU1, (vec == vecs[0])) |
+ @bitCast(AsciiVectorU1, (vec == vecs[1])) |
+ @bitCast(AsciiVectorU1, (vec == vecs[2])) |
+ @bitCast(AsciiVectorU1, (vec == vecs[3])) |
+ @bitCast(AsciiVectorU1, (vec == vecs[4])),
+ ) == 1) {
+ buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
+ const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr);
+ @memcpy(buf.items.ptr, latin1.ptr, copy_len);
+ buf.items.len = copy_len;
+ any_needs_escape = true;
+ comptime var i: usize = 0;
+ inline while (i < ascii_vector_size) : (i += 1) {
+ switch (vec[i]) {
+ '"' => {
+ buf.appendSlice("&quot;") catch unreachable;
+ },
+ '&' => {
+ buf.appendSlice("&amp;") catch unreachable;
+ },
+ '\'' => {
+ buf.appendSlice("&#x27;") catch unreachable; // modified from escape-html; used to be '&#39'
+ },
+ '<' => {
+ buf.appendSlice("&lt;") catch unreachable;
+ },
+ '>' => {
+ buf.appendSlice("&gt;") catch unreachable;
+ },
+ else => |c| {
+ buf.appendAssumeCapacity(c);
+ },
+ }
+ }
+ remaining = remaining[ascii_vector_size..];
+ break :scan_and_allocate_lazily;
+ }
+
+ remaining = remaining[ascii_vector_size..];
+ }
+
+ if (any_needs_escape) {
+ // pass #2: we found something that needed an escape
+ // so we'll go ahead and copy the buffer into a new buffer
+ while (remaining.len >= ascii_vector_size) {
+ const vec: AsciiVector = remaining[0..ascii_vector_size].*;
+ if (@reduce(
+ .Or,
+ @bitCast(AsciiVectorU1, (vec == vecs[0])) |
+ @bitCast(AsciiVectorU1, (vec == vecs[1])) |
+ @bitCast(AsciiVectorU1, (vec == vecs[2])) |
+ @bitCast(AsciiVectorU1, (vec == vecs[3])) |
+ @bitCast(AsciiVectorU1, (vec == vecs[4])),
+ ) == 1) {
+ buf.ensureUnusedCapacity(ascii_vector_size) catch unreachable;
+ comptime var i: usize = 0;
+ inline while (i < ascii_vector_size) : (i += 1) {
+ switch (vec[i]) {
+ '"' => {
+ buf.appendSlice("&quot;") catch unreachable;
+ },
+ '&' => {
+ buf.appendSlice("&amp;") catch unreachable;
+ },
+ '\'' => {
+ buf.appendSlice("&#x27;") catch unreachable; // modified from escape-html; used to be '&#39'
+ },
+ '<' => {
+ buf.appendSlice("&lt;") catch unreachable;
+ },
+ '>' => {
+ buf.appendSlice("&gt;") catch unreachable;
+ },
+ else => |c| {
+ buf.append(c) catch unreachable;
+ },
+ }
+ }
+
+ remaining = remaining[ascii_vector_size..];
+ continue;
+ }
+
+ try buf.ensureUnusedCapacity(ascii_vector_size);
+ buf.items.ptr[buf.items.len .. buf.items.len + ascii_vector_size][0..ascii_vector_size].* = remaining[0..ascii_vector_size].*;
+ buf.items.len += ascii_vector_size;
+ remaining = remaining[ascii_vector_size..];
+ }
+ }
+ }
+
+ if (!any_needs_escape) {
+ scan_and_allocate_lazily: while (remaining.len > 0) {
+ switch (remaining[0]) {
+ '"' => {
+ const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr);
+ buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
+ @memcpy(buf.items.ptr, latin1.ptr, copy_len);
+ buf.items.len = copy_len;
+ buf.appendSlice("&quot;") catch unreachable;
+ remaining = remaining[1..];
+ any_needs_escape = true;
+ break :scan_and_allocate_lazily;
+ },
+ '&' => {
+ const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr);
+ buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
+ @memcpy(buf.items.ptr, latin1.ptr, copy_len);
+ buf.items.len = copy_len;
+ buf.appendSlice("&amp;") catch unreachable;
+ remaining = remaining[1..];
+ any_needs_escape = true;
+ break :scan_and_allocate_lazily;
+ },
+ '\'' => {
+ const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr);
+ buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
+ @memcpy(buf.items.ptr, latin1.ptr, copy_len);
+ buf.items.len = copy_len;
+ buf.appendSlice("&#x27;") catch unreachable; // modified from escape-html; used to be '&#39'
+ remaining = remaining[1..];
+ any_needs_escape = true;
+ break :scan_and_allocate_lazily;
+ },
+ '<' => {
+ const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr);
+ buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
+ @memcpy(buf.items.ptr, latin1.ptr, copy_len);
+ buf.items.len = copy_len;
+ buf.appendSlice("&lt;") catch unreachable;
+ remaining = remaining[1..];
+ any_needs_escape = true;
+ break :scan_and_allocate_lazily;
+ },
+ '>' => {
+ const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr);
+ buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
+ @memcpy(buf.items.ptr, latin1.ptr, copy_len);
+ buf.items.len = copy_len;
+ buf.appendSlice("&gt;") catch unreachable;
+ remaining = remaining[1..];
+ any_needs_escape = true;
+ break :scan_and_allocate_lazily;
+ },
+ else => {
+ remaining = remaining[1..];
+ },
+ }
+ }
+ }
+
+ if (remaining.len > 0) {
+ std.debug.assert(any_needs_escape);
+ for (remaining) |c| {
+ switch (c) {
+ '"' => {
+ buf.appendSlice("&quot;") catch unreachable;
+ },
+ '&' => {
+ buf.appendSlice("&amp;") catch unreachable;
+ },
+ '\'' => {
+ buf.appendSlice("&#x27;") catch unreachable; // modified from escape-html; used to be '&#39'
+ },
+ '<' => {
+ buf.appendSlice("&lt;") catch unreachable;
+ },
+ '>' => {
+ buf.appendSlice("&gt;") catch unreachable;
+ },
+ else => {
+ buf.append(c) catch unreachable;
+ },
+ }
+ }
+ }
+
+ if (any_needs_escape) {
+ return buf.toOwnedSlice();
+ } else {
+ return latin1;
+ }
+ },
+ }
+}
+
test "copyLatin1IntoUTF8" {
var input: string = "hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!";
var output = std.mem.zeroes([500]u8);