diff options
Diffstat (limited to 'src/string_immutable.zig')
-rw-r--r-- | src/string_immutable.zig | 224 |
1 files changed, 224 insertions, 0 deletions
diff --git a/src/string_immutable.zig b/src/string_immutable.zig index 094d63f91..367e6300d 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -1316,6 +1316,230 @@ pub fn elementLengthLatin1IntoUTF16(comptime Type: type, latin1_: Type) usize { return count; } +pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8) ![]const u8 { + switch (latin1.len) { + 0 => return "", + 1 => return switch (latin1[0]) { + '"' => """, + '&' => "&", + '\'' => "'", + '<' => "<", + '>' => ">", + else => latin1, + }, + else => { + var remaining = latin1; + + const vec_chars = "\"&'<>"; + const vecs: [vec_chars.len]AsciiVector = comptime brk: { + var _vecs: [vec_chars.len]AsciiVector = undefined; + for (vec_chars) |c, i| { + _vecs[i] = @splat(ascii_vector_size, c); + } + break :brk _vecs; + }; + + var buf: std.ArrayList(u8) = undefined; + var any_needs_escape = false; + + if (comptime Environment.isAarch64 or Environment.isX64) { + + // pass #1: scan for any characters that need escaping + // assume most strings won't need any escaping, so don't actually allocate the buffer + scan_and_allocate_lazily: while (remaining.len >= ascii_vector_size) { + if (comptime Environment.allow_assert) { + std.debug.assert(!any_needs_escape); + } + + const vec: AsciiVector = remaining[0..ascii_vector_size].*; + if (@reduce( + .Or, + @bitCast(AsciiVectorU1, (vec == vecs[0])) | + @bitCast(AsciiVectorU1, (vec == vecs[1])) | + @bitCast(AsciiVectorU1, (vec == vecs[2])) | + @bitCast(AsciiVectorU1, (vec == vecs[3])) | + @bitCast(AsciiVectorU1, (vec == vecs[4])), + ) == 1) { + buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6); + const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr); + @memcpy(buf.items.ptr, latin1.ptr, copy_len); + buf.items.len = copy_len; + any_needs_escape = true; + comptime var i: usize = 0; + inline while (i < ascii_vector_size) : (i += 1) { + switch (vec[i]) { + '"' => { + buf.appendSlice(""") catch unreachable; + }, + '&' => { + buf.appendSlice("&") catch unreachable; + }, + '\'' => { + buf.appendSlice("'") catch unreachable; // modified from escape-html; used to be ''' + }, + '<' => { + buf.appendSlice("<") catch unreachable; + }, + '>' => { + buf.appendSlice(">") catch unreachable; + }, + else => |c| { + buf.appendAssumeCapacity(c); + }, + } + } + remaining = remaining[ascii_vector_size..]; + break :scan_and_allocate_lazily; + } + + remaining = remaining[ascii_vector_size..]; + } + + if (any_needs_escape) { + // pass #2: we found something that needed an escape + // so we'll go ahead and copy the buffer into a new buffer + while (remaining.len >= ascii_vector_size) { + const vec: AsciiVector = remaining[0..ascii_vector_size].*; + if (@reduce( + .Or, + @bitCast(AsciiVectorU1, (vec == vecs[0])) | + @bitCast(AsciiVectorU1, (vec == vecs[1])) | + @bitCast(AsciiVectorU1, (vec == vecs[2])) | + @bitCast(AsciiVectorU1, (vec == vecs[3])) | + @bitCast(AsciiVectorU1, (vec == vecs[4])), + ) == 1) { + buf.ensureUnusedCapacity(ascii_vector_size) catch unreachable; + comptime var i: usize = 0; + inline while (i < ascii_vector_size) : (i += 1) { + switch (vec[i]) { + '"' => { + buf.appendSlice(""") catch unreachable; + }, + '&' => { + buf.appendSlice("&") catch unreachable; + }, + '\'' => { + buf.appendSlice("'") catch unreachable; // modified from escape-html; used to be ''' + }, + '<' => { + buf.appendSlice("<") catch unreachable; + }, + '>' => { + buf.appendSlice(">") catch unreachable; + }, + else => |c| { + buf.append(c) catch unreachable; + }, + } + } + + remaining = remaining[ascii_vector_size..]; + continue; + } + + try buf.ensureUnusedCapacity(ascii_vector_size); + buf.items.ptr[buf.items.len .. buf.items.len + ascii_vector_size][0..ascii_vector_size].* = remaining[0..ascii_vector_size].*; + buf.items.len += ascii_vector_size; + remaining = remaining[ascii_vector_size..]; + } + } + } + + if (!any_needs_escape) { + scan_and_allocate_lazily: while (remaining.len > 0) { + switch (remaining[0]) { + '"' => { + const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr); + buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6); + @memcpy(buf.items.ptr, latin1.ptr, copy_len); + buf.items.len = copy_len; + buf.appendSlice(""") catch unreachable; + remaining = remaining[1..]; + any_needs_escape = true; + break :scan_and_allocate_lazily; + }, + '&' => { + const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr); + buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6); + @memcpy(buf.items.ptr, latin1.ptr, copy_len); + buf.items.len = copy_len; + buf.appendSlice("&") catch unreachable; + remaining = remaining[1..]; + any_needs_escape = true; + break :scan_and_allocate_lazily; + }, + '\'' => { + const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr); + buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6); + @memcpy(buf.items.ptr, latin1.ptr, copy_len); + buf.items.len = copy_len; + buf.appendSlice("'") catch unreachable; // modified from escape-html; used to be ''' + remaining = remaining[1..]; + any_needs_escape = true; + break :scan_and_allocate_lazily; + }, + '<' => { + const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr); + buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6); + @memcpy(buf.items.ptr, latin1.ptr, copy_len); + buf.items.len = copy_len; + buf.appendSlice("<") catch unreachable; + remaining = remaining[1..]; + any_needs_escape = true; + break :scan_and_allocate_lazily; + }, + '>' => { + const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr); + buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6); + @memcpy(buf.items.ptr, latin1.ptr, copy_len); + buf.items.len = copy_len; + buf.appendSlice(">") catch unreachable; + remaining = remaining[1..]; + any_needs_escape = true; + break :scan_and_allocate_lazily; + }, + else => { + remaining = remaining[1..]; + }, + } + } + } + + if (remaining.len > 0) { + std.debug.assert(any_needs_escape); + for (remaining) |c| { + switch (c) { + '"' => { + buf.appendSlice(""") catch unreachable; + }, + '&' => { + buf.appendSlice("&") catch unreachable; + }, + '\'' => { + buf.appendSlice("'") catch unreachable; // modified from escape-html; used to be ''' + }, + '<' => { + buf.appendSlice("<") catch unreachable; + }, + '>' => { + buf.appendSlice(">") catch unreachable; + }, + else => { + buf.append(c) catch unreachable; + }, + } + } + } + + if (any_needs_escape) { + return buf.toOwnedSlice(); + } else { + return latin1; + } + }, + } +} + test "copyLatin1IntoUTF8" { var input: string = "hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!hello world!"; var output = std.mem.zeroes([500]u8); |