aboutsummaryrefslogtreecommitdiff
path: root/src/string_immutable.zig
diff options
context:
space:
mode:
authorGravatar Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> 2022-06-04 20:01:33 -0700
committerGravatar Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> 2022-06-04 20:01:33 -0700
commit5aa196b361f58b4ba70d21464b4f0995164e269c (patch)
treef282f32595c5d5dac5c7c9ce57367cac66a1140e /src/string_immutable.zig
parent9f640ffb51dc216e78af6ea5fa0eb8bc782e446b (diff)
downloadbun-5aa196b361f58b4ba70d21464b4f0995164e269c.tar.gz
bun-5aa196b361f58b4ba70d21464b4f0995164e269c.tar.zst
bun-5aa196b361f58b4ba70d21464b4f0995164e269c.zip
take two
Diffstat (limited to 'src/string_immutable.zig')
-rw-r--r--src/string_immutable.zig237
1 files changed, 168 insertions, 69 deletions
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 367e6300d..849691ca2 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -1207,8 +1207,7 @@ pub fn copyLatin1IntoUTF8(buf_: []u8, comptime Type: type, latin1_: Type) Encode
break;
}
- buf[0..8].* = @bitCast([ascii_vector_size]u8, vec)[0..8].*;
- buf[8..ascii_vector_size].* = @bitCast([ascii_vector_size]u8, vec)[8..ascii_vector_size].*;
+ buf[0..ascii_vector_size].* = @bitCast([ascii_vector_size]u8, vec)[0..ascii_vector_size].*;
latin1 = latin1[ascii_vector_size..];
buf = buf[ascii_vector_size..];
}
@@ -1317,6 +1316,74 @@ pub fn elementLengthLatin1IntoUTF16(comptime Type: type, latin1_: Type) usize {
}
pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8) ![]const u8 {
+ const Pusher = struct {
+ const lengths: [std.math.maxInt(u8)]u4 = brk: {
+ var values: [std.math.maxInt(u8)]u4 = undefined;
+ for (values) |_, i| {
+ switch (i) {
+ '"' => {
+ values[i] = "&quot;".len;
+ },
+ '&' => {
+ values[i] = "&amp;".len;
+ },
+ '\'' => {
+ values[i] = "&#x27;".len;
+ },
+ '<' => {
+ values[i] = "&lt;".len;
+ },
+ '>' => {
+ values[i] = "&gt;".len;
+ },
+ else => {
+ values[i] = 1;
+ },
+ }
+ }
+
+ break :brk values;
+ };
+
+ inline fn appendString(buf: [*]u8, comptime str: []const u8) usize {
+ buf[0..str.len].* = str[0..str.len].*;
+ return str.len;
+ }
+
+ pub inline fn append(buf: [*]u8, char: u8) usize {
+ if (lengths[char] == 1) {
+ buf[0] = char;
+ return 1;
+ }
+
+ return switch (char) {
+ '"' => appendString(buf, "&quot;"),
+ '&' => appendString(buf, "&amp;"),
+ '\'' => appendString(buf, "&#x27;"),
+ '<' => appendString(buf, "&lt;"),
+ '>' => appendString(buf, "&gt;"),
+ else => unreachable,
+ };
+ }
+ pub inline fn push(comptime c: anytype, chars: []const u8, allo: std.mem.Allocator) []const u8 {
+ var total: usize = 0;
+ inline for (comptime bun.range(0, c)) |i| {
+ total += @as(usize, lengths[chars[i]]);
+ }
+
+ if (total == c) {
+ return chars;
+ }
+
+ var output = allo.alloc(u8, total) catch unreachable;
+ var head = output.ptr;
+ inline for (comptime bun.range(0, c)) |i| {
+ head += @This().append(head, chars[i]);
+ }
+
+ return output;
+ }
+ };
switch (latin1.len) {
0 => return "",
1 => return switch (latin1[0]) {
@@ -1327,6 +1394,43 @@ pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8
'>' => "&gt;",
else => latin1,
},
+ 2 => {
+ const first: []const u8 = switch (latin1[0]) {
+ '"' => "&quot;",
+ '&' => "&amp;",
+ '\'' => "&#x27;",
+ '<' => "&lt;",
+ '>' => "&gt;",
+ else => latin1[0..1],
+ };
+ const second: []const u8 = switch (latin1[1]) {
+ '"' => "&quot;",
+ '&' => "&amp;",
+ '\'' => "&#x27;",
+ '<' => "&lt;",
+ '>' => "&gt;",
+ else => latin1[1..2],
+ };
+ if (first.len == 1 and second.len == 1) {
+ return latin1;
+ }
+
+ return strings.append(allocator, first, second);
+ },
+ 3 => return Pusher.push(3, latin1, allocator),
+ 4 => return Pusher.push(4, latin1, allocator),
+ 5 => return Pusher.push(5, latin1, allocator),
+ 6 => return Pusher.push(6, latin1, allocator),
+ 7 => return Pusher.push(7, latin1, allocator),
+ 8 => return Pusher.push(8, latin1, allocator),
+ 9 => return Pusher.push(9, latin1, allocator),
+ 10 => return Pusher.push(10, latin1, allocator),
+ 11 => return Pusher.push(11, latin1, allocator),
+ 12 => return Pusher.push(12, latin1, allocator),
+ 13 => return Pusher.push(13, latin1, allocator),
+ 14 => return Pusher.push(14, latin1, allocator),
+ 15 => return Pusher.push(15, latin1, allocator),
+
else => {
var remaining = latin1;
@@ -1339,34 +1443,72 @@ pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8
break :brk _vecs;
};
- var buf: std.ArrayList(u8) = undefined;
var any_needs_escape = false;
+ var buf: std.ArrayList(u8) = undefined;
if (comptime Environment.isAarch64 or Environment.isX64) {
-
// pass #1: scan for any characters that need escaping
// assume most strings won't need any escaping, so don't actually allocate the buffer
scan_and_allocate_lazily: while (remaining.len >= ascii_vector_size) {
if (comptime Environment.allow_assert) {
std.debug.assert(!any_needs_escape);
}
-
const vec: AsciiVector = remaining[0..ascii_vector_size].*;
- if (@reduce(
- .Or,
- @bitCast(AsciiVectorU1, (vec == vecs[0])) |
- @bitCast(AsciiVectorU1, (vec == vecs[1])) |
- @bitCast(AsciiVectorU1, (vec == vecs[2])) |
- @bitCast(AsciiVectorU1, (vec == vecs[3])) |
- @bitCast(AsciiVectorU1, (vec == vecs[4])),
- ) == 1) {
+ if (@reduce(.Min, (vec ^ vecs[0]) &
+ (vec ^ vecs[1]) &
+ (vec ^ vecs[2]) &
+ (vec ^ vecs[3]) &
+ (vec ^ vecs[4])) == 0)
+ {
buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr);
@memcpy(buf.items.ptr, latin1.ptr, copy_len);
buf.items.len = copy_len;
any_needs_escape = true;
- comptime var i: usize = 0;
- inline while (i < ascii_vector_size) : (i += 1) {
+ var i: usize = 0;
+ while (i < ascii_vector_size) : (i += 1) {
+ switch (vec[i]) {
+ '"', '&', '\'', '<', '>' => |c| {
+ const result = switch (c) {
+ '"' => "&quot;",
+ '&' => "&amp;",
+ '\'' => "&#x27;",
+ '<' => "&lt;",
+ '>' => "&gt;",
+ else => unreachable,
+ };
+
+ buf.appendSlice(result) catch unreachable;
+ remaining = remaining[1..];
+ },
+ else => |c| {
+ buf.append(c) catch unreachable;
+ remaining = remaining[1..];
+ },
+ }
+ }
+
+ break :scan_and_allocate_lazily;
+ }
+
+ remaining = remaining[ascii_vector_size..];
+ }
+ }
+
+ if (any_needs_escape) {
+ // pass #2: we found something that needed an escape
+ // so we'll go ahead and copy the buffer into a new buffer
+ while (remaining.len >= ascii_vector_size) {
+ const vec: AsciiVector = remaining[0..ascii_vector_size].*;
+ if (@reduce(.Min, (vec ^ vecs[0]) &
+ (vec ^ vecs[1]) &
+ (vec ^ vecs[2]) &
+ (vec ^ vecs[3]) &
+ (vec ^ vecs[4])) == 0)
+ {
+ buf.ensureUnusedCapacity(ascii_vector_size) catch unreachable;
+ var i: usize = 0;
+ while (i < ascii_vector_size) : (i += 1) {
switch (vec[i]) {
'"' => {
buf.appendSlice("&quot;") catch unreachable;
@@ -1384,65 +1526,20 @@ pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8
buf.appendSlice("&gt;") catch unreachable;
},
else => |c| {
- buf.appendAssumeCapacity(c);
+ buf.append(c) catch unreachable;
},
}
}
+
remaining = remaining[ascii_vector_size..];
- break :scan_and_allocate_lazily;
+ continue;
}
+ try buf.ensureUnusedCapacity(ascii_vector_size);
+ buf.items.ptr[buf.items.len .. buf.items.len + ascii_vector_size][0..ascii_vector_size].* = remaining[0..ascii_vector_size].*;
+ buf.items.len += ascii_vector_size;
remaining = remaining[ascii_vector_size..];
}
-
- if (any_needs_escape) {
- // pass #2: we found something that needed an escape
- // so we'll go ahead and copy the buffer into a new buffer
- while (remaining.len >= ascii_vector_size) {
- const vec: AsciiVector = remaining[0..ascii_vector_size].*;
- if (@reduce(
- .Or,
- @bitCast(AsciiVectorU1, (vec == vecs[0])) |
- @bitCast(AsciiVectorU1, (vec == vecs[1])) |
- @bitCast(AsciiVectorU1, (vec == vecs[2])) |
- @bitCast(AsciiVectorU1, (vec == vecs[3])) |
- @bitCast(AsciiVectorU1, (vec == vecs[4])),
- ) == 1) {
- buf.ensureUnusedCapacity(ascii_vector_size) catch unreachable;
- comptime var i: usize = 0;
- inline while (i < ascii_vector_size) : (i += 1) {
- switch (vec[i]) {
- '"' => {
- buf.appendSlice("&quot;") catch unreachable;
- },
- '&' => {
- buf.appendSlice("&amp;") catch unreachable;
- },
- '\'' => {
- buf.appendSlice("&#x27;") catch unreachable; // modified from escape-html; used to be '&#39'
- },
- '<' => {
- buf.appendSlice("&lt;") catch unreachable;
- },
- '>' => {
- buf.appendSlice("&gt;") catch unreachable;
- },
- else => |c| {
- buf.append(c) catch unreachable;
- },
- }
- }
-
- remaining = remaining[ascii_vector_size..];
- continue;
- }
-
- try buf.ensureUnusedCapacity(ascii_vector_size);
- buf.items.ptr[buf.items.len .. buf.items.len + ascii_vector_size][0..ascii_vector_size].* = remaining[0..ascii_vector_size].*;
- buf.items.len += ascii_vector_size;
- remaining = remaining[ascii_vector_size..];
- }
- }
}
if (!any_needs_escape) {
@@ -1531,11 +1628,11 @@ pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8
}
}
- if (any_needs_escape) {
- return buf.toOwnedSlice();
- } else {
+ if (!any_needs_escape) {
return latin1;
}
+
+ return buf.toOwnedSlice();
},
}
}
@@ -1811,7 +1908,9 @@ pub const min_16_ascii = @splat(ascii_vector_size, @as(u8, 0x20));
pub const max_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 127));
pub const min_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 0x20));
pub const AsciiVector = std.meta.Vector(ascii_vector_size, u8);
+pub const AsciiVectorSmall = std.meta.Vector(8, u8);
pub const AsciiVectorU1 = std.meta.Vector(ascii_vector_size, u1);
+pub const AsciiVectorU1Small = std.meta.Vector(8, u1);
pub const AsciiVectorU16U1 = std.meta.Vector(ascii_u16_vector_size, u1);
pub const AsciiU16Vector = std.meta.Vector(ascii_u16_vector_size, u16);
pub const max_4_ascii = @splat(4, @as(u8, 127));