diff options
author | 2022-06-04 20:01:33 -0700 | |
---|---|---|
committer | 2022-06-04 20:01:33 -0700 | |
commit | 5aa196b361f58b4ba70d21464b4f0995164e269c (patch) | |
tree | f282f32595c5d5dac5c7c9ce57367cac66a1140e | |
parent | 9f640ffb51dc216e78af6ea5fa0eb8bc782e446b (diff) | |
download | bun-5aa196b361f58b4ba70d21464b4f0995164e269c.tar.gz bun-5aa196b361f58b4ba70d21464b4f0995164e269c.tar.zst bun-5aa196b361f58b4ba70d21464b4f0995164e269c.zip |
take two
-rw-r--r-- | bench/snippets/escapeHTML.js | 78 | ||||
-rw-r--r-- | integration/bunjs-only-snippets/escapeHTML.test.js | 44 | ||||
-rw-r--r-- | src/global.zig | 12 | ||||
-rw-r--r-- | src/javascript/jsc/api/bun.zig | 42 | ||||
-rw-r--r-- | src/javascript/jsc/bindings/ZigGlobalObject.cpp | 125 | ||||
-rw-r--r-- | src/string_immutable.zig | 237 |
6 files changed, 401 insertions, 137 deletions
diff --git a/bench/snippets/escapeHTML.js b/bench/snippets/escapeHTML.js index 61bb74c7d..63e68861d 100644 --- a/bench/snippets/escapeHTML.js +++ b/bench/snippets/escapeHTML.js @@ -1,7 +1,27 @@ import { group } from "mitata"; import { bench, run } from "mitata"; -var bunEscapeHTML = Bun.escapeHTML; +var bunEscapeHTML_ = globalThis.escapeHTML || Bun.escapeHTML; +var bunEscapeHTML = function (str) { + if (str.length === 1) { + switch (str.charCodeAt(0)) { + case 34: // " + return """; + case 38: // & + return "&"; + case 39: // ' + return "'"; // modified from escape-html; used to be ''' + case 60: // < + return "<"; + case 62: // > + return ">"; + default: + return str; + } + } + + return bunEscapeHTML_(str); +}; const matchHtmlRegExp = /["'&<>]/; @@ -13,6 +33,17 @@ const matchHtmlRegExp = /["'&<>]/; * @public */ +const FIXTURE = require("fs") + .readFileSync(import.meta.dir + "/_fixture.txt", "utf8") + .split("") + .map((a) => { + if (a.charCodeAt(0) > 127) { + return "a"; + } + return a; + }) + .join(""); + function reactEscapeHtml(string) { const str = "" + string; const match = matchHtmlRegExp.exec(str); @@ -58,29 +89,28 @@ function reactEscapeHtml(string) { return lastIndex !== index ? html + str.substring(lastIndex, index) : html; } -const long = ("lalala" + "<script>alert(1)</script>" + "lalala").repeat(9000); -const short = "lalala" + "<script>alert(1)</script>" + "lalala"; -const middle = - "lalala".repeat(2000) + "<script>alert(1)</script>" + "lalala".repeat(2000); -const nothing = "lalala".repeat(9999); -group(`long (${long.length})`, () => { - bench("react's escapeHTML", () => reactEscapeHtml(long)); - bench("bun's escapeHTML", () => bunEscapeHTML(long)); -}); - -group(`short (${short.length})`, () => { - bench("react's escapeHTML", () => reactEscapeHtml(short)); - bench("bun's escapeHTML", () => bunEscapeHTML(short)); -}); +for (let input of [ + // " ", + // "<script>alert('xss')</script>", + // "hello world", + // "hello world<script>alert('xss')</script>", + // "<", + // ">", + // `short value`, + `nothing to escape `.repeat(99999), + FIXTURE, +]) { + group( + { + summary: true, + name: `"` + input.substring(0, Math.min(input.length, 32)) + `"`, + }, + () => { + bench(`react's escapeHTML`, () => reactEscapeHtml(input)); -group(`middle (${middle.length})`, () => { - bench("react's escapeHTML", () => reactEscapeHtml(middle)); - bench("bun's escapeHTML", () => bunEscapeHTML(middle)); -}); - -group(`nothing (${nothing.length})`, () => { - bench("react's escapeHTML", () => reactEscapeHtml(nothing)); - bench("bun's escapeHTML", () => bunEscapeHTML(nothing)); -}); + bench(`bun's escapeHTML`, () => bunEscapeHTML(input)); + } + ); +} await run(); diff --git a/integration/bunjs-only-snippets/escapeHTML.test.js b/integration/bunjs-only-snippets/escapeHTML.test.js index ca0ff5a36..13ff138c9 100644 --- a/integration/bunjs-only-snippets/escapeHTML.test.js +++ b/integration/bunjs-only-snippets/escapeHTML.test.js @@ -1,23 +1,23 @@ import { describe, it, expect } from "bun:test"; import { gcTick } from "./gc"; -describe("Bun.escapeHTML", () => { +describe("escapeHTML", () => { it("works", () => { - expect(Bun.escapeHTML("<script>alert(1)</script>")).toBe( + expect(escapeHTML("<script>alert(1)</script>")).toBe( "<script>alert(1)</script>" ); - expect(Bun.escapeHTML("<")).toBe("<"); - expect(Bun.escapeHTML(">")).toBe(">"); - expect(Bun.escapeHTML("&")).toBe("&"); - expect(Bun.escapeHTML("'")).toBe("'"); - expect(Bun.escapeHTML('"')).toBe("""); - expect(Bun.escapeHTML("\n")).toBe("\n"); - expect(Bun.escapeHTML("\r")).toBe("\r"); - expect(Bun.escapeHTML("\t")).toBe("\t"); - expect(Bun.escapeHTML("\f")).toBe("\f"); - expect(Bun.escapeHTML("\v")).toBe("\v"); - expect(Bun.escapeHTML("\b")).toBe("\b"); - expect(Bun.escapeHTML("\u00A0")).toBe("\u00A0"); + expect(escapeHTML("<")).toBe("<"); + expect(escapeHTML(">")).toBe(">"); + expect(escapeHTML("&")).toBe("&"); + expect(escapeHTML("'")).toBe("'"); + expect(escapeHTML('"')).toBe("""); + expect(escapeHTML("\n")).toBe("\n"); + expect(escapeHTML("\r")).toBe("\r"); + expect(escapeHTML("\t")).toBe("\t"); + expect(escapeHTML("\f")).toBe("\f"); + expect(escapeHTML("\v")).toBe("\v"); + expect(escapeHTML("\b")).toBe("\b"); + expect(escapeHTML("\u00A0")).toBe("\u00A0"); // The matrix of cases we need to test for: // 1. Works with short strings @@ -28,27 +28,27 @@ describe("Bun.escapeHTML", () => { // 6. Works when the text to escape is in the beginning // 7. Works when the text to escape is in the end // 8. Returns the same string when there's no need to escape - expect( - Bun.escapeHTML("lalala" + "<script>alert(1)</script>" + "lalala") - ).toBe("lalala<script>alert(1)</script>lalala"); + expect(escapeHTML("lalala" + "<script>alert(1)</script>" + "lalala")).toBe( + "lalala<script>alert(1)</script>lalala" + ); - expect(Bun.escapeHTML("<script>alert(1)</script>" + "lalala")).toBe( + expect(escapeHTML("<script>alert(1)</script>" + "lalala")).toBe( "<script>alert(1)</script>lalala" ); - expect(Bun.escapeHTML("lalala" + "<script>alert(1)</script>")).toBe( + expect(escapeHTML("lalala" + "<script>alert(1)</script>")).toBe( "lalala" + "<script>alert(1)</script>" ); expect( - Bun.escapeHTML( + escapeHTML( ("lalala" + "<script>alert(1)</script>" + "lalala").repeat(900) ) ).toBe("lalala<script>alert(1)</script>lalala".repeat(900)); expect( - Bun.escapeHTML(("<script>alert(1)</script>" + "lalala").repeat(900)) + escapeHTML(("<script>alert(1)</script>" + "lalala").repeat(900)) ).toBe("<script>alert(1)</script>lalala".repeat(900)); expect( - Bun.escapeHTML(("lalala" + "<script>alert(1)</script>").repeat(900)) + escapeHTML(("lalala" + "<script>alert(1)</script>").repeat(900)) ).toBe(("lalala" + "<script>alert(1)</script>").repeat(900)); }); }); diff --git a/src/global.zig b/src/global.zig index 6378bb2ba..6160ce781 100644 --- a/src/global.zig +++ b/src/global.zig @@ -189,3 +189,15 @@ pub fn DebugOnlyDefault(comptime val: anytype) if (Environment.isDebug) @TypeOf( return {}; } + +pub inline fn range(comptime min: anytype, comptime max: anytype) [max - min]usize { + return comptime brk: { + var slice: [max - min]usize = undefined; + var i: usize = min; + while (i < max) { + slice[i - min] = i; + i += 1; + } + break :brk slice; + }; +} diff --git a/src/javascript/jsc/api/bun.zig b/src/javascript/jsc/api/bun.zig index 1ee9cb96d..8228139de 100644 --- a/src/javascript/jsc/api/bun.zig +++ b/src/javascript/jsc/api/bun.zig @@ -1150,9 +1150,6 @@ pub const Class = NewClass( .inflateSync = .{ .rfn = JSC.wrapWithHasContainer(JSZlib, "inflateSync", false, false, true), }, - .escapeHTML = .{ - .rfn = Bun.escapeHTML, - }, }, .{ .main = .{ @@ -1615,39 +1612,42 @@ pub fn serve( unreachable; } -pub fn escapeHTML( - _: void, - ctx: js.JSContextRef, - _: js.JSObjectRef, - _: js.JSObjectRef, - arguments: []const js.JSValueRef, - exception: js.ExceptionRef, -) js.JSValueRef { +pub export fn Bun__escapeHTML( + globalObject: *JSGlobalObject, + callframe: *JSC.CallFrame, +) JSC.JSValue { + const arguments = callframe.arguments(); if (arguments.len < 1) { - return ZigString.init("").toValue(ctx).asObjectRef(); + return ZigString.Empty.toValue(globalObject); } - const input_value = arguments[0].?.value(); - const zig_str = input_value.getZigString(ctx); + const input_value = arguments[0]; + const zig_str = input_value.getZigString(globalObject); if (zig_str.is16Bit()) { - return input_value.asObjectRef(); + return input_value; } else { var input_slice = zig_str.slice(); - var escaped_html = strings.escapeHTMLForLatin1Input(ctx.bunVM().allocator, input_slice) catch { - JSC.JSError(undefined, "Out of memory", .{}, ctx, exception); - return null; + var escaped_html = strings.escapeHTMLForLatin1Input(globalObject.bunVM().allocator, input_slice) catch { + globalObject.vm().throwError(globalObject, ZigString.init("Out of memory").toValue(globalObject)); + return JSC.JSValue.jsUndefined(); }; if (escaped_html.ptr == input_slice.ptr and escaped_html.len == input_slice.len) { - return input_value.asObjectRef(); + return input_value; } if (input_slice.len == 1) { // single character escaped strings are statically allocated - return ZigString.init(escaped_html).toValue(ctx).asObjectRef(); + return ZigString.init(escaped_html).toValue(globalObject); } - return ZigString.init(escaped_html).toExternalValue(ctx).asObjectRef(); + return ZigString.init(escaped_html).toExternalValue(globalObject); + } +} + +comptime { + if (!JSC.is_bindgen) { + _ = Bun__escapeHTML; } } diff --git a/src/javascript/jsc/bindings/ZigGlobalObject.cpp b/src/javascript/jsc/bindings/ZigGlobalObject.cpp index 692c69b96..80211227a 100644 --- a/src/javascript/jsc/bindings/ZigGlobalObject.cpp +++ b/src/javascript/jsc/bindings/ZigGlobalObject.cpp @@ -1248,6 +1248,119 @@ extern "C" JSC__JSValue ZigGlobalObject__createNativeReadableStream(Zig::GlobalO return JSC::JSValue::encode(call(globalObject, function, callData, JSC::jsUndefined(), arguments)); } +// static inline EncodedJSValue flattenArrayOfBuffersIntoArrayBuffer(JSGlobalObject* globalObject, JSValue arrayValue) +// { +// auto& vm = globalObject->vm(); + +// auto clientData = WebCore::clientData(vm); +// if (arrayValue.isUndefinedOrNull() || !arrayValue) { +// return JSC::JSValue::encode(JSC::JSArrayBuffer::create(vm, 0)); +// } + +// auto scope = DECLARE_THROW_SCOPE(vm); + +// auto array = JSC::jsDynamicCast<JSC::JSArray*>(arrayValue); +// if (!array) { +// throwTypeError(lexicalGlobalObject, throwScope, "Argument must be an array"_s); +// return JSValue::encode(jsUndefined()); +// } + +// size_t arrayLength = array->length(); +// if (arrayLength < 1) { +// RELEASE_AND_RETURN(throwScope, JSC::JSArrayBuffer::create(lexicalGlobalObject, 0)); +// } + +// size_t byteLength = 0; + +// for (size_t i = 0; i < arrayLength; i++) { +// auto element = array->getIndex(lexicalGlobalObject, i); +// RETURN_IF_EXCEPTION(throwScope, {}); + +// auto* typedArray = JSC::jsDynamicCast<JSC::JSArrayBufferView*>(element); +// if (UNLIKELY(!typedArray)) { +// throwTypeError(lexicalGlobalObject, throwScope, "Expected TypedArray"_s); +// return JSValue::encode(jsUndefined()); +// } +// byteLength += typedArray->byteLength(); +// } + +// if (byteLength == 0) { +// RELEASE_AND_RETURN(throwScope, JSC::JSArrayBuffer::create(lexicalGlobalObject, 0)); +// } + +// auto& buffer = JSC::ArrayBuffer::tryCreateUninitialized(byteLength, 1); +// if (UNLIKELY(!buffer)) { +// throwTypeError(lexicalGlobalObject, throwScope, "Failed to allocate ArrayBuffer"_s); +// return JSValue::encode(jsUndefined()); +// } + +// size_t remain = byteLength; +// auto* head = outBuffer->data(); + +// for (size_t i = 0; i < arrayLength && remain > 0; i++) { +// auto element = array->getIndex(lexicalGlobalObject, i); +// RETURN_IF_EXCEPTION(throwScope, {}); +// auto* typedArray = JSC::jsCast<JSC::JSArrayBufferView*>(element); +// size_t length = std::min(remain, typedArray->byteLength()); +// memcpy(head, typedArray->vector(), length); +// remain -= length; +// head += length; +// } + +// return JSValue::encode(JSC::JSArrayBuffer::create(lexicalGlobalObject, WTFMove(buffer))); +// } + +// static EncodedJSValue ZigGlobalObject__readableStreamToArrayBuffer_resolve(JSGlobalObject* globalObject, JSC::CallFrame* callFrame) +// { +// auto& vm = globalObject->vm(); + +// if (callFrame->argumentCount() < 1) { +// auto scope = DECLARE_THROW_SCOPE(vm); +// throwTypeError(lexicalGlobalObject, throwScope, "Expected at least one argument"_s); +// return JSValue::encode(jsUndefined()); +// } + +// auto arrayValue = callFrame->uncheckedArgument(0); + +// return flattenArrayOfBuffersIntoArrayBuffer(globalObject, arrayValue); +// } + +// extern "C" JSC__JSValue ZigGlobalObject__readableStreamToArrayBuffer(Zig::GlobalObject* globalObject, JSC__JSValue readableStreamValue); +// extern "C" JSC__JSValue ZigGlobalObject__readableStreamToArrayBuffer(Zig::GlobalObject* globalObject, JSC__JSValue readableStreamValue) +// { +// auto& vm = globalObject->vm(); + +// auto clientData = WebCore::clientData(vm); +// auto& builtinNames = WebCore::builtinNames(vm); + +// auto function = globalObject->getDirect(vm, builtinNames.readableStreamToArrayPrivateName()).getObject(); +// JSC::MarkedArgumentBuffer arguments = JSC::MarkedArgumentBuffer(); +// arguments.append(JSValue::decode(readableStreamValue)); + +// auto callData = JSC::getCallData(function); +// JSValue result = call(globalObject, function, callData, JSC::jsUndefined(), arguments); +// if (UNLIKELY(result.isError())) +// return JSValue::encode(result); +// } + +// extern "C" JSC__JSValue ZigGlobalObject__readableStreamToText(Zig::GlobalObject* globalObject, JSC__JSValue readableStreamValue); +// extern "C" JSC__JSValue ZigGlobalObject__readableStreamToText(Zig::GlobalObject* globalObject, JSC__JSValue readableStreamValue) +// { +// auto& vm = globalObject->vm(); +// auto scope = DECLARE_THROW_SCOPE(vm); + +// auto clientData = WebCore::clientData(vm); +// auto& builtinNames = WebCore::builtinNames(vm); + +// auto function = globalObject->getDirect(vm, builtinNames.createNativeReadableStreamPrivateName()).getObject(); +// JSC::MarkedArgumentBuffer arguments = JSC::MarkedArgumentBuffer(); +// arguments.append(JSValue::decode(nativeType)); +// arguments.append(JSValue::decode(nativePtr)); + +// auto callData = JSC::getCallData(function); +// return JSC::JSValue::encode(call(globalObject, function, callData, JSC::jsUndefined(), arguments)); +// } + void GlobalObject::finishCreation(VM& vm) { Base::finishCreation(vm); @@ -1258,6 +1371,8 @@ void GlobalObject::finishCreation(VM& vm) RELEASE_ASSERT(classInfo()); } +extern "C" EncodedJSValue Bun__escapeHTML(JSGlobalObject* globalObject, CallFrame* callFrame); + void GlobalObject::addBuiltinGlobals(JSC::VM& vm) { m_builtinInternalFunctions.initialize(*this); @@ -1266,7 +1381,7 @@ void GlobalObject::addBuiltinGlobals(JSC::VM& vm) auto& builtinNames = WebCore::builtinNames(vm); WTF::Vector<GlobalPropertyInfo> extraStaticGlobals; - extraStaticGlobals.reserveCapacity(27); + extraStaticGlobals.reserveCapacity(28); JSC::Identifier queueMicrotaskIdentifier = JSC::Identifier::fromString(vm, "queueMicrotask"_s); extraStaticGlobals.uncheckedAppend( @@ -1303,6 +1418,13 @@ void GlobalObject::addBuiltinGlobals(JSC::VM& vm) "clearInterval"_s, functionClearInterval), JSC::PropertyAttribute::Function | JSC::PropertyAttribute::DontDelete | 0 }); + JSC::Identifier escapeHTMLIdentifier = JSC::Identifier::fromString(vm, "escapeHTML"_s); + extraStaticGlobals.uncheckedAppend( + GlobalPropertyInfo { escapeHTMLIdentifier, + JSC::JSFunction::create(vm, JSC::jsCast<JSC::JSGlobalObject*>(globalObject()), 0, + "escapeHTML"_s, Bun__escapeHTML), + JSC::PropertyAttribute::Function | JSC::PropertyAttribute::DontDelete | 0 }); + JSC::Identifier atobIdentifier = JSC::Identifier::fromString(vm, "atob"_s); extraStaticGlobals.uncheckedAppend( GlobalPropertyInfo { atobIdentifier, @@ -1355,6 +1477,7 @@ void GlobalObject::addBuiltinGlobals(JSC::VM& vm) putDirectBuiltinFunction(vm, this, builtinNames.createFIFOPrivateName(), streamInternalsCreateFIFOCodeGenerator(vm), PropertyAttribute::Builtin | PropertyAttribute::DontDelete | PropertyAttribute::ReadOnly); putDirectBuiltinFunction(vm, this, builtinNames.createNativeReadableStreamPrivateName(), readableStreamCreateNativeReadableStreamCodeGenerator(vm), PropertyAttribute::Builtin | PropertyAttribute::DontDelete | PropertyAttribute::ReadOnly); putDirectBuiltinFunction(vm, this, builtinNames.createEmptyReadableStreamPrivateName(), readableStreamCreateEmptyReadableStreamCodeGenerator(vm), PropertyAttribute::Builtin | PropertyAttribute::DontDelete | PropertyAttribute::ReadOnly); + // putDirectBuiltinFunction(vm, this, builtinNames.readableStreamToArrayPrivateName(), readableStreamReadableStreamToArrayCodeGenerator(vm), PropertyAttribute::Builtin | PropertyAttribute::DontDelete | PropertyAttribute::ReadOnly); putDirectNativeFunction(vm, this, builtinNames.createUninitializedArrayBufferPrivateName(), 1, functionCreateUninitializedArrayBuffer, NoIntrinsic, PropertyAttribute::DontDelete | PropertyAttribute::ReadOnly | PropertyAttribute::Function); diff --git a/src/string_immutable.zig b/src/string_immutable.zig index 367e6300d..849691ca2 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -1207,8 +1207,7 @@ pub fn copyLatin1IntoUTF8(buf_: []u8, comptime Type: type, latin1_: Type) Encode break; } - buf[0..8].* = @bitCast([ascii_vector_size]u8, vec)[0..8].*; - buf[8..ascii_vector_size].* = @bitCast([ascii_vector_size]u8, vec)[8..ascii_vector_size].*; + buf[0..ascii_vector_size].* = @bitCast([ascii_vector_size]u8, vec)[0..ascii_vector_size].*; latin1 = latin1[ascii_vector_size..]; buf = buf[ascii_vector_size..]; } @@ -1317,6 +1316,74 @@ pub fn elementLengthLatin1IntoUTF16(comptime Type: type, latin1_: Type) usize { } pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8) ![]const u8 { + const Pusher = struct { + const lengths: [std.math.maxInt(u8)]u4 = brk: { + var values: [std.math.maxInt(u8)]u4 = undefined; + for (values) |_, i| { + switch (i) { + '"' => { + values[i] = """.len; + }, + '&' => { + values[i] = "&".len; + }, + '\'' => { + values[i] = "'".len; + }, + '<' => { + values[i] = "<".len; + }, + '>' => { + values[i] = ">".len; + }, + else => { + values[i] = 1; + }, + } + } + + break :brk values; + }; + + inline fn appendString(buf: [*]u8, comptime str: []const u8) usize { + buf[0..str.len].* = str[0..str.len].*; + return str.len; + } + + pub inline fn append(buf: [*]u8, char: u8) usize { + if (lengths[char] == 1) { + buf[0] = char; + return 1; + } + + return switch (char) { + '"' => appendString(buf, """), + '&' => appendString(buf, "&"), + '\'' => appendString(buf, "'"), + '<' => appendString(buf, "<"), + '>' => appendString(buf, ">"), + else => unreachable, + }; + } + pub inline fn push(comptime c: anytype, chars: []const u8, allo: std.mem.Allocator) []const u8 { + var total: usize = 0; + inline for (comptime bun.range(0, c)) |i| { + total += @as(usize, lengths[chars[i]]); + } + + if (total == c) { + return chars; + } + + var output = allo.alloc(u8, total) catch unreachable; + var head = output.ptr; + inline for (comptime bun.range(0, c)) |i| { + head += @This().append(head, chars[i]); + } + + return output; + } + }; switch (latin1.len) { 0 => return "", 1 => return switch (latin1[0]) { @@ -1327,6 +1394,43 @@ pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8 '>' => ">", else => latin1, }, + 2 => { + const first: []const u8 = switch (latin1[0]) { + '"' => """, + '&' => "&", + '\'' => "'", + '<' => "<", + '>' => ">", + else => latin1[0..1], + }; + const second: []const u8 = switch (latin1[1]) { + '"' => """, + '&' => "&", + '\'' => "'", + '<' => "<", + '>' => ">", + else => latin1[1..2], + }; + if (first.len == 1 and second.len == 1) { + return latin1; + } + + return strings.append(allocator, first, second); + }, + 3 => return Pusher.push(3, latin1, allocator), + 4 => return Pusher.push(4, latin1, allocator), + 5 => return Pusher.push(5, latin1, allocator), + 6 => return Pusher.push(6, latin1, allocator), + 7 => return Pusher.push(7, latin1, allocator), + 8 => return Pusher.push(8, latin1, allocator), + 9 => return Pusher.push(9, latin1, allocator), + 10 => return Pusher.push(10, latin1, allocator), + 11 => return Pusher.push(11, latin1, allocator), + 12 => return Pusher.push(12, latin1, allocator), + 13 => return Pusher.push(13, latin1, allocator), + 14 => return Pusher.push(14, latin1, allocator), + 15 => return Pusher.push(15, latin1, allocator), + else => { var remaining = latin1; @@ -1339,34 +1443,72 @@ pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8 break :brk _vecs; }; - var buf: std.ArrayList(u8) = undefined; var any_needs_escape = false; + var buf: std.ArrayList(u8) = undefined; if (comptime Environment.isAarch64 or Environment.isX64) { - // pass #1: scan for any characters that need escaping // assume most strings won't need any escaping, so don't actually allocate the buffer scan_and_allocate_lazily: while (remaining.len >= ascii_vector_size) { if (comptime Environment.allow_assert) { std.debug.assert(!any_needs_escape); } - const vec: AsciiVector = remaining[0..ascii_vector_size].*; - if (@reduce( - .Or, - @bitCast(AsciiVectorU1, (vec == vecs[0])) | - @bitCast(AsciiVectorU1, (vec == vecs[1])) | - @bitCast(AsciiVectorU1, (vec == vecs[2])) | - @bitCast(AsciiVectorU1, (vec == vecs[3])) | - @bitCast(AsciiVectorU1, (vec == vecs[4])), - ) == 1) { + if (@reduce(.Min, (vec ^ vecs[0]) & + (vec ^ vecs[1]) & + (vec ^ vecs[2]) & + (vec ^ vecs[3]) & + (vec ^ vecs[4])) == 0) + { buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6); const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr); @memcpy(buf.items.ptr, latin1.ptr, copy_len); buf.items.len = copy_len; any_needs_escape = true; - comptime var i: usize = 0; - inline while (i < ascii_vector_size) : (i += 1) { + var i: usize = 0; + while (i < ascii_vector_size) : (i += 1) { + switch (vec[i]) { + '"', '&', '\'', '<', '>' => |c| { + const result = switch (c) { + '"' => """, + '&' => "&", + '\'' => "'", + '<' => "<", + '>' => ">", + else => unreachable, + }; + + buf.appendSlice(result) catch unreachable; + remaining = remaining[1..]; + }, + else => |c| { + buf.append(c) catch unreachable; + remaining = remaining[1..]; + }, + } + } + + break :scan_and_allocate_lazily; + } + + remaining = remaining[ascii_vector_size..]; + } + } + + if (any_needs_escape) { + // pass #2: we found something that needed an escape + // so we'll go ahead and copy the buffer into a new buffer + while (remaining.len >= ascii_vector_size) { + const vec: AsciiVector = remaining[0..ascii_vector_size].*; + if (@reduce(.Min, (vec ^ vecs[0]) & + (vec ^ vecs[1]) & + (vec ^ vecs[2]) & + (vec ^ vecs[3]) & + (vec ^ vecs[4])) == 0) + { + buf.ensureUnusedCapacity(ascii_vector_size) catch unreachable; + var i: usize = 0; + while (i < ascii_vector_size) : (i += 1) { switch (vec[i]) { '"' => { buf.appendSlice(""") catch unreachable; @@ -1384,65 +1526,20 @@ pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8 buf.appendSlice(">") catch unreachable; }, else => |c| { - buf.appendAssumeCapacity(c); + buf.append(c) catch unreachable; }, } } + remaining = remaining[ascii_vector_size..]; - break :scan_and_allocate_lazily; + continue; } + try buf.ensureUnusedCapacity(ascii_vector_size); + buf.items.ptr[buf.items.len .. buf.items.len + ascii_vector_size][0..ascii_vector_size].* = remaining[0..ascii_vector_size].*; + buf.items.len += ascii_vector_size; remaining = remaining[ascii_vector_size..]; } - - if (any_needs_escape) { - // pass #2: we found something that needed an escape - // so we'll go ahead and copy the buffer into a new buffer - while (remaining.len >= ascii_vector_size) { - const vec: AsciiVector = remaining[0..ascii_vector_size].*; - if (@reduce( - .Or, - @bitCast(AsciiVectorU1, (vec == vecs[0])) | - @bitCast(AsciiVectorU1, (vec == vecs[1])) | - @bitCast(AsciiVectorU1, (vec == vecs[2])) | - @bitCast(AsciiVectorU1, (vec == vecs[3])) | - @bitCast(AsciiVectorU1, (vec == vecs[4])), - ) == 1) { - buf.ensureUnusedCapacity(ascii_vector_size) catch unreachable; - comptime var i: usize = 0; - inline while (i < ascii_vector_size) : (i += 1) { - switch (vec[i]) { - '"' => { - buf.appendSlice(""") catch unreachable; - }, - '&' => { - buf.appendSlice("&") catch unreachable; - }, - '\'' => { - buf.appendSlice("'") catch unreachable; // modified from escape-html; used to be ''' - }, - '<' => { - buf.appendSlice("<") catch unreachable; - }, - '>' => { - buf.appendSlice(">") catch unreachable; - }, - else => |c| { - buf.append(c) catch unreachable; - }, - } - } - - remaining = remaining[ascii_vector_size..]; - continue; - } - - try buf.ensureUnusedCapacity(ascii_vector_size); - buf.items.ptr[buf.items.len .. buf.items.len + ascii_vector_size][0..ascii_vector_size].* = remaining[0..ascii_vector_size].*; - buf.items.len += ascii_vector_size; - remaining = remaining[ascii_vector_size..]; - } - } } if (!any_needs_escape) { @@ -1531,11 +1628,11 @@ pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8 } } - if (any_needs_escape) { - return buf.toOwnedSlice(); - } else { + if (!any_needs_escape) { return latin1; } + + return buf.toOwnedSlice(); }, } } @@ -1811,7 +1908,9 @@ pub const min_16_ascii = @splat(ascii_vector_size, @as(u8, 0x20)); pub const max_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 127)); pub const min_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 0x20)); pub const AsciiVector = std.meta.Vector(ascii_vector_size, u8); +pub const AsciiVectorSmall = std.meta.Vector(8, u8); pub const AsciiVectorU1 = std.meta.Vector(ascii_vector_size, u1); +pub const AsciiVectorU1Small = std.meta.Vector(8, u1); pub const AsciiVectorU16U1 = std.meta.Vector(ascii_u16_vector_size, u1); pub const AsciiU16Vector = std.meta.Vector(ascii_u16_vector_size, u16); pub const max_4_ascii = @splat(4, @as(u8, 127)); |