diff options
author | 2022-06-04 20:01:33 -0700 | |
---|---|---|
committer | 2022-06-04 20:01:33 -0700 | |
commit | 5aa196b361f58b4ba70d21464b4f0995164e269c (patch) | |
tree | f282f32595c5d5dac5c7c9ce57367cac66a1140e /src | |
parent | 9f640ffb51dc216e78af6ea5fa0eb8bc782e446b (diff) | |
download | bun-5aa196b361f58b4ba70d21464b4f0995164e269c.tar.gz bun-5aa196b361f58b4ba70d21464b4f0995164e269c.tar.zst bun-5aa196b361f58b4ba70d21464b4f0995164e269c.zip |
take two
Diffstat (limited to 'src')
-rw-r--r-- | src/global.zig | 12 | ||||
-rw-r--r-- | src/javascript/jsc/api/bun.zig | 42 | ||||
-rw-r--r-- | src/javascript/jsc/bindings/ZigGlobalObject.cpp | 125 | ||||
-rw-r--r-- | src/string_immutable.zig | 237 |
4 files changed, 325 insertions, 91 deletions
diff --git a/src/global.zig b/src/global.zig index 6378bb2ba..6160ce781 100644 --- a/src/global.zig +++ b/src/global.zig @@ -189,3 +189,15 @@ pub fn DebugOnlyDefault(comptime val: anytype) if (Environment.isDebug) @TypeOf( return {}; } + +pub inline fn range(comptime min: anytype, comptime max: anytype) [max - min]usize { + return comptime brk: { + var slice: [max - min]usize = undefined; + var i: usize = min; + while (i < max) { + slice[i - min] = i; + i += 1; + } + break :brk slice; + }; +} diff --git a/src/javascript/jsc/api/bun.zig b/src/javascript/jsc/api/bun.zig index 1ee9cb96d..8228139de 100644 --- a/src/javascript/jsc/api/bun.zig +++ b/src/javascript/jsc/api/bun.zig @@ -1150,9 +1150,6 @@ pub const Class = NewClass( .inflateSync = .{ .rfn = JSC.wrapWithHasContainer(JSZlib, "inflateSync", false, false, true), }, - .escapeHTML = .{ - .rfn = Bun.escapeHTML, - }, }, .{ .main = .{ @@ -1615,39 +1612,42 @@ pub fn serve( unreachable; } -pub fn escapeHTML( - _: void, - ctx: js.JSContextRef, - _: js.JSObjectRef, - _: js.JSObjectRef, - arguments: []const js.JSValueRef, - exception: js.ExceptionRef, -) js.JSValueRef { +pub export fn Bun__escapeHTML( + globalObject: *JSGlobalObject, + callframe: *JSC.CallFrame, +) JSC.JSValue { + const arguments = callframe.arguments(); if (arguments.len < 1) { - return ZigString.init("").toValue(ctx).asObjectRef(); + return ZigString.Empty.toValue(globalObject); } - const input_value = arguments[0].?.value(); - const zig_str = input_value.getZigString(ctx); + const input_value = arguments[0]; + const zig_str = input_value.getZigString(globalObject); if (zig_str.is16Bit()) { - return input_value.asObjectRef(); + return input_value; } else { var input_slice = zig_str.slice(); - var escaped_html = strings.escapeHTMLForLatin1Input(ctx.bunVM().allocator, input_slice) catch { - JSC.JSError(undefined, "Out of memory", .{}, ctx, exception); - return null; + var escaped_html = strings.escapeHTMLForLatin1Input(globalObject.bunVM().allocator, input_slice) catch { + globalObject.vm().throwError(globalObject, ZigString.init("Out of memory").toValue(globalObject)); + return JSC.JSValue.jsUndefined(); }; if (escaped_html.ptr == input_slice.ptr and escaped_html.len == input_slice.len) { - return input_value.asObjectRef(); + return input_value; } if (input_slice.len == 1) { // single character escaped strings are statically allocated - return ZigString.init(escaped_html).toValue(ctx).asObjectRef(); + return ZigString.init(escaped_html).toValue(globalObject); } - return ZigString.init(escaped_html).toExternalValue(ctx).asObjectRef(); + return ZigString.init(escaped_html).toExternalValue(globalObject); + } +} + +comptime { + if (!JSC.is_bindgen) { + _ = Bun__escapeHTML; } } diff --git a/src/javascript/jsc/bindings/ZigGlobalObject.cpp b/src/javascript/jsc/bindings/ZigGlobalObject.cpp index 692c69b96..80211227a 100644 --- a/src/javascript/jsc/bindings/ZigGlobalObject.cpp +++ b/src/javascript/jsc/bindings/ZigGlobalObject.cpp @@ -1248,6 +1248,119 @@ extern "C" JSC__JSValue ZigGlobalObject__createNativeReadableStream(Zig::GlobalO return JSC::JSValue::encode(call(globalObject, function, callData, JSC::jsUndefined(), arguments)); } +// static inline EncodedJSValue flattenArrayOfBuffersIntoArrayBuffer(JSGlobalObject* globalObject, JSValue arrayValue) +// { +// auto& vm = globalObject->vm(); + +// auto clientData = WebCore::clientData(vm); +// if (arrayValue.isUndefinedOrNull() || !arrayValue) { +// return JSC::JSValue::encode(JSC::JSArrayBuffer::create(vm, 0)); +// } + +// auto scope = DECLARE_THROW_SCOPE(vm); + +// auto array = JSC::jsDynamicCast<JSC::JSArray*>(arrayValue); +// if (!array) { +// throwTypeError(lexicalGlobalObject, throwScope, "Argument must be an array"_s); +// return JSValue::encode(jsUndefined()); +// } + +// size_t arrayLength = array->length(); +// if (arrayLength < 1) { +// RELEASE_AND_RETURN(throwScope, JSC::JSArrayBuffer::create(lexicalGlobalObject, 0)); +// } + +// size_t byteLength = 0; + +// for (size_t i = 0; i < arrayLength; i++) { +// auto element = array->getIndex(lexicalGlobalObject, i); +// RETURN_IF_EXCEPTION(throwScope, {}); + +// auto* typedArray = JSC::jsDynamicCast<JSC::JSArrayBufferView*>(element); +// if (UNLIKELY(!typedArray)) { +// throwTypeError(lexicalGlobalObject, throwScope, "Expected TypedArray"_s); +// return JSValue::encode(jsUndefined()); +// } +// byteLength += typedArray->byteLength(); +// } + +// if (byteLength == 0) { +// RELEASE_AND_RETURN(throwScope, JSC::JSArrayBuffer::create(lexicalGlobalObject, 0)); +// } + +// auto& buffer = JSC::ArrayBuffer::tryCreateUninitialized(byteLength, 1); +// if (UNLIKELY(!buffer)) { +// throwTypeError(lexicalGlobalObject, throwScope, "Failed to allocate ArrayBuffer"_s); +// return JSValue::encode(jsUndefined()); +// } + +// size_t remain = byteLength; +// auto* head = outBuffer->data(); + +// for (size_t i = 0; i < arrayLength && remain > 0; i++) { +// auto element = array->getIndex(lexicalGlobalObject, i); +// RETURN_IF_EXCEPTION(throwScope, {}); +// auto* typedArray = JSC::jsCast<JSC::JSArrayBufferView*>(element); +// size_t length = std::min(remain, typedArray->byteLength()); +// memcpy(head, typedArray->vector(), length); +// remain -= length; +// head += length; +// } + +// return JSValue::encode(JSC::JSArrayBuffer::create(lexicalGlobalObject, WTFMove(buffer))); +// } + +// static EncodedJSValue ZigGlobalObject__readableStreamToArrayBuffer_resolve(JSGlobalObject* globalObject, JSC::CallFrame* callFrame) +// { +// auto& vm = globalObject->vm(); + +// if (callFrame->argumentCount() < 1) { +// auto scope = DECLARE_THROW_SCOPE(vm); +// throwTypeError(lexicalGlobalObject, throwScope, "Expected at least one argument"_s); +// return JSValue::encode(jsUndefined()); +// } + +// auto arrayValue = callFrame->uncheckedArgument(0); + +// return flattenArrayOfBuffersIntoArrayBuffer(globalObject, arrayValue); +// } + +// extern "C" JSC__JSValue ZigGlobalObject__readableStreamToArrayBuffer(Zig::GlobalObject* globalObject, JSC__JSValue readableStreamValue); +// extern "C" JSC__JSValue ZigGlobalObject__readableStreamToArrayBuffer(Zig::GlobalObject* globalObject, JSC__JSValue readableStreamValue) +// { +// auto& vm = globalObject->vm(); + +// auto clientData = WebCore::clientData(vm); +// auto& builtinNames = WebCore::builtinNames(vm); + +// auto function = globalObject->getDirect(vm, builtinNames.readableStreamToArrayPrivateName()).getObject(); +// JSC::MarkedArgumentBuffer arguments = JSC::MarkedArgumentBuffer(); +// arguments.append(JSValue::decode(readableStreamValue)); + +// auto callData = JSC::getCallData(function); +// JSValue result = call(globalObject, function, callData, JSC::jsUndefined(), arguments); +// if (UNLIKELY(result.isError())) +// return JSValue::encode(result); +// } + +// extern "C" JSC__JSValue ZigGlobalObject__readableStreamToText(Zig::GlobalObject* globalObject, JSC__JSValue readableStreamValue); +// extern "C" JSC__JSValue ZigGlobalObject__readableStreamToText(Zig::GlobalObject* globalObject, JSC__JSValue readableStreamValue) +// { +// auto& vm = globalObject->vm(); +// auto scope = DECLARE_THROW_SCOPE(vm); + +// auto clientData = WebCore::clientData(vm); +// auto& builtinNames = WebCore::builtinNames(vm); + +// auto function = globalObject->getDirect(vm, builtinNames.createNativeReadableStreamPrivateName()).getObject(); +// JSC::MarkedArgumentBuffer arguments = JSC::MarkedArgumentBuffer(); +// arguments.append(JSValue::decode(nativeType)); +// arguments.append(JSValue::decode(nativePtr)); + +// auto callData = JSC::getCallData(function); +// return JSC::JSValue::encode(call(globalObject, function, callData, JSC::jsUndefined(), arguments)); +// } + void GlobalObject::finishCreation(VM& vm) { Base::finishCreation(vm); @@ -1258,6 +1371,8 @@ void GlobalObject::finishCreation(VM& vm) RELEASE_ASSERT(classInfo()); } +extern "C" EncodedJSValue Bun__escapeHTML(JSGlobalObject* globalObject, CallFrame* callFrame); + void GlobalObject::addBuiltinGlobals(JSC::VM& vm) { m_builtinInternalFunctions.initialize(*this); @@ -1266,7 +1381,7 @@ void GlobalObject::addBuiltinGlobals(JSC::VM& vm) auto& builtinNames = WebCore::builtinNames(vm); WTF::Vector<GlobalPropertyInfo> extraStaticGlobals; - extraStaticGlobals.reserveCapacity(27); + extraStaticGlobals.reserveCapacity(28); JSC::Identifier queueMicrotaskIdentifier = JSC::Identifier::fromString(vm, "queueMicrotask"_s); extraStaticGlobals.uncheckedAppend( @@ -1303,6 +1418,13 @@ void GlobalObject::addBuiltinGlobals(JSC::VM& vm) "clearInterval"_s, functionClearInterval), JSC::PropertyAttribute::Function | JSC::PropertyAttribute::DontDelete | 0 }); + JSC::Identifier escapeHTMLIdentifier = JSC::Identifier::fromString(vm, "escapeHTML"_s); + extraStaticGlobals.uncheckedAppend( + GlobalPropertyInfo { escapeHTMLIdentifier, + JSC::JSFunction::create(vm, JSC::jsCast<JSC::JSGlobalObject*>(globalObject()), 0, + "escapeHTML"_s, Bun__escapeHTML), + JSC::PropertyAttribute::Function | JSC::PropertyAttribute::DontDelete | 0 }); + JSC::Identifier atobIdentifier = JSC::Identifier::fromString(vm, "atob"_s); extraStaticGlobals.uncheckedAppend( GlobalPropertyInfo { atobIdentifier, @@ -1355,6 +1477,7 @@ void GlobalObject::addBuiltinGlobals(JSC::VM& vm) putDirectBuiltinFunction(vm, this, builtinNames.createFIFOPrivateName(), streamInternalsCreateFIFOCodeGenerator(vm), PropertyAttribute::Builtin | PropertyAttribute::DontDelete | PropertyAttribute::ReadOnly); putDirectBuiltinFunction(vm, this, builtinNames.createNativeReadableStreamPrivateName(), readableStreamCreateNativeReadableStreamCodeGenerator(vm), PropertyAttribute::Builtin | PropertyAttribute::DontDelete | PropertyAttribute::ReadOnly); putDirectBuiltinFunction(vm, this, builtinNames.createEmptyReadableStreamPrivateName(), readableStreamCreateEmptyReadableStreamCodeGenerator(vm), PropertyAttribute::Builtin | PropertyAttribute::DontDelete | PropertyAttribute::ReadOnly); + // putDirectBuiltinFunction(vm, this, builtinNames.readableStreamToArrayPrivateName(), readableStreamReadableStreamToArrayCodeGenerator(vm), PropertyAttribute::Builtin | PropertyAttribute::DontDelete | PropertyAttribute::ReadOnly); putDirectNativeFunction(vm, this, builtinNames.createUninitializedArrayBufferPrivateName(), 1, functionCreateUninitializedArrayBuffer, NoIntrinsic, PropertyAttribute::DontDelete | PropertyAttribute::ReadOnly | PropertyAttribute::Function); diff --git a/src/string_immutable.zig b/src/string_immutable.zig index 367e6300d..849691ca2 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -1207,8 +1207,7 @@ pub fn copyLatin1IntoUTF8(buf_: []u8, comptime Type: type, latin1_: Type) Encode break; } - buf[0..8].* = @bitCast([ascii_vector_size]u8, vec)[0..8].*; - buf[8..ascii_vector_size].* = @bitCast([ascii_vector_size]u8, vec)[8..ascii_vector_size].*; + buf[0..ascii_vector_size].* = @bitCast([ascii_vector_size]u8, vec)[0..ascii_vector_size].*; latin1 = latin1[ascii_vector_size..]; buf = buf[ascii_vector_size..]; } @@ -1317,6 +1316,74 @@ pub fn elementLengthLatin1IntoUTF16(comptime Type: type, latin1_: Type) usize { } pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8) ![]const u8 { + const Pusher = struct { + const lengths: [std.math.maxInt(u8)]u4 = brk: { + var values: [std.math.maxInt(u8)]u4 = undefined; + for (values) |_, i| { + switch (i) { + '"' => { + values[i] = """.len; + }, + '&' => { + values[i] = "&".len; + }, + '\'' => { + values[i] = "'".len; + }, + '<' => { + values[i] = "<".len; + }, + '>' => { + values[i] = ">".len; + }, + else => { + values[i] = 1; + }, + } + } + + break :brk values; + }; + + inline fn appendString(buf: [*]u8, comptime str: []const u8) usize { + buf[0..str.len].* = str[0..str.len].*; + return str.len; + } + + pub inline fn append(buf: [*]u8, char: u8) usize { + if (lengths[char] == 1) { + buf[0] = char; + return 1; + } + + return switch (char) { + '"' => appendString(buf, """), + '&' => appendString(buf, "&"), + '\'' => appendString(buf, "'"), + '<' => appendString(buf, "<"), + '>' => appendString(buf, ">"), + else => unreachable, + }; + } + pub inline fn push(comptime c: anytype, chars: []const u8, allo: std.mem.Allocator) []const u8 { + var total: usize = 0; + inline for (comptime bun.range(0, c)) |i| { + total += @as(usize, lengths[chars[i]]); + } + + if (total == c) { + return chars; + } + + var output = allo.alloc(u8, total) catch unreachable; + var head = output.ptr; + inline for (comptime bun.range(0, c)) |i| { + head += @This().append(head, chars[i]); + } + + return output; + } + }; switch (latin1.len) { 0 => return "", 1 => return switch (latin1[0]) { @@ -1327,6 +1394,43 @@ pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8 '>' => ">", else => latin1, }, + 2 => { + const first: []const u8 = switch (latin1[0]) { + '"' => """, + '&' => "&", + '\'' => "'", + '<' => "<", + '>' => ">", + else => latin1[0..1], + }; + const second: []const u8 = switch (latin1[1]) { + '"' => """, + '&' => "&", + '\'' => "'", + '<' => "<", + '>' => ">", + else => latin1[1..2], + }; + if (first.len == 1 and second.len == 1) { + return latin1; + } + + return strings.append(allocator, first, second); + }, + 3 => return Pusher.push(3, latin1, allocator), + 4 => return Pusher.push(4, latin1, allocator), + 5 => return Pusher.push(5, latin1, allocator), + 6 => return Pusher.push(6, latin1, allocator), + 7 => return Pusher.push(7, latin1, allocator), + 8 => return Pusher.push(8, latin1, allocator), + 9 => return Pusher.push(9, latin1, allocator), + 10 => return Pusher.push(10, latin1, allocator), + 11 => return Pusher.push(11, latin1, allocator), + 12 => return Pusher.push(12, latin1, allocator), + 13 => return Pusher.push(13, latin1, allocator), + 14 => return Pusher.push(14, latin1, allocator), + 15 => return Pusher.push(15, latin1, allocator), + else => { var remaining = latin1; @@ -1339,34 +1443,72 @@ pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8 break :brk _vecs; }; - var buf: std.ArrayList(u8) = undefined; var any_needs_escape = false; + var buf: std.ArrayList(u8) = undefined; if (comptime Environment.isAarch64 or Environment.isX64) { - // pass #1: scan for any characters that need escaping // assume most strings won't need any escaping, so don't actually allocate the buffer scan_and_allocate_lazily: while (remaining.len >= ascii_vector_size) { if (comptime Environment.allow_assert) { std.debug.assert(!any_needs_escape); } - const vec: AsciiVector = remaining[0..ascii_vector_size].*; - if (@reduce( - .Or, - @bitCast(AsciiVectorU1, (vec == vecs[0])) | - @bitCast(AsciiVectorU1, (vec == vecs[1])) | - @bitCast(AsciiVectorU1, (vec == vecs[2])) | - @bitCast(AsciiVectorU1, (vec == vecs[3])) | - @bitCast(AsciiVectorU1, (vec == vecs[4])), - ) == 1) { + if (@reduce(.Min, (vec ^ vecs[0]) & + (vec ^ vecs[1]) & + (vec ^ vecs[2]) & + (vec ^ vecs[3]) & + (vec ^ vecs[4])) == 0) + { buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6); const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr); @memcpy(buf.items.ptr, latin1.ptr, copy_len); buf.items.len = copy_len; any_needs_escape = true; - comptime var i: usize = 0; - inline while (i < ascii_vector_size) : (i += 1) { + var i: usize = 0; + while (i < ascii_vector_size) : (i += 1) { + switch (vec[i]) { + '"', '&', '\'', '<', '>' => |c| { + const result = switch (c) { + '"' => """, + '&' => "&", + '\'' => "'", + '<' => "<", + '>' => ">", + else => unreachable, + }; + + buf.appendSlice(result) catch unreachable; + remaining = remaining[1..]; + }, + else => |c| { + buf.append(c) catch unreachable; + remaining = remaining[1..]; + }, + } + } + + break :scan_and_allocate_lazily; + } + + remaining = remaining[ascii_vector_size..]; + } + } + + if (any_needs_escape) { + // pass #2: we found something that needed an escape + // so we'll go ahead and copy the buffer into a new buffer + while (remaining.len >= ascii_vector_size) { + const vec: AsciiVector = remaining[0..ascii_vector_size].*; + if (@reduce(.Min, (vec ^ vecs[0]) & + (vec ^ vecs[1]) & + (vec ^ vecs[2]) & + (vec ^ vecs[3]) & + (vec ^ vecs[4])) == 0) + { + buf.ensureUnusedCapacity(ascii_vector_size) catch unreachable; + var i: usize = 0; + while (i < ascii_vector_size) : (i += 1) { switch (vec[i]) { '"' => { buf.appendSlice(""") catch unreachable; @@ -1384,65 +1526,20 @@ pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8 buf.appendSlice(">") catch unreachable; }, else => |c| { - buf.appendAssumeCapacity(c); + buf.append(c) catch unreachable; }, } } + remaining = remaining[ascii_vector_size..]; - break :scan_and_allocate_lazily; + continue; } + try buf.ensureUnusedCapacity(ascii_vector_size); + buf.items.ptr[buf.items.len .. buf.items.len + ascii_vector_size][0..ascii_vector_size].* = remaining[0..ascii_vector_size].*; + buf.items.len += ascii_vector_size; remaining = remaining[ascii_vector_size..]; } - - if (any_needs_escape) { - // pass #2: we found something that needed an escape - // so we'll go ahead and copy the buffer into a new buffer - while (remaining.len >= ascii_vector_size) { - const vec: AsciiVector = remaining[0..ascii_vector_size].*; - if (@reduce( - .Or, - @bitCast(AsciiVectorU1, (vec == vecs[0])) | - @bitCast(AsciiVectorU1, (vec == vecs[1])) | - @bitCast(AsciiVectorU1, (vec == vecs[2])) | - @bitCast(AsciiVectorU1, (vec == vecs[3])) | - @bitCast(AsciiVectorU1, (vec == vecs[4])), - ) == 1) { - buf.ensureUnusedCapacity(ascii_vector_size) catch unreachable; - comptime var i: usize = 0; - inline while (i < ascii_vector_size) : (i += 1) { - switch (vec[i]) { - '"' => { - buf.appendSlice(""") catch unreachable; - }, - '&' => { - buf.appendSlice("&") catch unreachable; - }, - '\'' => { - buf.appendSlice("'") catch unreachable; // modified from escape-html; used to be ''' - }, - '<' => { - buf.appendSlice("<") catch unreachable; - }, - '>' => { - buf.appendSlice(">") catch unreachable; - }, - else => |c| { - buf.append(c) catch unreachable; - }, - } - } - - remaining = remaining[ascii_vector_size..]; - continue; - } - - try buf.ensureUnusedCapacity(ascii_vector_size); - buf.items.ptr[buf.items.len .. buf.items.len + ascii_vector_size][0..ascii_vector_size].* = remaining[0..ascii_vector_size].*; - buf.items.len += ascii_vector_size; - remaining = remaining[ascii_vector_size..]; - } - } } if (!any_needs_escape) { @@ -1531,11 +1628,11 @@ pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8 } } - if (any_needs_escape) { - return buf.toOwnedSlice(); - } else { + if (!any_needs_escape) { return latin1; } + + return buf.toOwnedSlice(); }, } } @@ -1811,7 +1908,9 @@ pub const min_16_ascii = @splat(ascii_vector_size, @as(u8, 0x20)); pub const max_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 127)); pub const min_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 0x20)); pub const AsciiVector = std.meta.Vector(ascii_vector_size, u8); +pub const AsciiVectorSmall = std.meta.Vector(8, u8); pub const AsciiVectorU1 = std.meta.Vector(ascii_vector_size, u1); +pub const AsciiVectorU1Small = std.meta.Vector(8, u1); pub const AsciiVectorU16U1 = std.meta.Vector(ascii_u16_vector_size, u1); pub const AsciiU16Vector = std.meta.Vector(ascii_u16_vector_size, u16); pub const max_4_ascii = @splat(4, @as(u8, 127)); |