diff options
author | 2022-04-26 01:12:28 -0700 | |
---|---|---|
committer | 2022-04-26 01:12:28 -0700 | |
commit | 77fbfb3fbb4259150d6b59fe182ccd64e1fb9f0d (patch) | |
tree | df930fa1bb1e7eb408762e8138aec580fed4e4e8 | |
parent | 6590d1f8bf09838e4530848af74990c5ba89eb81 (diff) | |
download | bun-77fbfb3fbb4259150d6b59fe182ccd64e1fb9f0d.tar.gz bun-77fbfb3fbb4259150d6b59fe182ccd64e1fb9f0d.tar.zst bun-77fbfb3fbb4259150d6b59fe182ccd64e1fb9f0d.zip |
Most of Buffer.toString
Diffstat (limited to '')
-rw-r--r-- | integration/bunjs-only-snippets/buffer.test.js | 12 | ||||
-rw-r--r-- | src/base64/base64.zig | 2 | ||||
-rw-r--r-- | src/javascript/jsc/bindings/Buffer.h | 8 | ||||
-rw-r--r-- | src/javascript/jsc/bindings/JSBuffer.cpp | 102 | ||||
-rw-r--r-- | src/javascript/jsc/bindings/bindings.zig | 7 | ||||
-rw-r--r-- | src/javascript/jsc/bindings/headers-handwritten.h | 1 | ||||
-rw-r--r-- | src/javascript/jsc/bindings/wtf-bindings.cpp | 8 | ||||
-rw-r--r-- | src/javascript/jsc/bindings/wtf-bindings.h | 3 | ||||
-rw-r--r-- | src/javascript/jsc/webcore/encoding.zig | 122 | ||||
-rw-r--r-- | src/string_immutable.zig | 34 |
10 files changed, 288 insertions, 11 deletions
diff --git a/integration/bunjs-only-snippets/buffer.test.js b/integration/bunjs-only-snippets/buffer.test.js new file mode 100644 index 000000000..f8cd3aa5b --- /dev/null +++ b/integration/bunjs-only-snippets/buffer.test.js @@ -0,0 +1,12 @@ +import { describe, it, expect } from "bun:test"; + +it("buffer", () => { + var buf = new Buffer(1024); + expect(buf.write("hello world ")).toBe(12); + expect(buf.toString("utf8", 0, "hello world ".length)).toBe("hello world "); + expect(buf.toString("base64url", 0, "hello world ".length)).toBe( + btoa("hello world ") + ); + expect(buf instanceof Uint8Array).toBe(true); + expect(buf instanceof Buffer).toBe(true); +}); diff --git a/src/base64/base64.zig b/src/base64/base64.zig index fa4fc9dc9..a5dcff5e0 100644 --- a/src/base64/base64.zig +++ b/src/base64/base64.zig @@ -51,3 +51,5 @@ pub const urlsafe = std.base64.Base64DecoderWithIgnore.init( null, "= \t\r\n" ++ [_]u8{ std.ascii.control_code.VT, std.ascii.control_code.FF }, ); + +pub const urlsafeEncoder = std.base64.url_safe_no_pad.Encoder; diff --git a/src/javascript/jsc/bindings/Buffer.h b/src/javascript/jsc/bindings/Buffer.h index ac57e4d25..8efc95787 100644 --- a/src/javascript/jsc/bindings/Buffer.h +++ b/src/javascript/jsc/bindings/Buffer.h @@ -10,6 +10,14 @@ #include "BufferEncodingType.h" #include "JavaScriptCore/GenericTypedArrayView.h" +extern "C" JSC__JSValue Bun__encoding__toStringUTF16(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject); +extern "C" JSC__JSValue Bun__encoding__toStringUTF8(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject); +extern "C" JSC__JSValue Bun__encoding__toStringASCII(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject); +extern "C" JSC__JSValue Bun__encoding__toStringLatin1(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject); +extern "C" JSC__JSValue Bun__encoding__toStringHex(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject); +extern "C" JSC__JSValue Bun__encoding__toStringBase64(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject); +extern "C" JSC__JSValue Bun__encoding__toStringURLSafeBase64(const uint8_t* input, size_t len, JSC__JSGlobalObject* globalObject); + namespace WebCore { class Buffer final : public RefCounted<Buffer> { diff --git a/src/javascript/jsc/bindings/JSBuffer.cpp b/src/javascript/jsc/bindings/JSBuffer.cpp index f45fd2a9a..05cc1f23a 100644 --- a/src/javascript/jsc/bindings/JSBuffer.cpp +++ b/src/javascript/jsc/bindings/JSBuffer.cpp @@ -568,7 +568,107 @@ static inline JSC::EncodedJSValue jsBufferPrototypeFunction_swap64Body(JSC::JSGl static inline JSC::EncodedJSValue jsBufferPrototypeFunction_toStringBody(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation<JSBuffer>::ClassParameter castedThis) { auto& vm = JSC::getVM(lexicalGlobalObject); - return JSC::JSValue::encode(jsUndefined()); + uint32_t offset = 0; + uint32_t length = castedThis->length(); + WebCore::BufferEncodingType encoding = WebCore::BufferEncodingType::utf8; + + if (length == 0) + return JSC::JSValue::encode(JSC::jsEmptyString(vm)); + + auto scope = DECLARE_THROW_SCOPE(vm); + + switch (callFrame->argumentCount()) { + case 0: { + break; + } + case 2: + case 3: + case 1: { + JSC::JSValue arg1 = callFrame->uncheckedArgument(0); + std::optional<BufferEncodingType> encoded = parseEnumeration<BufferEncodingType>(*lexicalGlobalObject, arg1); + if (!encoded) { + throwTypeError(lexicalGlobalObject, scope, "Invalid encoding"); + return JSC::JSValue::encode(jsUndefined()); + } + + encoding = encoded.value(); + if (callFrame->argumentCount() == 1) + break; + } + // any + case 5: { + JSC::JSValue arg2 = callFrame->uncheckedArgument(1); + int32_t ioffset = arg2.toInt32(lexicalGlobalObject); + if (ioffset < 0) { + throwTypeError(lexicalGlobalObject, scope, "Offset must be a positive integer"); + return JSC::JSValue::encode(jsUndefined()); + } + offset = static_cast<uint32_t>(ioffset); + + if (callFrame->argumentCount() == 2) + break; + } + + default: { + length = static_cast<uint32_t>(callFrame->argument(2).toInt32(lexicalGlobalObject)); + break; + } + } + + length -= std::min(offset, length); + + if (UNLIKELY(length == 0)) { + RELEASE_AND_RETURN(scope, JSC::JSValue::encode(JSC::jsEmptyString(vm))); + } + + JSC::EncodedJSValue ret = 0; + + switch (encoding) { + case WebCore::BufferEncodingType::buffer: + case WebCore::BufferEncodingType::utf8: { + ret = Bun__encoding__toStringUTF8(castedThis->typedVector() + offset, length, lexicalGlobalObject); + break; + } + + case WebCore::BufferEncodingType::latin1: + case WebCore::BufferEncodingType::ascii: { + ret = Bun__encoding__toStringASCII(castedThis->typedVector() + offset, length, lexicalGlobalObject); + break; + } + + case WebCore::BufferEncodingType::ucs2: + case WebCore::BufferEncodingType::utf16le: { + ret = Bun__encoding__toStringUTF16(castedThis->typedVector() + offset, length, lexicalGlobalObject); + break; + } + + case WebCore::BufferEncodingType::base64: { + ret = Bun__encoding__toStringBase64(castedThis->typedVector() + offset, length, lexicalGlobalObject); + break; + } + + case WebCore::BufferEncodingType::base64url: { + ret = Bun__encoding__toStringURLSafeBase64(castedThis->typedVector() + offset, length, lexicalGlobalObject); + break; + } + + case WebCore::BufferEncodingType::hex: { + ret = Bun__encoding__toStringHex(castedThis->typedVector() + offset, length, lexicalGlobalObject); + break; + } + default: { + throwTypeError(lexicalGlobalObject, scope, "Unsupported encoding? This shouldn't happen"); + break; + } + } + + JSC::JSValue retValue = JSC::JSValue::decode(ret); + if (UNLIKELY(!retValue.isString())) { + scope.throwException(lexicalGlobalObject, retValue); + return JSC::JSValue::encode(jsUndefined()); + } + + RELEASE_AND_RETURN(scope, JSC::JSValue::encode(retValue)); } static inline JSC::EncodedJSValue jsBufferPrototypeFunction_writeBody(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame, typename IDLOperation<JSBuffer>::ClassParameter castedThis) { diff --git a/src/javascript/jsc/bindings/bindings.zig b/src/javascript/jsc/bindings/bindings.zig index f09d56e3e..2643abfed 100644 --- a/src/javascript/jsc/bindings/bindings.zig +++ b/src/javascript/jsc/bindings/bindings.zig @@ -3463,6 +3463,7 @@ pub const StringView = extern struct { pub const WTF = struct { extern fn WTF__copyLCharsFromUCharSource(dest: [*]u8, source: *const anyopaque, len: usize) void; + extern fn WTF__toBase64URLStringValue(bytes: [*]const u8, length: usize, globalObject: *JSGlobalObject) JSValue; /// This uses SSE2 instructions and/or ARM NEON to copy 16-bit characters efficiently /// See wtf/Text/ASCIIFastPath.h for details @@ -3472,6 +3473,12 @@ pub const WTF = struct { // This is any alignment WTF__copyLCharsFromUCharSource(destination, source.ptr, source.len); } + + /// Encode a byte array to a URL-safe base64 string for use with JS + /// Memory is managed by JavaScriptCore instead of us + pub fn toBase64URLStringValue(bytes: []const u8, globalObject: *JSGlobalObject) JSValue { + return WTF__toBase64URLStringValue(bytes.ptr, bytes.len, globalObject); + } }; pub const Callback = struct { diff --git a/src/javascript/jsc/bindings/headers-handwritten.h b/src/javascript/jsc/bindings/headers-handwritten.h index d955e3138..3d03187b7 100644 --- a/src/javascript/jsc/bindings/headers-handwritten.h +++ b/src/javascript/jsc/bindings/headers-handwritten.h @@ -217,4 +217,5 @@ extern "C" int64_t Bun__encoding__writeLatin1AsUTF8(const unsigned char* ptr, si extern "C" int64_t Bun__encoding__writeUTF16AsUTF8(const UChar* ptr, size_t len, unsigned char* to, size_t other_len); extern "C" int64_t Bun__encoding__writeLatin1AsASCII(const unsigned char* ptr, size_t len, unsigned char* to, size_t other_len); extern "C" int64_t Bun__encoding__writeUTF16AsASCII(const UChar* ptr, size_t len, unsigned char* to, size_t other_len); + #endif diff --git a/src/javascript/jsc/bindings/wtf-bindings.cpp b/src/javascript/jsc/bindings/wtf-bindings.cpp index 5dae85930..bc3b96f73 100644 --- a/src/javascript/jsc/bindings/wtf-bindings.cpp +++ b/src/javascript/jsc/bindings/wtf-bindings.cpp @@ -1,6 +1,14 @@ #include "wtf-bindings.h" +#include "wtf/text/Base64.h" extern "C" void WTF__copyLCharsFromUCharSource(LChar* destination, const UChar* source, size_t length) { WTF::copyLCharsFromUCharSource(destination, source, length); +} + +extern "C" JSC::EncodedJSValue WTF__toBase64URLStringValue(const uint8_t* bytes, size_t length, JSC::JSGlobalObject* globalObject) +{ + WTF::String string = WTF::base64URLEncodeToString(reinterpret_cast<const LChar*>(bytes), static_cast<unsigned int>(length)); + string.impl()->ref(); + return JSC::JSValue::encode(JSC::jsString(globalObject->vm(), string.impl())); }
\ No newline at end of file diff --git a/src/javascript/jsc/bindings/wtf-bindings.h b/src/javascript/jsc/bindings/wtf-bindings.h index 3f71ff0c2..2abd398fe 100644 --- a/src/javascript/jsc/bindings/wtf-bindings.h +++ b/src/javascript/jsc/bindings/wtf-bindings.h @@ -3,4 +3,5 @@ #include "root.h" #include "wtf/text/ASCIIFastPath.h" -extern "C" void WTF__copyLCharsFromUCharSource(LChar* destination, const UChar* source, size_t length);
\ No newline at end of file +extern "C" void WTF__copyLCharsFromUCharSource(LChar* destination, const UChar* source, size_t length); +extern "C" JSC::EncodedJSValue WTF__toBase64URLStringValue(const uint8_t* bytes, size_t length, JSC::JSGlobalObject* globalObject);
\ No newline at end of file diff --git a/src/javascript/jsc/webcore/encoding.zig b/src/javascript/jsc/webcore/encoding.zig index 78d3f2877..b4ac59fa8 100644 --- a/src/javascript/jsc/webcore/encoding.zig +++ b/src/javascript/jsc/webcore/encoding.zig @@ -688,11 +688,99 @@ pub const Encoder = struct { return writeU8(input, len, to, to_len, .ascii); } + export fn Bun__encoding__toStringUTF16(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue { + return toString(input, len, globalObject, JSC.Node.Encoding.utf16le); + } + export fn Bun__encoding__toStringUTF8(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue { + return toString(input, len, globalObject, .utf8); + } + export fn Bun__encoding__toStringASCII(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue { + return toString(input, len, globalObject, .ascii); + } + export fn Bun__encoding__toStringLatin1(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue { + return toString(input, len, globalObject, .latin1); + } + + export fn Bun__encoding__toStringHex(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue { + return toString(input, len, globalObject, .hex); + } + + export fn Bun__encoding__toStringBase64(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue { + return toString(input, len, globalObject, .base64); + } + + export fn Bun__encoding__toStringURLSafeBase64(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue { + return toString(input, len, globalObject, .base64url); + } + // pub fn writeUTF16AsUTF8(utf16: [*]const u16, len: usize, to: [*]u8, to_len: usize) callconv(.C) i32 { // return @intCast(i32, strings.copyUTF16IntoUTF8(to[0..to_len], []const u16, utf16[0..len]).written); // } - // pub fn toString(input: [*]const u8, len: usize, zig_str: *ZigString, comptime encoding: JSC.Node.Encoding) callconv(.C) i64 {} + pub fn toString(input_ptr: [*]const u8, len: usize, global: *JSGlobalObject, comptime encoding: JSC.Node.Encoding) JSValue { + if (len == 0) + return ZigString.Empty.toValue(global); + + const input = input_ptr[0..len]; + const allocator = VirtualMachine.vm.allocator; + + switch (comptime encoding) { + .latin1, .ascii => { + var to = allocator.alloc(u8, len) catch return ZigString.init("Out of memory").toErrorInstance(global); + + @memcpy(to.ptr, input_ptr, to.len); + + // Hoping this gets auto vectorized + for (to[0..to.len]) |c, i| { + to[i] = @as(u8, @truncate(u7, c)); + } + + return ZigString.init(to).toExternalValue(global); + }, + .buffer, .utf8 => { + // JSC only supports UTF-16 strings for non-ascii text + const converted = strings.toUTF16Alloc(allocator, input, false) catch return ZigString.init("Out of memory").toErrorInstance(global); + if (converted) |utf16| { + return ZigString.toExternalU16(utf16.ptr, utf16.len, global); + } + + // If we get here, it means we can safely assume the string is 100% ASCII characters + // For this, we rely on the GC to manage the memory to minimize potential for memory leaks + return ZigString.init(input).toValueGC(global); + }, + // potentially convert UTF-16 to UTF-8 + JSC.Node.Encoding.ucs2, JSC.Node.Encoding.utf16le => { + const converted = strings.toUTF16Alloc(allocator, input, false) catch return ZigString.init("Out of memory").toErrorInstance(global); + if (converted) |utf16| { + return ZigString.toExternalU16(utf16.ptr, utf16.len, global); + } + + var output = allocator.alloc(u8, input.len) catch return ZigString.init("Out of memory").toErrorInstance(global); + JSC.WTF.copyLCharsFromUCharSource(output.ptr, []align(1) const u16, @ptrCast([*]align(1) const u16, input.ptr)[0 .. input.len / 2]); + return ZigString.init(output).toExternalValue(global); + }, + + JSC.Node.Encoding.hex => { + var output = allocator.alloc(u8, input.len * 2) catch return ZigString.init("Out of memory").toErrorInstance(global); + const wrote = strings.encodeBytesToHex(output, input); + std.debug.assert(wrote == output.len); + var val = ZigString.init(output); + val.mark(); + return val.toExternalValue(global); + }, + + JSC.Node.Encoding.base64url => { + return JSC.WTF.toBase64URLStringValue(input, global); + }, + + JSC.Node.Encoding.base64 => { + const to_len = bun.base64.encodeLen(input); + var to = allocator.alloc(u8, to_len) catch return ZigString.init("Out of memory").toErrorInstance(global); + const wrote = bun.base64.encode(to, input); + return ZigString.init(to[0..wrote]).toExternalValue(global); + }, + } + } pub fn writeU8(input: [*]const u8, len: usize, to: [*]u8, to_len: usize, comptime encoding: JSC.Node.Encoding) i64 { if (len == 0 or to_len == 0) @@ -706,14 +794,26 @@ pub const Encoder = struct { // if (comptime encoding.isBinaryToText()) {} switch (comptime encoding) { - JSC.Node.Encoding.ascii => { - const written = @truncate(u32, @minimum(len, to_len)); + JSC.Node.Encoding.buffer => { + const written = @minimum(len, to_len); @memcpy(to, input, written); - return @intCast(i32, written); + + return @intCast(i64, written); + }, + .latin1, .ascii => { + const written = @minimum(len, to_len); + @memcpy(to, input, written); + + // Hoping this gets auto vectorized + for (to[0..written]) |c, i| { + to[i] = @as(u8, @truncate(u7, c)); + } + + return @intCast(i64, written); }, .utf8 => { // need to encode - return @intCast(i32, strings.copyLatin1IntoUTF8(to[0..to_len], []const u8, input[0..len]).written); + return @intCast(i64, strings.copyLatin1IntoUTF8(to[0..to_len], []const u8, input[0..len]).written); }, // encode latin1 into UTF16 JSC.Node.Encoding.ucs2, JSC.Node.Encoding.utf16le => { @@ -762,7 +862,7 @@ pub const Encoder = struct { return @intCast(i64, bun.base64.decode(to[0..outlen], slice).written); }, - else => return 0, + // else => return 0, } } @@ -780,7 +880,7 @@ pub const Encoder = struct { return @intCast(i32, strings.copyUTF16IntoUTF8(to[0..to_len], []const u16, input[0..len]).written); }, // string is already encoded, just need to copy the data - JSC.Node.Encoding.ucs2, JSC.Node.Encoding.ascii, JSC.Node.Encoding.utf16le => { + JSC.Node.Encoding.ascii, JSC.Node.Encoding.ucs2, JSC.Node.Encoding.buffer, JSC.Node.Encoding.utf16le => { strings.copyU16IntoU8(to[0..to_len], []const u16, input[0..len]); return @intCast(i64, @minimum(len, to_len)); @@ -868,6 +968,14 @@ pub const Encoder = struct { _ = Bun__encoding__writeUTF16AsUTF8; _ = Bun__encoding__writeLatin1AsASCII; _ = Bun__encoding__writeUTF16AsASCII; + + _ = Bun__encoding__toStringUTF16; + _ = Bun__encoding__toStringUTF8; + _ = Bun__encoding__toStringASCII; + _ = Bun__encoding__toStringLatin1; + _ = Bun__encoding__toStringHex; + _ = Bun__encoding__toStringBase64; + _ = Bun__encoding__toStringURLSafeBase64; } } }; diff --git a/src/string_immutable.zig b/src/string_immutable.zig index 7a48dd1fb..6114c7e06 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -4,6 +4,7 @@ const Environment = @import("./env.zig"); const string = @import("string_types.zig").string; const stringZ = @import("string_types.zig").stringZ; const CodePoint = @import("string_types.zig").CodePoint; +const bun = @import("global.zig"); const assert = std.debug.assert; pub inline fn containsChar(self: string, char: u8) bool { return indexOfChar(self, char) != null; @@ -809,9 +810,9 @@ pub inline fn copyU16IntoU8(output_: []u8, comptime InputType: type, input_: Inp const strings = @This(); -/// If there are non-ascii characters in the string, this encodes UTF-8 into a new UTF-16 string. +/// Convert a UTF-8 string to a UTF-16 string IF there are any non-ascii characters /// If there are no non-ascii characters, this returns null -/// This is intended to be used for strings that go to +/// This is intended to be used for strings that go to JavaScript pub fn toUTF16Alloc(allocator: std.mem.Allocator, bytes: []const u8, comptime fail_if_invalid: bool) !?[]u16 { if (strings.firstNonASCII(bytes)) |i| { const ascii = bytes[0..i]; @@ -1715,6 +1716,22 @@ pub fn decodeHexToBytes(destination: []u8, comptime Char: type, source: []const return destination.len - remain.len; } +pub fn encodeBytesToHex(destination: []u8, source: []const u8) usize { + std.debug.assert(destination.len > 0); + std.debug.assert(source.len > 0); + const to_write = if (destination.len < source.len * 2) + destination.len - destination.len % 2 + else + source.len * 2; + + const to_read = to_write / 2; + + const formatter = std.fmt.fmtSliceHexLower(source[0..to_read]); + const written = std.fmt.bufPrint(destination, "{}", .{formatter}) catch unreachable; + + return written.len; +} + test "decodeHexToBytes" { var buffer = std.mem.zeroes([1024]u8); for (buffer) |_, i| { @@ -1730,6 +1747,19 @@ test "decodeHexToBytes" { try std.testing.expectEqualSlices(u8, &buffer, ours_buf[0..ours]); } +// test "formatBytesToHex" { +// var buffer = std.mem.zeroes([1024]u8); +// for (buffer) |_, i| { +// buffer[i] = @truncate(u8, i % 256); +// } +// var written: [2048]u8 = undefined; +// var hex = std.fmt.bufPrint(&written, "{}", .{std.fmt.fmtSliceHexLower(&buffer)}) catch unreachable; +// var ours_buf: [4096]u8 = undefined; +// // var ours = formatBytesToHex(&ours_buf, &buffer); +// // try std.testing.expectEqualSlices(u8, match, ours_buf[0..ours]); +// try std.testing.expectEqualSlices(u8, &buffer, ours_buf[0..ours]); +// } + pub fn trimLeadingChar(slice: []const u8, char: u8) []const u8 { if (indexOfNotChar(slice, char)) |i| { return slice[i..]; |