diff options
author | 2022-06-22 23:21:48 -0700 | |
---|---|---|
committer | 2022-06-22 23:21:48 -0700 | |
commit | 729d445b6885f69dd2c6355f38707bd42851c791 (patch) | |
tree | f87a7c408929ea3f57bbb7ace380cf869da83c0e /src/bun.js/webcore/encoding.zig | |
parent | 25f820c6bf1d8ec6d444ef579cc036b8c0607b75 (diff) | |
download | bun-jarred/rename.tar.gz bun-jarred/rename.tar.zst bun-jarred/rename.zip |
change the directory structurejarred/rename
Diffstat (limited to 'src/bun.js/webcore/encoding.zig')
-rw-r--r-- | src/bun.js/webcore/encoding.zig | 1247 |
1 files changed, 1247 insertions, 0 deletions
diff --git a/src/bun.js/webcore/encoding.zig b/src/bun.js/webcore/encoding.zig new file mode 100644 index 000000000..cfef1c0d7 --- /dev/null +++ b/src/bun.js/webcore/encoding.zig @@ -0,0 +1,1247 @@ +const std = @import("std"); +const Api = @import("../../api/schema.zig").Api; +const RequestContext = @import("../../http.zig").RequestContext; +const MimeType = @import("../../http.zig").MimeType; +const ZigURL = @import("../../url.zig").URL; +const HTTPClient = @import("http"); +const NetworkThread = HTTPClient.NetworkThread; + +const JSC = @import("../../jsc.zig"); +const js = JSC.C; + +const Method = @import("../../http/method.zig").Method; + +const ObjectPool = @import("../../pool.zig").ObjectPool; +const bun = @import("../../global.zig"); +const Output = @import("../../global.zig").Output; +const MutableString = @import("../../global.zig").MutableString; +const strings = @import("../../global.zig").strings; +const string = @import("../../global.zig").string; +const default_allocator = @import("../../global.zig").default_allocator; +const FeatureFlags = @import("../../global.zig").FeatureFlags; +const ArrayBuffer = @import("../base.zig").ArrayBuffer; +const Properties = @import("../base.zig").Properties; +const NewClass = @import("../base.zig").NewClass; +const d = @import("../base.zig").d; +const castObj = @import("../base.zig").castObj; +const getAllocator = @import("../base.zig").getAllocator; +const JSPrivateDataPtr = @import("../base.zig").JSPrivateDataPtr; +const GetJSPrivateData = @import("../base.zig").GetJSPrivateData; +const Environment = @import("../../env.zig"); +const ZigString = JSC.ZigString; +const JSInternalPromise = JSC.JSInternalPromise; +const JSPromise = JSC.JSPromise; +const JSValue = JSC.JSValue; +const JSError = JSC.JSError; +const JSGlobalObject = JSC.JSGlobalObject; + +const VirtualMachine = @import("../javascript.zig").VirtualMachine; +const Task = @import("../javascript.zig").Task; + +const picohttp = @import("picohttp"); + +pub const TextEncoder = struct { + filler: u32 = 0, + + const utf8_string: string = "utf-8"; + + pub export fn TextEncoder__encode( + globalThis: *JSGlobalObject, + zig_str: *const ZigString, + ) JSValue { + var ctx = globalThis.ref(); + if (zig_str.is16Bit()) { + var bytes = strings.toUTF8AllocWithType( + default_allocator, + @TypeOf(zig_str.utf16Slice()), + zig_str.utf16Slice(), + ) catch { + return JSC.toInvalidArguments("Out of memory", .{}, ctx); + }; + return ArrayBuffer.fromBytes(bytes, .Uint8Array).toJS(ctx, null); + } else { + // latin1 always has the same length as utf-8 + // so we can use the Gigacage to allocate the buffer + var array = JSC.JSValue.createUninitializedUint8Array(ctx.ptr(), zig_str.len); + var buffer = array.asArrayBuffer(ctx.ptr()) orelse + return JSC.toInvalidArguments("Out of memory", .{}, ctx); + const result = strings.copyLatin1IntoUTF8(buffer.slice(), []const u8, zig_str.slice()); + std.debug.assert(result.written == zig_str.len); + return array; + } + + unreachable; + } + + // This is a fast path for copying a Rope string into a Uint8Array. + // This keeps us from an extra string temporary allocation + const RopeStringEncoder = struct { + globalThis: *JSGlobalObject, + allocator: std.mem.Allocator, + buffer_value: JSC.JSValue, + slice: []u8, + tail: usize = 0, + any_utf16: bool = false, + + pub fn append8(it: *JSC.JSString.Iterator, ptr: [*]const u8, len: u32) callconv(.C) void { + var this = bun.cast(*RopeStringEncoder, it.data.?); + // we use memcpy here instead of encoding + // SIMD only has an impact for long strings + // so in a case like this, the fastest path is to memcpy + // and then later, we can use the SIMD version + @memcpy(this.slice.ptr + this.tail, ptr, len); + this.tail += len; + } + pub fn append16(it: *JSC.JSString.Iterator, _: [*]const u16, _: u32) callconv(.C) void { + var this = bun.cast(*RopeStringEncoder, it.data.?); + this.any_utf16 = true; + it.stop = 1; + return; + } + pub fn write8(it: *JSC.JSString.Iterator, ptr: [*]const u8, len: u32, offset: u32) callconv(.C) void { + var this = bun.cast(*RopeStringEncoder, it.data.?); + // we use memcpy here instead of encoding + // SIMD only has an impact for long strings + // so in a case like this, the fastest path is to memcpy + // and then later, we can use the SIMD version + @memcpy(this.slice.ptr + offset, ptr, len); + } + pub fn write16(it: *JSC.JSString.Iterator, _: [*]const u16, _: u32, _: u32) callconv(.C) void { + var this = bun.cast(*RopeStringEncoder, it.data.?); + this.any_utf16 = true; + it.stop = 1; + return; + } + + pub fn iter(this: *RopeStringEncoder) JSC.JSString.Iterator { + return .{ + .data = this, + .stop = 0, + .append8 = append8, + .append16 = append16, + .write8 = write8, + .write16 = write16, + }; + } + }; + + // This fast path is only suitable for Latin-1 strings. + // It's not suitable for UTF-16 strings, because getting the byteLength is unpredictable + pub export fn TextEncoder__encodeRopeString( + globalThis: *JSGlobalObject, + rope_str: *JSC.JSString, + ) JSValue { + var ctx = globalThis.ref(); + if (comptime Environment.allow_assert) std.debug.assert(rope_str.is8Bit()); + var array = JSC.JSValue.createUninitializedUint8Array(ctx.ptr(), rope_str.length()); + var encoder = RopeStringEncoder{ + .globalThis = globalThis, + .allocator = bun.default_allocator, + .buffer_value = array, + .slice = (array.asArrayBuffer(globalThis) orelse return JSC.JSValue.jsUndefined()).slice(), + }; + var iter = encoder.iter(); + rope_str.iterator(globalThis, &iter); + + if (encoder.any_utf16) { + return JSC.JSValue.jsUndefined(); + } + + if (comptime !bun.FeatureFlags.latin1_is_now_ascii) { + strings.replaceLatin1WithUTF8(encoder.slice); + } + + return array; + } + + const read_key = ZigString.init("read"); + const written_key = ZigString.init("written"); + + pub export fn TextEncoder__encodeInto( + globalThis: *JSC.JSGlobalObject, + input: *const ZigString, + buf_ptr: [*]u8, + buf_len: usize, + ) JSC.JSValue { + var output = buf_ptr[0..buf_len]; + var result: strings.EncodeIntoResult = strings.EncodeIntoResult{ .read = 0, .written = 0 }; + if (input.is16Bit()) { + const utf16_slice = input.utf16Slice(); + result = strings.copyUTF16IntoUTF8(output, @TypeOf(utf16_slice), utf16_slice); + } else { + result = strings.copyLatin1IntoUTF8(output, @TypeOf(input.slice()), input.slice()); + } + return JSC.JSValue.createObject2(globalThis, &read_key, &written_key, JSValue.jsNumber(result.read), JSValue.jsNumber(result.written)); + } +}; + +comptime { + if (!JSC.is_bindgen) { + _ = TextEncoder.TextEncoder__encode; + _ = TextEncoder.TextEncoder__encodeInto; + _ = TextEncoder.TextEncoder__encodeRopeString; + } +} + +/// https://encoding.spec.whatwg.org/encodings.json +pub const EncodingLabel = enum { + @"UTF-8", + @"IBM866", + @"ISO-8859-2", + @"ISO-8859-3", + @"ISO-8859-4", + @"ISO-8859-5", + @"ISO-8859-6", + @"ISO-8859-7", + @"ISO-8859-8", + @"ISO-8859-8-I", + @"ISO-8859-10", + @"ISO-8859-13", + @"ISO-8859-14", + @"ISO-8859-15", + @"ISO-8859-16", + @"KOI8-R", + @"KOI8-U", + @"macintosh", + @"windows-874", + @"windows-1250", + @"windows-1251", + /// Also known as + /// - ASCII + /// - latin1 + @"windows-1252", + @"windows-1253", + @"windows-1254", + @"windows-1255", + @"windows-1256", + @"windows-1257", + @"windows-1258", + @"x-mac-cyrillic", + @"Big5", + @"EUC-JP", + @"ISO-2022-JP", + @"Shift_JIS", + @"EUC-KR", + @"UTF-16BE", + @"UTF-16LE", + @"x-user-defined", + + pub const Map = std.enums.EnumMap(EncodingLabel, string); + pub const label: Map = brk: { + var map = Map.initFull(""); + map.put(EncodingLabel.@"UTF-8", "utf-8"); + map.put(EncodingLabel.@"UTF-16LE", "utf-16le"); + map.put(EncodingLabel.@"windows-1252", "windows-1252"); + break :brk map; + }; + + const utf16_names = [_]string{ + "ucs-2", + "utf-16", + "unicode", + "utf-16le", + "csunicode", + "unicodefeff", + "iso-10646-ucs-2", + }; + + const utf8_names = [_]string{ + "utf8", + "utf-8", + "unicode11utf8", + "unicode20utf8", + "x-unicode20utf8", + "unicode-1-1-utf-8", + }; + + const latin1_names = [_]string{ + "l1", + "ascii", + "cp819", + "cp1252", + "ibm819", + "latin1", + "iso88591", + "us-ascii", + "x-cp1252", + "iso8859-1", + "iso_8859-1", + "iso-8859-1", + "iso-ir-100", + "csisolatin1", + "windows-1252", + "ansi_x3.4-1968", + "iso_8859-1:1987", + }; + + pub const latin1 = EncodingLabel.@"windows-1252"; + + pub fn which(input_: string) ?EncodingLabel { + const input = strings.trim(input_, " \t\r\n"); + const ExactMatcher = strings.ExactSizeMatcher; + const Eight = ExactMatcher(8); + const Sixteen = ExactMatcher(16); + return switch (input.len) { + 1, 0 => null, + 2...8 => switch (Eight.matchLower(input)) { + Eight.case("l1"), + Eight.case("ascii"), + Eight.case("cp819"), + Eight.case("cp1252"), + Eight.case("ibm819"), + Eight.case("latin1"), + Eight.case("iso88591"), + Eight.case("us-ascii"), + Eight.case("x-cp1252"), + => EncodingLabel.latin1, + + Eight.case("ucs-2"), + Eight.case("utf-16"), + Eight.case("unicode"), + Eight.case("utf-16le"), + => EncodingLabel.@"UTF-16LE", + + Eight.case("utf8"), Eight.case("utf-8") => EncodingLabel.@"UTF-8", + else => null, + }, + + 9...16 => switch (Sixteen.matchLower(input)) { + Sixteen.case("iso8859-1"), + Sixteen.case("iso_8859-1"), + Sixteen.case("iso-8859-1"), + Sixteen.case("iso-ir-100"), + Sixteen.case("csisolatin1"), + Sixteen.case("windows-1252"), + Sixteen.case("ansi_x3.4-1968"), + Sixteen.case("iso_8859-1:1987"), + => EncodingLabel.latin1, + + Sixteen.case("unicode11utf8"), + Sixteen.case("unicode20utf8"), + Sixteen.case("x-unicode20utf8"), + => EncodingLabel.@"UTF-8", + + Sixteen.case("csunicode"), + Sixteen.case("unicodefeff"), + Sixteen.case("iso-10646-ucs-2"), + => EncodingLabel.@"UTF-16LE", + + else => null, + }, + else => if (strings.eqlCaseInsensitiveASCII(input, "unicode-1-1-utf-8", true)) + EncodingLabel.@"UTF-8" + else + null, + }; + } +}; + +pub const TextDecoder = struct { + scratch_memory: []u8 = &[_]u8{}, + ignore_bom: bool = false, + fatal: bool = false, + encoding: EncodingLabel = EncodingLabel.utf8, + + pub const Class = NewClass( + TextDecoder, + .{ + .name = "TextDecoder", + }, + .{ + .decode = .{ + .rfn = decode, + }, + }, + .{ + .encoding = .{ + .get = getEncoding, + .readOnly = true, + }, + .ignoreBOM = .{ + .get = getIgnoreBOM, + .set = setIgnoreBOM, + }, + .fatal = .{ + .get = getFatal, + .set = setFatal, + }, + }, + ); + + pub fn getIgnoreBOM( + this: *TextDecoder, + _: js.JSContextRef, + _: js.JSValueRef, + _: js.JSStringRef, + _: js.ExceptionRef, + ) js.JSValueRef { + return JSC.JSValue.jsBoolean(this.ignore_bom).asObjectRef(); + } + pub fn setIgnoreBOM( + this: *TextDecoder, + _: js.JSContextRef, + _: js.JSValueRef, + _: js.JSStringRef, + value: JSC.C.JSValueRef, + _: js.ExceptionRef, + ) bool { + this.ignore_bom = JSValue.fromRef(value).toBoolean(); + return true; + } + pub fn setFatal( + this: *TextDecoder, + _: js.JSContextRef, + _: js.JSValueRef, + _: js.JSStringRef, + value: JSC.C.JSValueRef, + _: js.ExceptionRef, + ) bool { + this.fatal = JSValue.fromRef(value).toBoolean(); + return true; + } + pub fn getFatal( + this: *TextDecoder, + _: js.JSContextRef, + _: js.JSValueRef, + _: js.JSStringRef, + _: js.ExceptionRef, + ) js.JSValueRef { + return JSC.JSValue.jsBoolean(this.fatal).asObjectRef(); + } + + const utf8_string: string = "utf-8"; + pub fn getEncoding( + this: *TextDecoder, + ctx: js.JSContextRef, + _: js.JSValueRef, + _: js.JSStringRef, + _: js.ExceptionRef, + ) js.JSValueRef { + return ZigString.init(EncodingLabel.label.get(this.encoding).?).toValue(ctx.ptr()).asObjectRef(); + } + const Vector16 = std.meta.Vector(16, u16); + const max_16_ascii: Vector16 = @splat(16, @as(u16, 127)); + + fn decodeUTF16WithAlignment( + _: *TextDecoder, + comptime Slice: type, + slice: Slice, + ctx: js.JSContextRef, + ) JSC.C.JSValueRef { + var i: usize = 0; + + while (i < slice.len) { + while (i + strings.ascii_u16_vector_size <= slice.len) { + const vec: strings.AsciiU16Vector = slice[i..][0..strings.ascii_u16_vector_size].*; + if ((@reduce( + .Or, + @bitCast( + strings.AsciiVectorU16U1, + vec > strings.max_u16_ascii, + ) | @bitCast( + strings.AsciiVectorU16U1, + vec < strings.min_u16_ascii, + ), + ) == 0)) { + break; + } + i += strings.ascii_u16_vector_size; + } + while (i < slice.len and slice[i] <= 127) { + i += 1; + } + break; + } + + // is this actually a UTF-16 string that is just ascii? + // we can still allocate as UTF-16 and just copy the bytes + if (i == slice.len) { + if (comptime Slice == []u16) { + return JSC.C.JSValueMakeString(ctx, JSC.C.JSStringCreateWithCharacters(slice.ptr, slice.len)); + } else { + var str = ZigString.init(""); + str.ptr = @ptrCast([*]u8, slice.ptr); + str.len = slice.len; + str.markUTF16(); + return str.toValueGC(ctx.ptr()).asObjectRef(); + } + } + + var buffer = std.ArrayListAlignedUnmanaged(u16, @alignOf(@TypeOf(slice.ptr))){}; + // copy the allocator to reduce the number of threadlocal accesses + const allocator = VirtualMachine.vm.allocator; + buffer.ensureTotalCapacity(allocator, slice.len) catch unreachable; + buffer.items.len = i; + + @memcpy( + std.mem.sliceAsBytes(buffer.items).ptr, + std.mem.sliceAsBytes(slice).ptr, + std.mem.sliceAsBytes(slice[0..i]).len, + ); + + const first_high_surrogate = 0xD800; + const last_high_surrogate = 0xDBFF; + const first_low_surrogate = 0xDC00; + const last_low_surrogate = 0xDFFF; + + var remainder = slice[i..]; + while (remainder.len > 0) { + switch (remainder[0]) { + 0...127 => { + const count: usize = if (strings.firstNonASCII16CheckMin(Slice, remainder, false)) |index| index + 1 else remainder.len; + + buffer.ensureUnusedCapacity(allocator, count) catch unreachable; + + const prev = buffer.items.len; + buffer.items.len += count; + // Since this string is freshly allocated, we know it's not going to overlap + @memcpy( + std.mem.sliceAsBytes(buffer.items[prev..]).ptr, + std.mem.sliceAsBytes(remainder).ptr, + std.mem.sliceAsBytes(remainder[0..count]).len, + ); + remainder = remainder[count..]; + }, + first_high_surrogate...last_high_surrogate => |first| { + if (remainder.len > 1) { + if (remainder[1] >= first_low_surrogate and remainder[1] <= last_low_surrogate) { + buffer.ensureUnusedCapacity(allocator, 2) catch unreachable; + buffer.items.ptr[buffer.items.len] = first; + buffer.items.ptr[buffer.items.len + 1] = remainder[1]; + buffer.items.len += 2; + remainder = remainder[2..]; + continue; + } + } + buffer.ensureUnusedCapacity(allocator, 1) catch unreachable; + buffer.items.ptr[buffer.items.len] = strings.unicode_replacement; + buffer.items.len += 1; + remainder = remainder[1..]; + continue; + }, + + // Is this an unpaired low surrogate or four-digit hex escape? + else => { + buffer.ensureUnusedCapacity(allocator, 1) catch unreachable; + buffer.items.ptr[buffer.items.len] = strings.unicode_replacement; + buffer.items.len += 1; + remainder = remainder[1..]; + }, + } + } + + var full = buffer.toOwnedSlice(allocator); + + var out = ZigString.init(""); + out.ptr = @ptrCast([*]u8, full.ptr); + out.len = full.len; + out.markUTF16(); + return out.toValueGC(ctx.ptr()).asObjectRef(); + } + + pub fn decode( + this: *TextDecoder, + ctx: js.JSContextRef, + _: js.JSObjectRef, + _: js.JSObjectRef, + args: []const js.JSValueRef, + exception: js.ExceptionRef, + ) js.JSValueRef { + const arguments: []const JSC.JSValue = @ptrCast([*]const JSC.JSValue, args.ptr)[0..args.len]; + + if (arguments.len < 1 or arguments[0].isUndefined()) { + return ZigString.Empty.toValue(ctx.ptr()).asObjectRef(); + } + + const array_buffer = arguments[0].asArrayBuffer(ctx.ptr()) orelse { + JSC.throwInvalidArguments("TextDecoder.decode expects an ArrayBuffer or TypedArray", .{}, ctx, exception); + return null; + }; + + if (array_buffer.len == 0) { + return ZigString.Empty.toValue(ctx.ptr()).asObjectRef(); + } + + JSC.C.JSValueProtect(ctx, args[0]); + defer JSC.C.JSValueUnprotect(ctx, args[0]); + + switch (this.encoding) { + EncodingLabel.@"latin1" => { + return ZigString.init(array_buffer.slice()).toValueGC(ctx.ptr()).asObjectRef(); + }, + EncodingLabel.@"UTF-8" => { + const buffer_slice = array_buffer.slice(); + + if (this.fatal) { + if (strings.toUTF16Alloc(default_allocator, buffer_slice, true)) |result_| { + if (result_) |result| { + return ZigString.toExternalU16(result.ptr, result.len, ctx.ptr()).asObjectRef(); + } + } else |err| { + switch (err) { + error.InvalidByteSequence => { + JSC.JSError(default_allocator, "Invalid byte sequence", .{}, ctx, exception); + return null; + }, + error.OutOfMemory => { + JSC.JSError(default_allocator, "Out of memory", .{}, ctx, exception); + return null; + }, + else => { + JSC.JSError(default_allocator, "Unknown error", .{}, ctx, exception); + return null; + }, + } + } + } else { + if (strings.toUTF16Alloc(default_allocator, buffer_slice, false)) |result_| { + if (result_) |result| { + return ZigString.toExternalU16(result.ptr, result.len, ctx.ptr()).asObjectRef(); + } + } else |err| { + switch (err) { + error.OutOfMemory => { + JSC.JSError(default_allocator, "Out of memory", .{}, ctx, exception); + return null; + }, + else => { + JSC.JSError(default_allocator, "Unknown error", .{}, ctx, exception); + return null; + }, + } + } + } + + // Experiment: using mimalloc directly is slightly slower + return ZigString.init(buffer_slice).toValueGC(ctx.ptr()).asObjectRef(); + }, + + EncodingLabel.@"UTF-16LE" => { + if (std.mem.isAligned(@ptrToInt(array_buffer.ptr) + @as(usize, array_buffer.offset), @alignOf([*]u16))) { + return this.decodeUTF16WithAlignment([]u16, array_buffer.asU16(), ctx); + } + + return this.decodeUTF16WithAlignment([]align(1) u16, array_buffer.asU16Unaligned(), ctx); + }, + else => { + JSC.throwInvalidArguments("TextDecoder.decode set to unsupported encoding", .{}, ctx, exception); + return null; + }, + } + } + + pub const Constructor = JSC.NewConstructor(TextDecoder, .{ + .constructor = .{ .rfn = constructor }, + }, .{}); + + pub fn constructor( + ctx: js.JSContextRef, + _: js.JSObjectRef, + args_: []const js.JSValueRef, + exception: js.ExceptionRef, + ) js.JSObjectRef { + var arguments: []const JSC.JSValue = @ptrCast([*]const JSC.JSValue, args_.ptr)[0..args_.len]; + var encoding = EncodingLabel.@"UTF-8"; + if (arguments.len > 0) { + if (!arguments[0].isString()) { + JSC.throwInvalidArguments("TextDecoder(encoding) label is invalid", .{}, ctx, exception); + return null; + } + + var str = arguments[0].toSlice(ctx.ptr(), default_allocator); + defer if (str.allocated) str.deinit(); + encoding = EncodingLabel.which(str.slice()) orelse { + JSC.throwInvalidArguments("Unsupported encoding label \"{s}\"", .{str.slice()}, ctx, exception); + return null; + }; + } + var decoder = getAllocator(ctx).create(TextDecoder) catch unreachable; + decoder.* = TextDecoder{ .encoding = encoding }; + return TextDecoder.Class.make(ctx, decoder); + } +}; + +/// This code is incredibly redundant +/// We have different paths for creaitng a new buffer versus writing into an existing one +/// That's mostly why all the duplication +/// The majority of the business logic here is just shooting it off to the optimized functions +pub const Encoder = struct { + export fn Bun__encoding__writeLatin1AsHex(input: [*]const u8, len: usize, to: [*]u8, to_len: usize) i64 { + return writeU8(input, len, to, to_len, .hex); + } + export fn Bun__encoding__writeLatin1AsASCII(input: [*]const u8, len: usize, to: [*]u8, to_len: usize) i64 { + return writeU8(input, len, to, to_len, .ascii); + } + export fn Bun__encoding__writeLatin1AsURLSafeBase64(input: [*]const u8, len: usize, to: [*]u8, to_len: usize) i64 { + return writeU8(input, len, to, to_len, .base64url); + } + export fn Bun__encoding__writeLatin1AsUTF16(input: [*]const u8, len: usize, to: [*]u8, to_len: usize) i64 { + return writeU8(input, len, to, to_len, .utf16le); + } + export fn Bun__encoding__writeLatin1AsUTF8(input: [*]const u8, len: usize, to: [*]u8, to_len: usize) i64 { + return writeU8(input, len, to, to_len, JSC.Node.Encoding.utf8); + } + export fn Bun__encoding__writeLatin1AsBase64(input: [*]const u8, len: usize, to: [*]u8, to_len: usize) i64 { + return writeU8(input, len, to, to_len, .base64); + } + export fn Bun__encoding__writeUTF16AsBase64(input: [*]const u16, len: usize, to: [*]u8, to_len: usize) i64 { + return writeU16(input, len, to, to_len, .base64); + } + export fn Bun__encoding__writeUTF16AsHex(input: [*]const u16, len: usize, to: [*]u8, to_len: usize) i64 { + return writeU16(input, len, to, to_len, .hex); + } + export fn Bun__encoding__writeUTF16AsURLSafeBase64(input: [*]const u16, len: usize, to: [*]u8, to_len: usize) i64 { + return writeU16(input, len, to, to_len, .base64url); + } + export fn Bun__encoding__writeUTF16AsUTF16(input: [*]const u16, len: usize, to: [*]u8, to_len: usize) i64 { + return writeU16(input, len, to, to_len, JSC.Node.Encoding.utf16le); + } + export fn Bun__encoding__writeUTF16AsUTF8(input: [*]const u16, len: usize, to: [*]u8, to_len: usize) i64 { + return writeU16(input, len, to, to_len, .utf8); + } + export fn Bun__encoding__writeUTF16AsASCII(input: [*]const u8, len: usize, to: [*]u8, to_len: usize) i64 { + return writeU8(input, len, to, to_len, .ascii); + } + + export fn Bun__encoding__byteLengthLatin1AsHex(input: [*]const u8, len: usize) usize { + return byteLengthU8(input, len, .hex); + } + export fn Bun__encoding__byteLengthLatin1AsASCII(input: [*]const u8, len: usize) usize { + return byteLengthU8(input, len, .ascii); + } + export fn Bun__encoding__byteLengthLatin1AsURLSafeBase64(input: [*]const u8, len: usize) usize { + return byteLengthU8(input, len, .base64url); + } + export fn Bun__encoding__byteLengthLatin1AsUTF16(input: [*]const u8, len: usize) usize { + return byteLengthU8(input, len, .utf16le); + } + export fn Bun__encoding__byteLengthLatin1AsUTF8(input: [*]const u8, len: usize) usize { + return byteLengthU8(input, len, .utf8); + } + export fn Bun__encoding__byteLengthLatin1AsBase64(input: [*]const u8, len: usize) usize { + return byteLengthU8(input, len, .base64); + } + export fn Bun__encoding__byteLengthUTF16AsBase64(input: [*]const u16, len: usize) usize { + return byteLengthU16(input, len, .base64); + } + export fn Bun__encoding__byteLengthUTF16AsHex(input: [*]const u16, len: usize) usize { + return byteLengthU16(input, len, .hex); + } + export fn Bun__encoding__byteLengthUTF16AsURLSafeBase64(input: [*]const u16, len: usize) usize { + return byteLengthU16(input, len, .base64url); + } + export fn Bun__encoding__byteLengthUTF16AsUTF16(input: [*]const u16, len: usize) usize { + return byteLengthU16(input, len, .utf16le); + } + export fn Bun__encoding__byteLengthUTF16AsUTF8(input: [*]const u16, len: usize) usize { + return byteLengthU16(input, len, .utf8); + } + export fn Bun__encoding__byteLengthUTF16AsASCII(input: [*]const u8, len: usize) usize { + return byteLengthU8(input, len, .ascii); + } + + export fn Bun__encoding__constructFromLatin1AsHex(globalObject: *JSGlobalObject, input: [*]const u8, len: usize) JSValue { + var slice = constructFromU8(input, len, .hex); + return JSC.JSValue.createBuffer(globalObject, slice, globalObject.bunVM().allocator); + } + export fn Bun__encoding__constructFromLatin1AsASCII(globalObject: *JSGlobalObject, input: [*]const u8, len: usize) JSValue { + var slice = constructFromU8(input, len, .ascii); + return JSC.JSValue.createBuffer(globalObject, slice, globalObject.bunVM().allocator); + } + export fn Bun__encoding__constructFromLatin1AsURLSafeBase64(globalObject: *JSGlobalObject, input: [*]const u8, len: usize) JSValue { + var slice = constructFromU8(input, len, .base64url); + return JSC.JSValue.createBuffer(globalObject, slice, globalObject.bunVM().allocator); + } + export fn Bun__encoding__constructFromLatin1AsUTF16(globalObject: *JSGlobalObject, input: [*]const u8, len: usize) JSValue { + var slice = constructFromU8(input, len, .utf16le); + return JSC.JSValue.createBuffer(globalObject, slice, globalObject.bunVM().allocator); + } + export fn Bun__encoding__constructFromLatin1AsUTF8(globalObject: *JSGlobalObject, input: [*]const u8, len: usize) JSValue { + var slice = constructFromU8(input, len, JSC.Node.Encoding.utf8); + return JSC.JSValue.createBuffer(globalObject, slice, globalObject.bunVM().allocator); + } + export fn Bun__encoding__constructFromLatin1AsBase64(globalObject: *JSGlobalObject, input: [*]const u8, len: usize) JSValue { + var slice = constructFromU8(input, len, .base64); + return JSC.JSValue.createBuffer(globalObject, slice, globalObject.bunVM().allocator); + } + export fn Bun__encoding__constructFromUTF16AsBase64(globalObject: *JSGlobalObject, input: [*]const u16, len: usize) JSValue { + var slice = constructFromU16(input, len, .base64); + return JSC.JSValue.createBuffer(globalObject, slice, globalObject.bunVM().allocator); + } + export fn Bun__encoding__constructFromUTF16AsHex(globalObject: *JSGlobalObject, input: [*]const u16, len: usize) JSValue { + var slice = constructFromU16(input, len, .hex); + return JSC.JSValue.createBuffer(globalObject, slice, globalObject.bunVM().allocator); + } + export fn Bun__encoding__constructFromUTF16AsURLSafeBase64(globalObject: *JSGlobalObject, input: [*]const u16, len: usize) JSValue { + var slice = constructFromU16(input, len, .base64url); + return JSC.JSValue.createBuffer(globalObject, slice, globalObject.bunVM().allocator); + } + export fn Bun__encoding__constructFromUTF16AsUTF16(globalObject: *JSGlobalObject, input: [*]const u16, len: usize) JSValue { + var slice = constructFromU16(input, len, JSC.Node.Encoding.utf16le); + return JSC.JSValue.createBuffer(globalObject, slice, globalObject.bunVM().allocator); + } + export fn Bun__encoding__constructFromUTF16AsUTF8(globalObject: *JSGlobalObject, input: [*]const u16, len: usize) JSValue { + var slice = constructFromU16(input, len, .utf8); + return JSC.JSValue.createBuffer(globalObject, slice, globalObject.bunVM().allocator); + } + export fn Bun__encoding__constructFromUTF16AsASCII(globalObject: *JSGlobalObject, input: [*]const u16, len: usize) JSValue { + var slice = constructFromU16(input, len, .utf8); + return JSC.JSValue.createBuffer(globalObject, slice, globalObject.bunVM().allocator); + } + + export fn Bun__encoding__toStringUTF16(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue { + return toString(input, len, globalObject, JSC.Node.Encoding.utf16le); + } + export fn Bun__encoding__toStringUTF8(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue { + return toString(input, len, globalObject, .utf8); + } + export fn Bun__encoding__toStringASCII(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue { + return toString(input, len, globalObject, .ascii); + } + export fn Bun__encoding__toStringLatin1(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue { + return toString(input, len, globalObject, .latin1); + } + + export fn Bun__encoding__toStringHex(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue { + return toString(input, len, globalObject, .hex); + } + + export fn Bun__encoding__toStringBase64(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue { + return toString(input, len, globalObject, .base64); + } + + export fn Bun__encoding__toStringURLSafeBase64(input: [*]const u8, len: usize, globalObject: *JSC.JSGlobalObject) JSValue { + return toString(input, len, globalObject, .base64url); + } + + // pub fn writeUTF16AsUTF8(utf16: [*]const u16, len: usize, to: [*]u8, to_len: usize) callconv(.C) i32 { + // return @intCast(i32, strings.copyUTF16IntoUTF8(to[0..to_len], []const u16, utf16[0..len]).written); + // } + + pub fn toString(input_ptr: [*]const u8, len: usize, global: *JSGlobalObject, comptime encoding: JSC.Node.Encoding) JSValue { + if (len == 0) + return ZigString.Empty.toValue(global); + + const input = input_ptr[0..len]; + const allocator = VirtualMachine.vm.allocator; + + switch (comptime encoding) { + .latin1, .ascii => { + var to = allocator.alloc(u8, len) catch return ZigString.init("Out of memory").toErrorInstance(global); + + @memcpy(to.ptr, input_ptr, to.len); + + // Hoping this gets auto vectorized + for (to[0..to.len]) |c, i| { + to[i] = @as(u8, @truncate(u7, c)); + } + + return ZigString.init(to).toExternalValue(global); + }, + .buffer, .utf8 => { + // JSC only supports UTF-16 strings for non-ascii text + const converted = strings.toUTF16Alloc(allocator, input, false) catch return ZigString.init("Out of memory").toErrorInstance(global); + if (converted) |utf16| { + return ZigString.toExternalU16(utf16.ptr, utf16.len, global); + } + + // If we get here, it means we can safely assume the string is 100% ASCII characters + // For this, we rely on the GC to manage the memory to minimize potential for memory leaks + return ZigString.init(input).toValueGC(global); + }, + // potentially convert UTF-16 to UTF-8 + JSC.Node.Encoding.ucs2, JSC.Node.Encoding.utf16le => { + const converted = strings.toUTF16Alloc(allocator, input, false) catch return ZigString.init("Out of memory").toErrorInstance(global); + if (converted) |utf16| { + return ZigString.toExternalU16(utf16.ptr, utf16.len, global); + } + + var output = allocator.alloc(u8, input.len) catch return ZigString.init("Out of memory").toErrorInstance(global); + JSC.WTF.copyLCharsFromUCharSource(output.ptr, []align(1) const u16, @ptrCast([*]align(1) const u16, input.ptr)[0 .. input.len / 2]); + return ZigString.init(output).toExternalValue(global); + }, + + JSC.Node.Encoding.hex => { + var output = allocator.alloc(u8, input.len * 2) catch return ZigString.init("Out of memory").toErrorInstance(global); + const wrote = strings.encodeBytesToHex(output, input); + std.debug.assert(wrote == output.len); + var val = ZigString.init(output); + val.mark(); + return val.toExternalValue(global); + }, + + JSC.Node.Encoding.base64url => { + return JSC.WTF.toBase64URLStringValue(input, global); + }, + + JSC.Node.Encoding.base64 => { + const to_len = bun.base64.encodeLen(input); + var to = allocator.alloc(u8, to_len) catch return ZigString.init("Out of memory").toErrorInstance(global); + const wrote = bun.base64.encode(to, input); + return ZigString.init(to[0..wrote]).toExternalValue(global); + }, + } + } + + pub fn writeU8(input: [*]const u8, len: usize, to: [*]u8, to_len: usize, comptime encoding: JSC.Node.Encoding) i64 { + if (len == 0 or to_len == 0) + return 0; + + // TODO: increase temporary buffer size for larger amounts of data + // defer { + // if (comptime encoding.isBinaryToText()) {} + // } + + // if (comptime encoding.isBinaryToText()) {} + + switch (comptime encoding) { + JSC.Node.Encoding.buffer => { + const written = @minimum(len, to_len); + @memcpy(to, input, written); + + return @intCast(i64, written); + }, + .latin1, .ascii => { + const written = @minimum(len, to_len); + @memcpy(to, input, written); + + // Hoping this gets auto vectorized + for (to[0..written]) |c, i| { + to[i] = @as(u8, @truncate(u7, c)); + } + + return @intCast(i64, written); + }, + .utf8 => { + // need to encode + return @intCast(i64, strings.copyLatin1IntoUTF8(to[0..to_len], []const u8, input[0..len]).written); + }, + // encode latin1 into UTF16 + JSC.Node.Encoding.ucs2, JSC.Node.Encoding.utf16le => { + if (to_len < 2) + return 0; + + if (std.mem.isAligned(@ptrToInt(to), @alignOf([*]u16))) { + var buf = input[0..len]; + var output = @ptrCast([*]u16, @alignCast(@alignOf(u16), to))[0 .. to_len / 2]; + return strings.copyLatin1IntoUTF16([]u16, output, []const u8, buf).written; + } else { + var buf = input[0..len]; + var output = @ptrCast([*]align(1) u16, to)[0 .. to_len / 2]; + return strings.copyLatin1IntoUTF16([]align(1) u16, output, []const u8, buf).written; + } + }, + + JSC.Node.Encoding.hex => { + return @intCast(i64, strings.decodeHexToBytes(to[0..to_len], u8, input[0..len])); + }, + + JSC.Node.Encoding.base64url => { + var slice = strings.trim(input[0..len], "\r\n\t " ++ [_]u8{std.ascii.control_code.VT}); + if (slice.len == 0) + return 0; + + if (strings.eqlComptime(slice[slice.len - 2 ..][0..2], "==")) { + slice = slice[0 .. slice.len - 2]; + } else if (slice[slice.len - 1] == '=') { + slice = slice[0 .. slice.len - 1]; + } + + const wrote = bun.base64.urlsafe.decode(to[0..to_len], slice) catch |err| brk: { + if (err == error.NoSpaceLeft) { + break :brk to_len; + } + + return -1; + }; + return @intCast(i64, wrote); + }, + + JSC.Node.Encoding.base64 => { + var slice = strings.trim(input[0..len], "\r\n\t " ++ [_]u8{std.ascii.control_code.VT}); + var outlen = bun.base64.decodeLen(slice); + + return @intCast(i64, bun.base64.decode(to[0..outlen], slice).written); + }, + // else => return 0, + } + } + + pub fn byteLengthU8(input: [*]const u8, len: usize, comptime encoding: JSC.Node.Encoding) usize { + if (len == 0) + return 0; + + switch (comptime encoding) { + .utf8 => { + return strings.elementLengthLatin1IntoUTF8([]const u8, input[0..len]); + }, + + .latin1, JSC.Node.Encoding.ascii, JSC.Node.Encoding.buffer => { + return len; + }, + + JSC.Node.Encoding.ucs2, JSC.Node.Encoding.utf16le => { + return strings.elementLengthUTF8IntoUTF16([]const u8, input[0..len]) * 2; + }, + + JSC.Node.Encoding.hex => { + return len * 2; + }, + + JSC.Node.Encoding.base64, JSC.Node.Encoding.base64url => { + return bun.base64.encodeLen(input[0..len]); + }, + // else => return &[_]u8{}; + } + } + + pub fn writeU16(input: [*]const u16, len: usize, to: [*]u8, to_len: usize, comptime encoding: JSC.Node.Encoding) i64 { + if (len == 0) + return 0; + + switch (comptime encoding) { + .utf8 => { + return @intCast(i32, strings.copyUTF16IntoUTF8(to[0..to_len], []const u16, input[0..len]).written); + }, + // string is already encoded, just need to copy the data + .latin1, JSC.Node.Encoding.ascii, JSC.Node.Encoding.ucs2, JSC.Node.Encoding.buffer, JSC.Node.Encoding.utf16le => { + strings.copyU16IntoU8(to[0..to_len], []const u16, input[0..len]); + + return @intCast(i64, @minimum(len, to_len)); + }, + + JSC.Node.Encoding.hex => { + return @intCast(i64, strings.decodeHexToBytes(to[0..to_len], u16, input[0..len])); + }, + + JSC.Node.Encoding.base64, JSC.Node.Encoding.base64url => { + if (to_len < 2 or len == 0) + return 0; + + // very very slow case! + // shouldn't really happen though + var transcoded = strings.toUTF8Alloc(bun.default_allocator, input[0..len]) catch return 0; + defer bun.default_allocator.free(transcoded); + return writeU8(transcoded.ptr, transcoded.len, to, to_len, encoding); + }, + // else => return &[_]u8{}; + } + } + + /// Node returns imprecise byte length here + /// Should be fast enough for us to return precise length + pub fn byteLengthU16(input: [*]const u16, len: usize, comptime encoding: JSC.Node.Encoding) usize { + if (len == 0) + return 0; + + switch (comptime encoding) { + // these should be the same size + .ascii, .latin1, .utf8 => { + return strings.elementLengthUTF16IntoUTF8([]const u16, input[0..len]); + }, + JSC.Node.Encoding.ucs2, JSC.Node.Encoding.buffer, JSC.Node.Encoding.utf16le => { + return len * 2; + }, + + JSC.Node.Encoding.hex => { + return len; + }, + + JSC.Node.Encoding.base64, JSC.Node.Encoding.base64url => { + return bun.base64.encodeLen(input[0..len]); + }, + // else => return &[_]u8{}; + } + } + + pub fn constructFromU8(input: [*]const u8, len: usize, comptime encoding: JSC.Node.Encoding) []u8 { + if (len == 0) + return &[_]u8{}; + + const allocator = VirtualMachine.vm.allocator; + + switch (comptime encoding) { + JSC.Node.Encoding.buffer => { + var to = allocator.alloc(u8, len) catch return &[_]u8{}; + @memcpy(to.ptr, input, len); + + return to; + }, + .latin1, .ascii => { + var to = allocator.alloc(u8, len) catch return &[_]u8{}; + @memcpy(to.ptr, input, len); + + // Hoping this gets auto vectorized + for (to[0..len]) |c, i| { + to[i] = @as(u8, @truncate(u7, c)); + } + + return to; + }, + .utf8 => { + // need to encode + return strings.allocateLatin1IntoUTF8(allocator, []const u8, input[0..len]) catch return &[_]u8{}; + }, + // encode latin1 into UTF16 + // return as bytes + JSC.Node.Encoding.ucs2, JSC.Node.Encoding.utf16le => { + var to = allocator.alloc(u16, len) catch return &[_]u8{}; + _ = strings.copyLatin1IntoUTF16([]u16, to, []const u8, input[0..len]); + return std.mem.sliceAsBytes(to[0..len]); + }, + + JSC.Node.Encoding.hex => { + if (len < 2) + return &[_]u8{}; + + var to = allocator.alloc(u8, len / 2) catch return &[_]u8{}; + return to[0..strings.decodeHexToBytes(to, u8, input[0..len])]; + }, + + JSC.Node.Encoding.base64url => { + var slice = strings.trim(input[0..len], "\r\n\t " ++ [_]u8{std.ascii.control_code.VT}); + if (slice.len == 0) + return &[_]u8{}; + + if (strings.eqlComptime(slice[slice.len - 2 ..][0..2], "==")) { + slice = slice[0 .. slice.len - 2]; + } else if (slice[slice.len - 1] == '=') { + slice = slice[0 .. slice.len - 1]; + } + + const to_len = bun.base64.urlsafe.decoder.calcSizeForSlice(slice) catch unreachable; + var to = allocator.alloc(u8, to_len) catch return &[_]u8{}; + + const wrote = bun.base64.urlsafe.decode(to[0..to_len], slice) catch |err| brk: { + if (err == error.NoSpaceLeft) { + break :brk to_len; + } + + return &[_]u8{}; + }; + return to[0..wrote]; + }, + + JSC.Node.Encoding.base64 => { + var slice = strings.trim(input[0..len], "\r\n\t " ++ [_]u8{std.ascii.control_code.VT}); + var outlen = bun.base64.decodeLen(slice); + + var to = allocator.alloc(u8, outlen) catch return &[_]u8{}; + const written = bun.base64.decode(to[0..outlen], slice).written; + return to[0..written]; + }, + // else => return 0, + } + } + + pub fn constructFromU16(input: [*]const u16, len: usize, comptime encoding: JSC.Node.Encoding) []u8 { + if (len == 0) + return &[_]u8{}; + + const allocator = VirtualMachine.vm.allocator; + + switch (comptime encoding) { + .utf8 => { + return strings.toUTF8AllocWithType(allocator, []const u16, input[0..len]) catch return &[_]u8{}; + }, + JSC.Node.Encoding.latin1, JSC.Node.Encoding.buffer, JSC.Node.Encoding.ascii => { + var to = allocator.alloc(u8, len) catch return &[_]u8{}; + @memcpy(to.ptr, input, len); + for (to[0..len]) |c, i| { + to[i] = @as(u8, @truncate(u7, c)); + } + + return to; + }, + // string is already encoded, just need to copy the data + JSC.Node.Encoding.ucs2, JSC.Node.Encoding.utf16le => { + var to = std.mem.sliceAsBytes(allocator.alloc(u16, len * 2) catch return &[_]u8{}); + @memcpy(to.ptr, std.mem.sliceAsBytes(input[0..len]).ptr, std.mem.sliceAsBytes(input[0..len]).len); + return to; + }, + + JSC.Node.Encoding.hex => { + var to = allocator.alloc(u8, len * 2) catch return &[_]u8{}; + return to[0..strings.decodeHexToBytes(to, u16, input[0..len])]; + }, + + JSC.Node.Encoding.base64 => { + + // very very slow case! + // shouldn't really happen though + var transcoded = strings.toUTF8Alloc(allocator, input[0..len]) catch return &[_]u8{}; + defer allocator.free(transcoded); + return constructFromU8(transcoded.ptr, transcoded.len, .base64); + }, + + JSC.Node.Encoding.base64url => { + + // very very slow case! + // shouldn't really happen though + var transcoded = strings.toUTF8Alloc(allocator, input[0..len]) catch return &[_]u8{}; + defer allocator.free(transcoded); + return constructFromU8(transcoded.ptr, transcoded.len, .base64url); + }, + // else => return 0, + } + } + + comptime { + if (!JSC.is_bindgen) { + _ = Bun__encoding__writeLatin1AsHex; + _ = Bun__encoding__writeLatin1AsURLSafeBase64; + _ = Bun__encoding__writeLatin1AsUTF16; + _ = Bun__encoding__writeLatin1AsUTF8; + _ = Bun__encoding__writeLatin1AsBase64; + _ = Bun__encoding__writeUTF16AsBase64; + _ = Bun__encoding__writeUTF16AsHex; + _ = Bun__encoding__writeUTF16AsURLSafeBase64; + _ = Bun__encoding__writeUTF16AsUTF16; + _ = Bun__encoding__writeUTF16AsUTF8; + _ = Bun__encoding__writeLatin1AsASCII; + _ = Bun__encoding__writeUTF16AsASCII; + + _ = Bun__encoding__byteLengthLatin1AsHex; + _ = Bun__encoding__byteLengthLatin1AsURLSafeBase64; + _ = Bun__encoding__byteLengthLatin1AsUTF16; + _ = Bun__encoding__byteLengthLatin1AsUTF8; + _ = Bun__encoding__byteLengthLatin1AsBase64; + _ = Bun__encoding__byteLengthUTF16AsBase64; + _ = Bun__encoding__byteLengthUTF16AsHex; + _ = Bun__encoding__byteLengthUTF16AsURLSafeBase64; + _ = Bun__encoding__byteLengthUTF16AsUTF16; + _ = Bun__encoding__byteLengthUTF16AsUTF8; + _ = Bun__encoding__byteLengthLatin1AsASCII; + _ = Bun__encoding__byteLengthUTF16AsASCII; + + _ = Bun__encoding__toStringUTF16; + _ = Bun__encoding__toStringUTF8; + _ = Bun__encoding__toStringASCII; + _ = Bun__encoding__toStringLatin1; + _ = Bun__encoding__toStringHex; + _ = Bun__encoding__toStringBase64; + _ = Bun__encoding__toStringURLSafeBase64; + + _ = Bun__encoding__constructFromLatin1AsHex; + _ = Bun__encoding__constructFromLatin1AsASCII; + _ = Bun__encoding__constructFromLatin1AsURLSafeBase64; + _ = Bun__encoding__constructFromLatin1AsUTF16; + _ = Bun__encoding__constructFromLatin1AsUTF8; + _ = Bun__encoding__constructFromLatin1AsBase64; + _ = Bun__encoding__constructFromUTF16AsBase64; + _ = Bun__encoding__constructFromUTF16AsHex; + _ = Bun__encoding__constructFromUTF16AsURLSafeBase64; + _ = Bun__encoding__constructFromUTF16AsUTF16; + _ = Bun__encoding__constructFromUTF16AsUTF8; + _ = Bun__encoding__constructFromUTF16AsASCII; + } + } +}; + +comptime { + if (!JSC.is_bindgen) { + std.testing.refAllDecls(Encoder); + } +} + +test "Vec" {} |