diff options
author | 2023-03-18 00:55:05 -0700 | |
---|---|---|
committer | 2023-03-18 00:55:05 -0700 | |
commit | a9c41c67e639714fbb4d7764e18be37615280c08 (patch) | |
tree | 200e293da7e447f349c9dc6c2cfee593d21a3eff | |
parent | 1d4cc63154366dbdbdb87c8da43753cdac13d995 (diff) | |
download | bun-a9c41c67e639714fbb4d7764e18be37615280c08.tar.gz bun-a9c41c67e639714fbb4d7764e18be37615280c08.tar.zst bun-a9c41c67e639714fbb4d7764e18be37615280c08.zip |
Fix several bugs (#2418)
* utf16 codepoint with replacement character
* Fix test failure with `TextEncoder("ascii")`
* Add missing type
* Fix Response.prototype.bodyUsed and Request.prototype.bodyUsed
* Fix bug with scrypt error not clearing
* Update server.zig
* oopsie
-rw-r--r-- | packages/bun-types/globals.d.ts | 7 | ||||
-rw-r--r-- | src/bun.js/api/server.zig | 3 | ||||
-rw-r--r-- | src/bun.js/webcore.zig | 1 | ||||
-rw-r--r-- | src/bun.js/webcore/body.zig | 5 | ||||
-rw-r--r-- | src/bun.js/webcore/encoding.zig | 23 | ||||
-rw-r--r-- | src/string_immutable.zig | 61 | ||||
-rw-r--r-- | test/js/web/encoding/text-encoder.test.js | 9 |
7 files changed, 93 insertions, 16 deletions
diff --git a/packages/bun-types/globals.d.ts b/packages/bun-types/globals.d.ts index 6bb02e0c2..82f8cfb8b 100644 --- a/packages/bun-types/globals.d.ts +++ b/packages/bun-types/globals.d.ts @@ -1096,6 +1096,13 @@ declare class Request implements BlobInterface { * @returns Promise<FormData> - The body of the request as a {@link FormData}. */ formData(): Promise<FormData>; + + /** + * Has the body of the request been read? + * + * [Request.bodyUsed](https://developer.mozilla.org/en-US/docs/Web/API/Request/bodyUsed) + */ + readonly bodyUsed: boolean; } declare interface Crypto { diff --git a/src/bun.js/api/server.zig b/src/bun.js/api/server.zig index 113432942..cafd1d358 100644 --- a/src/bun.js/api/server.zig +++ b/src/bun.js/api/server.zig @@ -4620,6 +4620,9 @@ pub fn NewServer(comptime ssl_enabled_: bool, comptime debug_mode_: bool) type { zig_str = ZigString.init(std.fmt.allocPrint(bun.default_allocator, "OpenSSL {s}", .{message}) catch unreachable); var encoded_str = zig_str.withEncoding(); encoded_str.mark(); + + // We shouldn't *need* to do this but it's not entirely clear. 
+ BoringSSL.ERR_clear_error(); } } diff --git a/src/bun.js/webcore.zig b/src/bun.js/webcore.zig index 70c6ded5c..4bb0bd2a5 100644 --- a/src/bun.js/webcore.zig +++ b/src/bun.js/webcore.zig @@ -583,6 +583,7 @@ pub const Crypto = struct { else => @compileError("Error type not added!"), }; globalThis.throwValue(err); + BoringSSL.ERR_clear_error(); return .zero; } diff --git a/src/bun.js/webcore/body.zig b/src/bun.js/webcore/body.zig index 1a0831333..2eed85808 100644 --- a/src/bun.js/webcore/body.zig +++ b/src/bun.js/webcore/body.zig @@ -781,7 +781,10 @@ pub const Body = struct { else => .{ .Blob = Blob.initEmpty(undefined) }, }; - this.* = .{ .Used = {} }; + this.* = if (this.* == .Null) + .{ .Null = {} } + else + .{ .Used = {} }; return any_blob; } diff --git a/src/bun.js/webcore/encoding.zig b/src/bun.js/webcore/encoding.zig index d58513c1e..184f1c0e1 100644 --- a/src/bun.js/webcore/encoding.zig +++ b/src/bun.js/webcore/encoding.zig @@ -221,10 +221,12 @@ pub const TextEncoder = struct { ) u64 { var output = buf_ptr[0..buf_len]; const input = input_ptr[0..input_len]; - const result: strings.EncodeIntoResult = strings.copyUTF16IntoUTF8(output, []const u16, input, true); - if (result.read == 0 or result.written == 0) { + var result: strings.EncodeIntoResult = strings.copyUTF16IntoUTF8(output, []const u16, input, false); + if (output.len >= 3 and (result.read == 0 or result.written == 0)) { const replacement_char = [_]u8{ 239, 191, 189 }; @memcpy(buf_ptr, &replacement_char, replacement_char.len); + result.read = 1; + result.written = 3; } const sized: [2]u32 = .{ result.read, result.written }; return @bitCast(u64, sized); @@ -602,7 +604,22 @@ pub const TextDecoder = struct { fn decodeSlice(this: *TextDecoder, globalThis: *JSC.JSGlobalObject, buffer_slice: []const u8) JSValue { switch (this.encoding) { EncodingLabel.latin1 => { - return ZigString.init(buffer_slice).toValueGC(globalThis); + if (strings.isAllASCII(buffer_slice)) { + return 
ZigString.init(buffer_slice).toValueGC(globalThis); + } + + // It's unintuitive that we encode Latin1 as UTF16 even though the engine natively supports Latin1 strings... + // However, this is also what WebKit seems to do. + // + // It's not clear why we couldn't jusst use Latin1 here, but tests failures proved it necessary. + const out_length = strings.elementLengthLatin1IntoUTF16([]const u8, buffer_slice); + var bytes = globalThis.allocator().alloc(u16, out_length) catch { + globalThis.throwOutOfMemory(); + return .zero; + }; + + const out = strings.copyLatin1IntoUTF16([]u16, bytes, []const u8, buffer_slice); + return ZigString.toExternalU16(bytes.ptr, out.written, globalThis); }, EncodingLabel.@"UTF-8" => { if (this.fatal) { diff --git a/src/string_immutable.zig b/src/string_immutable.zig index 7bdd7cfe5..16caa133f 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -1220,6 +1220,40 @@ pub fn toUTF16Alloc(allocator: std.mem.Allocator, bytes: []const u8, comptime fa return null; } +pub fn utf16CodepointWithFFFD(comptime Type: type, input: Type) UTF16Replacement { + const c0 = @as(u21, input[0]); + + if (c0 & ~@as(u21, 0x03ff) == 0xd800) { + // surrogate pair + if (input.len == 1) + return .{ + .len = 1, + }; + //error.DanglingSurrogateHalf; + const c1 = @as(u21, input[1]); + if (c1 & ~@as(u21, 0x03ff) != 0xdc00) + if (input.len == 1) { + return .{ + .len = 1, + }; + } else { + return .{ + .fail = true, + .len = 1, + .code_point = unicode_replacement, + }; + }; + // return error.ExpectedSecondSurrogateHalf; + + return .{ .len = 2, .code_point = 0x10000 + (((c0 & 0x03ff) << 10) | (c1 & 0x03ff)) }; + } else if (c0 & ~@as(u21, 0x03ff) == 0xdc00) { + // return error.UnexpectedSecondSurrogateHalf; + return .{ .fail = true, .len = 1, .code_point = unicode_replacement }; + } else { + return .{ .code_point = c0, .len = 1 }; + } +} + pub fn utf16Codepoint(comptime Type: type, input: Type) UTF16Replacement { const c0 = @as(u21, input[0]); @@ -2576,16 
+2610,19 @@ pub fn copyUTF16IntoUTF8WithBuffer(buf: []u8, comptime Type: type, utf16: Type, var utf16_remaining = utf16; var ended_on_non_ascii = false; - if (comptime Type == []const u16) { - if (bun.FeatureFlags.use_simdutf) { - log("UTF16 {d} -> UTF8 {d}", .{ utf16.len, out_len }); - - if (remaining.len >= out_len) { - const result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(trimmed, remaining[0..out_len]); - return EncodeIntoResult{ - .read = @truncate(u32, trimmed.len), - .written = @truncate(u32, result.count), - }; + brk: { + if (comptime Type == []const u16) { + if (bun.FeatureFlags.use_simdutf) { + log("UTF16 {d} -> UTF8 {d}", .{ utf16.len, out_len }); + if (remaining.len >= out_len) { + const result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(trimmed, remaining); + if (result.status == .surrogate) break :brk; + + return EncodeIntoResult{ + .read = @truncate(u32, trimmed.len), + .written = @truncate(u32, result.count), + }; + } } } } @@ -2599,7 +2636,7 @@ pub fn copyUTF16IntoUTF8WithBuffer(buf: []u8, comptime Type: type, utf16: Type, if (@min(utf16_remaining.len, remaining.len) == 0) break; - const replacement = utf16Codepoint(Type, utf16_remaining); + const replacement = utf16CodepointWithFFFD(Type, utf16_remaining); const width: usize = replacement.utf8Width(); if (width > remaining.len) { @@ -2642,7 +2679,7 @@ pub fn copyUTF16IntoUTF8WithBuffer(buf: []u8, comptime Type: type, utf16: Type, 3 => { remaining[0] = @truncate(u8, 0xF0 | (replacement.code_point >> 18)); remaining[1] = @truncate(u8, 0x80 | (replacement.code_point >> 12) & 0x3F); - remaining[3] = @truncate(u8, 0x80 | (replacement.code_point >> 0) & 0x3F); + remaining[2] = @truncate(u8, 0x80 | (replacement.code_point >> 6) & 0x3F); remaining = remaining[remaining.len..]; }, else => {}, diff --git a/test/js/web/encoding/text-encoder.test.js b/test/js/web/encoding/text-encoder.test.js index 3d271026d..1bf2057bc 100644 --- a/test/js/web/encoding/text-encoder.test.js +++ 
b/test/js/web/encoding/text-encoder.test.js @@ -13,6 +13,15 @@ const getByteLength = str => { return s; }; +it("not enough space for replacement character", () => { + const encoder = new TextEncoder(); + const bytes = new Uint8Array(2); + const result = encoder.encodeInto("\udc00", bytes); + expect(result.read).toBe(0); + expect(result.written).toBe(0); + expect(Array.from(bytes)).toEqual([0x00, 0x00]); +}); + describe("TextEncoder", () => { it("should encode latin1 text with non-ascii latin1 characters", () => { var text = "H©ell©o Wor©ld!"; |