Fix several bugs (#2418)

* utf16 codepoint with replacement character
* Fix test failure with `TextEncoder("ascii")`
* Add missing type
* Fix Response.prototype.bodyUsed and Request.prototype.bodyUsed
* Fix bug with scrypt error not clearing
* Update server.zig
* oopsie
author: Dylan Conway <35280289+dylan-conway@users.noreply.github.com> 2023-03-18 00:55:05 -0700
committer: GitHub <noreply@github.com> 2023-03-18 00:55:05 -0700
commit: a9c41c67e639714fbb4d7764e18be37615280c08 (patch)
tree: 200e293da7e447f349c9dc6c2cfee593d21a3eff
parent: 1d4cc63154366dbdbdb87c8da43753cdac13d995 (diff)
download: bun-a9c41c67e639714fbb4d7764e18be37615280c08.tar.gz
bun-a9c41c67e639714fbb4d7764e18be37615280c08.tar.zst
bun-a9c41c67e639714fbb4d7764e18be37615280c08.zip
7 files changed, 93 insertions, 16 deletions
diff --git a/packages/bun-types/globals.d.ts b/packages/bun-types/globals.d.ts
index 6bb02e0c2..82f8cfb8b 100644
--- a/packages/bun-types/globals.d.ts
+++ b/packages/bun-types/globals.d.ts
@@ -1096,6 +1096,13 @@ declare class Request implements BlobInterface {
    * @returns Promise<FormData> - The body of the request as a {@link FormData}.
    */
   formData(): Promise<FormData>;
+
+  /**
+   * Has the body of the request been read?
+   *
+   * [Request.bodyUsed](https://developer.mozilla.org/en-US/docs/Web/API/Request/bodyUsed)
+   */
+  readonly bodyUsed: boolean;
 }
 
 declare interface Crypto {
diff --git a/src/bun.js/api/server.zig b/src/bun.js/api/server.zig
index 113432942..cafd1d358 100644
--- a/src/bun.js/api/server.zig
+++ b/src/bun.js/api/server.zig
@@ -4620,6 +4620,9 @@ pub fn NewServer(comptime ssl_enabled_: bool, comptime debug_mode_: bool) type {
                     zig_str = ZigString.init(std.fmt.allocPrint(bun.default_allocator, "OpenSSL {s}", .{message}) catch unreachable);
                     var encoded_str = zig_str.withEncoding();
                     encoded_str.mark();
+
+                    // We shouldn't *need* to do this but it's not entirely clear.
+                    BoringSSL.ERR_clear_error();
                 }
             }
 
diff --git a/src/bun.js/webcore.zig b/src/bun.js/webcore.zig
index 70c6ded5c..4bb0bd2a5 100644
--- a/src/bun.js/webcore.zig
+++ b/src/bun.js/webcore.zig
@@ -583,6 +583,7 @@ pub const Crypto = struct {
             else => @compileError("Error type not added!"),
         };
         globalThis.throwValue(err);
+        BoringSSL.ERR_clear_error();
         return .zero;
     }
 
diff --git a/src/bun.js/webcore/body.zig b/src/bun.js/webcore/body.zig
index 1a0831333..2eed85808 100644
--- a/src/bun.js/webcore/body.zig
+++ b/src/bun.js/webcore/body.zig
@@ -781,7 +781,10 @@ pub const Body = struct {
                 else => .{ .Blob = Blob.initEmpty(undefined) },
             };
 
-            this.* = .{ .Used = {} };
+            this.* = if (this.* == .Null)
+                .{ .Null = {} }
+            else
+                .{ .Used = {} };
             return any_blob;
         }
 
diff --git a/src/bun.js/webcore/encoding.zig b/src/bun.js/webcore/encoding.zig
index d58513c1e..184f1c0e1 100644
--- a/src/bun.js/webcore/encoding.zig
+++ b/src/bun.js/webcore/encoding.zig
@@ -221,10 +221,12 @@ pub const TextEncoder = struct {
     ) u64 {
         var output = buf_ptr[0..buf_len];
         const input = input_ptr[0..input_len];
-        const result: strings.EncodeIntoResult = strings.copyUTF16IntoUTF8(output, []const u16, input, true);
-        if (result.read == 0 or result.written == 0) {
+        var result: strings.EncodeIntoResult = strings.copyUTF16IntoUTF8(output, []const u16, input, false);
+        if (output.len >= 3 and (result.read == 0 or result.written == 0)) {
             const replacement_char = [_]u8{ 239, 191, 189 };
             @memcpy(buf_ptr, &replacement_char, replacement_char.len);
+            result.read = 1;
+            result.written = 3;
         }
         const sized: [2]u32 = .{ result.read, result.written };
         return @bitCast(u64, sized);
@@ -602,7 +604,22 @@ pub const TextDecoder = struct {
     fn decodeSlice(this: *TextDecoder, globalThis: *JSC.JSGlobalObject, buffer_slice: []const u8) JSValue {
         switch (this.encoding) {
             EncodingLabel.latin1 => {
-                return ZigString.init(buffer_slice).toValueGC(globalThis);
+                if (strings.isAllASCII(buffer_slice)) {
+                    return ZigString.init(buffer_slice).toValueGC(globalThis);
+                }
+
+                // It's unintuitive that we encode Latin1 as UTF16 even though the engine natively supports Latin1 strings...
+                // However, this is also what WebKit seems to do.
+                //
+                // It's not clear why we couldn't just use Latin1 here, but test failures proved it necessary.
+                const out_length = strings.elementLengthLatin1IntoUTF16([]const u8, buffer_slice);
+                var bytes = globalThis.allocator().alloc(u16, out_length) catch {
+                    globalThis.throwOutOfMemory();
+                    return .zero;
+                };
+
+                const out = strings.copyLatin1IntoUTF16([]u16, bytes, []const u8, buffer_slice);
+                return ZigString.toExternalU16(bytes.ptr, out.written, globalThis);
             },
             EncodingLabel.@"UTF-8" => {
                 if (this.fatal) {
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 7bdd7cfe5..16caa133f 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -1220,6 +1220,40 @@ pub fn toUTF16Alloc(allocator: std.mem.Allocator, bytes: []const u8, comptime fa
     return null;
 }
 
+/// Decodes the leading code point of the UTF-16 sequence `input`, flagging unpaired surrogates.
+///
+/// `input` must be non-empty: `input[0]` is read unconditionally.
+/// The returned `len` is the number of UTF-16 code units consumed (1 or 2).
+/// A leading low surrogate, or a high surrogate followed by a non-low-surrogate unit, yields
+/// `fail = true` with `unicode_replacement`; a trailing high surrogate returns only `len = 1`.
+pub fn utf16CodepointWithFFFD(comptime Type: type, input: Type) UTF16Replacement {
+    const c0 = @as(u21, input[0]);
+
+    if (c0 & ~@as(u21, 0x03ff) == 0xd800) {
+        // High surrogate: a low surrogate must follow to form a pair.
+        if (input.len == 1)
+            return .{
+                .len = 1,
+            };
+
+        const c1 = @as(u21, input[1]);
+        // `input.len > 1` holds here, so a mismatch is always reported as a failure.
+        if (c1 & ~@as(u21, 0x03ff) != 0xdc00)
+            return .{
+                .fail = true,
+                .len = 1,
+                .code_point = unicode_replacement,
+            };
+
+        return .{ .len = 2, .code_point = 0x10000 + (((c0 & 0x03ff) << 10) | (c1 & 0x03ff)) };
+    } else if (c0 & ~@as(u21, 0x03ff) == 0xdc00) {
+        // Low surrogate with no preceding high surrogate.
+        return .{ .fail = true, .len = 1, .code_point = unicode_replacement };
+    } else {
+        return .{ .code_point = c0, .len = 1 };
+    }
+}
+
 pub fn utf16Codepoint(comptime Type: type, input: Type) UTF16Replacement {
     const c0 = @as(u21, input[0]);
 
@@ -2576,16 +2610,19 @@ pub fn copyUTF16IntoUTF8WithBuffer(buf: []u8, comptime Type: type, utf16: Type,
     var utf16_remaining = utf16;
     var ended_on_non_ascii = false;
 
-    if (comptime Type == []const u16) {
-        if (bun.FeatureFlags.use_simdutf) {
-            log("UTF16 {d} -> UTF8 {d}", .{ utf16.len, out_len });
-
-            if (remaining.len >= out_len) {
-                const result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(trimmed, remaining[0..out_len]);
-                return EncodeIntoResult{
-                    .read = @truncate(u32, trimmed.len),
-                    .written = @truncate(u32, result.count),
-                };
+    brk: {
+        if (comptime Type == []const u16) {
+            if (bun.FeatureFlags.use_simdutf) {
+                log("UTF16 {d} -> UTF8 {d}", .{ utf16.len, out_len });
+                if (remaining.len >= out_len) {
+                    const result = bun.simdutf.convert.utf16.to.utf8.with_errors.le(trimmed, remaining);
+                    if (result.status == .surrogate) break :brk;
+
+                    return EncodeIntoResult{
+                        .read = @truncate(u32, trimmed.len),
+                        .written = @truncate(u32, result.count),
+                    };
+                }
             }
         }
     }
@@ -2599,7 +2636,7 @@ pub fn copyUTF16IntoUTF8WithBuffer(buf: []u8, comptime Type: type, utf16: Type,
         if (@min(utf16_remaining.len, remaining.len) == 0)
             break;
 
-        const replacement = utf16Codepoint(Type, utf16_remaining);
+        const replacement = utf16CodepointWithFFFD(Type, utf16_remaining);
 
         const width: usize = replacement.utf8Width();
         if (width > remaining.len) {
@@ -2642,7 +2679,7 @@ pub fn copyUTF16IntoUTF8WithBuffer(buf: []u8, comptime Type: type, utf16: Type,
                         3 => {
                             remaining[0] = @truncate(u8, 0xF0 | (replacement.code_point >> 18));
                             remaining[1] = @truncate(u8, 0x80 | (replacement.code_point >> 12) & 0x3F);
-                            remaining[3] = @truncate(u8, 0x80 | (replacement.code_point >> 0) & 0x3F);
+                            remaining[2] = @truncate(u8, 0x80 | (replacement.code_point >> 6) & 0x3F);
                             remaining = remaining[remaining.len..];
                         },
                         else => {},
diff --git a/test/js/web/encoding/text-encoder.test.js b/test/js/web/encoding/text-encoder.test.js
index 3d271026d..1bf2057bc 100644
--- a/test/js/web/encoding/text-encoder.test.js
+++ b/test/js/web/encoding/text-encoder.test.js
@@ -13,6 +13,15 @@ const getByteLength = str => {
   return s;
 };
 
+it("not enough space for replacement character", () => {
+  const encoder = new TextEncoder();
+  const bytes = new Uint8Array(2);
+  const result = encoder.encodeInto("\udc00", bytes);
+  expect(result.read).toBe(0);
+  expect(result.written).toBe(0);
+  expect(Array.from(bytes)).toEqual([0x00, 0x00]);
+});
+
 describe("TextEncoder", () => {
   it("should encode latin1 text with non-ascii latin1 characters", () => {
     var text = "H©ell©o Wor©ld!";
author	Dylan Conway <35280289+dylan-conway@users.noreply.github.com>	2023-03-18 00:55:05 -0700
committer	GitHub <noreply@github.com>	2023-03-18 00:55:05 -0700
commit	a9c41c67e639714fbb4d7764e18be37615280c08 (patch)
tree	200e293da7e447f349c9dc6c2cfee593d21a3eff
parent	1d4cc63154366dbdbdb87c8da43753cdac13d995 (diff)
download	bun-a9c41c67e639714fbb4d7764e18be37615280c08.tar.gz bun-a9c41c67e639714fbb4d7764e18be37615280c08.tar.zst bun-a9c41c67e639714fbb4d7764e18be37615280c08.zip