From 6052a3edbd985d9e504f8c0fcaa4c6a1a2823ec3 Mon Sep 17 00:00:00 2001 From: "Alex Lam S.L" Date: Fri, 10 Feb 2023 07:20:11 +0200 Subject: fix assertion failure (#2033) --- src/string_immutable.zig | 7 ++----- test/bun.js/buffer.test.js | 8 ++++++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/string_immutable.zig b/src/string_immutable.zig index 6a82e0dfd..b14d9fc5d 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -1063,7 +1063,6 @@ pub fn copyLatin1IntoASCII(dest: []u8, src: []const u8) void { /// If there are no non-ascii characters, this returns null /// This is intended to be used for strings that go to JavaScript pub fn toUTF16Alloc(allocator: std.mem.Allocator, bytes: []const u8, comptime fail_if_invalid: bool) !?[]u16 { - var first_non_ascii: ?u32 = null; var output_: ?std.ArrayList(u16) = null; if (comptime bun.FeatureFlags.use_simdutf) { @@ -1092,7 +1091,6 @@ pub fn toUTF16Alloc(allocator: std.mem.Allocator, bytes: []const u8, comptime fa return error.InvalidByteSequence; } - first_non_ascii = 0; output_ = .{ .items = out[0..0], .capacity = out.len, @@ -1104,14 +1102,13 @@ pub fn toUTF16Alloc(allocator: std.mem.Allocator, bytes: []const u8, comptime fa } } - if (first_non_ascii orelse strings.firstNonASCII(bytes)) |i| { + if (strings.firstNonASCII(bytes)) |i| { const ascii = bytes[0..i]; const chunk = bytes[i..]; var output = output_ orelse try std.ArrayList(u16).initCapacity(allocator, ascii.len + 2); errdefer output.deinit(); output.items.len = ascii.len; - if (first_non_ascii == null) - strings.copyU8IntoU16(output.items, ascii); + strings.copyU8IntoU16(output.items, ascii); var remaining = chunk; diff --git a/test/bun.js/buffer.test.js b/test/bun.js/buffer.test.js index 37484328b..6c4e9d721 100644 --- a/test/bun.js/buffer.test.js +++ b/test/bun.js/buffer.test.js @@ -2525,6 +2525,14 @@ it("should not crash on invalid UTF-8 byte sequence", () => { expect(str).toBe("\uFFFD\uFFFD"); }); +it("should not crash on invalid UTF-8 byte sequence with ASCII head", () => { + const buf = Buffer.from([0x42, 0xc0, 0xfd]); + expect(buf.length).toBe(3); + const str = buf.toString(); + expect(str.length).toBe(3); + expect(str).toBe("B\uFFFD\uFFFD"); +}); + it("should not perform out-of-bound access on invalid UTF-8 byte sequence", () => { const buf = Buffer.from([0x01, 0x9a, 0x84, 0x13, 0x12, 0x11, 0x10, 0x09]).subarray(2); expect(buf.length).toBe(6); -- cgit v1.2.3