[simdutf] workaround validation OOB access (#2031)

author: Alex Lam S.L <alexlamsl@gmail.com> 2023-02-10 06:26:23 +0200
committer: GitHub <noreply@github.com> 2023-02-09 20:26:23 -0800
commit: 119120d21ccc3d2424a607f0637b89a5aae871ec (patch)
tree: c797613b4bca7f97c87a6be252657bda07e886d5
parent: 4bedd3833d4af65b0b87e09d0d471f4f0e983cbf (diff)
download: bun-119120d21ccc3d2424a607f0637b89a5aae871ec.tar.gz bun-119120d21ccc3d2424a607f0637b89a5aae871ec.tar.zst bun-119120d21ccc3d2424a607f0637b89a5aae871ec.zip
2 files changed, 18 insertions, 15 deletions
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 24be341da..6a82e0dfd 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -1066,10 +1066,7 @@ pub fn toUTF16Alloc(allocator: std.mem.Allocator, bytes: []const u8, comptime fa
     var first_non_ascii: ?u32 = null;
     var output_: ?std.ArrayList(u16) = null;
 
-    if (bun.FeatureFlags.use_simdutf and
-        // workaround https://github.com/simdutf/simdutf/issues/213
-        bytes.len > 4)
-    {
+    if (comptime bun.FeatureFlags.use_simdutf) {
         use_simdutf: {
             if (bun.simdutf.validate.ascii(bytes))
                 return null;
@@ -1087,24 +1084,22 @@ pub fn toUTF16Alloc(allocator: std.mem.Allocator, bytes: []const u8, comptime fa
             var out = try allocator.alloc(u16, out_length);
             log("toUTF16 {d} UTF8 -> {d} UTF16", .{ bytes.len, out_length });
 
-            const result = bun.simdutf.convert.utf8.to.utf16.with_errors.le(trimmed, out);
-            switch (result.status) {
-                .success => {
-                    return out;
-                },
-                else => {
-                    if (fail_if_invalid) {
+            // avoid `.with_errors.le()` due to https://github.com/simdutf/simdutf/issues/213
+            switch (bun.simdutf.convert.utf8.to.utf16.le(trimmed, out)) {
+                0 => {
+                    if (comptime fail_if_invalid) {
                         allocator.free(out);
                         return error.InvalidByteSequence;
                     }
 
-                    first_non_ascii = @truncate(u32, result.count);
-                    output_ = std.ArrayList(u16){
-                        .items = out[0..first_non_ascii.?],
+                    first_non_ascii = 0;
+                    output_ = .{
+                        .items = out[0..0],
                         .capacity = out.len,
                         .allocator = allocator,
                     };
                 },
+                else => return out,
             }
         }
     }
diff --git a/test/bun.js/buffer.test.js b/test/bun.js/buffer.test.js
index dba55ffe8..37484328b 100644
--- a/test/bun.js/buffer.test.js
+++ b/test/bun.js/buffer.test.js
@@ -2518,9 +2518,17 @@ test("Buffer.byteLength", () => {
 });
 
 it("should not crash on invalid UTF-8 byte sequence", () => {
-  const buf = Buffer.from([0xc0, 0xfd]).toString();
+  const buf = Buffer.from([0xc0, 0xfd]);
   expect(buf.length).toBe(2);
   const str = buf.toString();
   expect(str.length).toBe(2);
   expect(str).toBe("\uFFFD\uFFFD");
 });
+
+it("should not perform out-of-bound access on invalid UTF-8 byte sequence", () => {
+  const buf = Buffer.from([0x01, 0x9a, 0x84, 0x13, 0x12, 0x11, 0x10, 0x09]).subarray(2);
+  expect(buf.length).toBe(6);
+  const str = buf.toString();
+  expect(str.length).toBe(6);
+  expect(str).toBe("\uFFFD\x13\x12\x11\x10\x09");
+});
author	Alex Lam S.L <alexlamsl@gmail.com>	2023-02-10 06:26:23 +0200
committer	GitHub <noreply@github.com>	2023-02-09 20:26:23 -0800
commit	119120d21ccc3d2424a607f0637b89a5aae871ec (patch)
tree	c797613b4bca7f97c87a6be252657bda07e886d5
parent	4bedd3833d4af65b0b87e09d0d471f4f0e983cbf (diff)
download	bun-119120d21ccc3d2424a607f0637b89a5aae871ec.tar.gz bun-119120d21ccc3d2424a607f0637b89a5aae871ec.tar.zst bun-119120d21ccc3d2424a607f0637b89a5aae871ec.zip