fix(Buffer) Fix arguments in buffer.write, fix size returned from buffer.write for utf16, fix size calc for base64, fix calc for hex byte size (#1881)

* fix buffer write when more than 2 args are passed
* fix encoding parameter
* better buffer write tests
* fix ucs2/utf8 len return
* fix base64 size in Buffer write
* fix hex encoding size
* actual fix base64
* actual fix base64
* fix segfault
* add fill parameter in Buffer.alloc
* use createUninitialized when alloc has a fill parameter, use RETURN_IF_EXCEPTION when static casting
* fix utf16le, ucs2 copying in Buffer write
author: Ciro Spaciari <ciro.spaciari@gmail.com> 2023-01-23 21:48:52 -0300
committer: GitHub <noreply@github.com> 2023-01-23 16:48:52 -0800
commit: b3533fb3a88b33da50e58ad9594a0559af575035 (patch)
tree: 05c1a22ab6b967ce8ced970e4dcd783e056e50cc
parent: 79c0b614ee04d06d9565cd52450b0de1f58fa87e (diff)
download: bun-b3533fb3a88b33da50e58ad9594a0559af575035.tar.gz
bun-b3533fb3a88b33da50e58ad9594a0559af575035.tar.zst
bun-b3533fb3a88b33da50e58ad9594a0559af575035.zip
7 files changed, 210 insertions, 56 deletions
diff --git a/bun.lockb b/bun.lockb
index a4162f668..342d600cb 100755
--- a/bun.lockb
+++ b/bun.lockb
diff --git a/package.json b/package.json
index 3401ee180..5c584c6da 100644
--- a/package.json
+++ b/package.json
@@ -4,6 +4,7 @@
     "eslint-config-prettier": "^8.5.0",
     "express": "^4.18.2",
     "mitata": "^0.1.3",
+    "mongodb": "^4.13.0",
     "peechy": "latest",
     "prettier": "^2.4.1",
     "react": "next",
diff --git a/src/base64/base64.zig b/src/base64/base64.zig
index be0086c46..a5a94200f 100644
--- a/src/base64/base64.zig
+++ b/src/base64/base64.zig
@@ -37,24 +37,28 @@ pub fn decodeURLSafe(destination: []u8, source: []const u8) DecodeResult {
 
 pub fn encode(destination: []u8, source: []const u8) usize {
     return zig_base64.standard.Encoder.encode(destination, source).len;
+    
+}
+
+pub fn decodeLenUpperBound(len: usize) usize {
+    return zig_base64.standard.Decoder.calcSizeUpperBound(len) catch {
+        //fallback
+        return len / 4 * 3;
+    };
 }
 
-/// Given a source string of length len, this returns the amount of
-/// memory the destination string should have.
-///
-/// remember, this is integer math
-/// 3 bytes turn into 4 chars
-/// ceiling[len / 3] * 4
-///
-///
 pub fn decodeLen(source: anytype) usize {
-    return (source.len / 4 * 3 + 2);
+    return zig_base64.standard.Decoder.calcSizeForSlice(source) catch {
+        //fallback
+        return source.len / 4 * 3;
+    };
 }
 
 pub fn encodeLen(source: anytype) usize {
-    return (source.len + 2) / 3 * 4;
+    return zig_base64.standard.Encoder.calcSize(source.len);
 }
 
+
 pub const urlsafe = zig_base64.Base64DecoderWithIgnore.init(
     zig_base64.url_safe_alphabet_chars,
     null,
diff --git a/src/bun.js/bindings/JSBuffer.cpp b/src/bun.js/bindings/JSBuffer.cpp
index 5b038f3ae..0c393ce05 100644
--- a/src/bun.js/bindings/JSBuffer.cpp
+++ b/src/bun.js/bindings/JSBuffer.cpp
@@ -374,18 +374,65 @@ static inline JSC::EncodedJSValue jsBufferConstructorFunction_allocBody(JSC::JSG
         return JSValue::encode(jsUndefined());
     }
 
-    auto arrayBuffer = JSC::ArrayBuffer::tryCreate(length, 1);
-    if (!arrayBuffer) {
-        throwOutOfMemoryError(lexicalGlobalObject, throwScope);
-        return JSValue::encode(jsUndefined());
-    }
-
     auto* globalObject = reinterpret_cast<Zig::GlobalObject*>(lexicalGlobalObject);
     auto* subclassStructure = globalObject->JSBufferSubclassStructure();
 
-    auto uint8Array = JSC::JSUint8Array::create(lexicalGlobalObject, subclassStructure, WTFMove(arrayBuffer), 0, length);
 
-    RELEASE_AND_RETURN(throwScope, JSC::JSValue::encode(uint8Array));
+    // fill argument
+    if(callFrame->argumentCount() > 1){
+        auto uint8Array = JSC::JSUint8Array::createUninitialized(lexicalGlobalObject, subclassStructure, length);
+
+        auto value = callFrame->argument(1);
+
+        if (!value.isString()) {
+            auto value_ = value.toInt32(lexicalGlobalObject) & 0xFF;
+
+            auto value_uint8 = static_cast<uint8_t>(value_);
+            RETURN_IF_EXCEPTION(throwScope, JSC::JSValue::encode(jsUndefined()));
+
+            auto length = uint8Array->byteLength();
+            auto start = 0;
+            auto end = length;
+
+            auto startPtr = uint8Array->typedVector() + start;
+            auto endPtr = uint8Array->typedVector() + end;
+            memset(startPtr, value_uint8, endPtr - startPtr);
+            RELEASE_AND_RETURN(throwScope, JSC::JSValue::encode(uint8Array));
+        }
+
+        {
+            size_t length = uint8Array->byteLength();
+            size_t start = 0;
+            size_t end = length;
+            WebCore::BufferEncodingType encoding = WebCore::BufferEncodingType::utf8;
+            if (callFrame->argumentCount() > 2) {
+                auto encoding_ = callFrame->uncheckedArgument(2).toString(lexicalGlobalObject);
+
+                std::optional<BufferEncodingType> encoded = parseEnumeration<BufferEncodingType>(*lexicalGlobalObject, encoding_);
+                if (!encoded) {
+                    throwTypeError(lexicalGlobalObject, throwScope, "Invalid encoding"_s);
+                    return JSC::JSValue::encode(jsUndefined());
+                }
+                encoding = encoded.value();
+                
+            }
+            auto startPtr = uint8Array->typedVector() + start;
+            auto str_ = value.toWTFString(lexicalGlobalObject);
+            ZigString str = Zig::toZigString(str_);
+
+            Bun__Buffer_fill(&str, startPtr, end - start, encoding);
+            RELEASE_AND_RETURN(throwScope, JSC::JSValue::encode(uint8Array));
+        }
+    } else {
+        auto arrayBuffer = JSC::ArrayBuffer::tryCreate(length, 1);
+        if (!arrayBuffer) {
+            throwOutOfMemoryError(lexicalGlobalObject, throwScope);
+            return JSValue::encode(jsUndefined());
+        }
+        auto uint8Array = JSC::JSUint8Array::create(lexicalGlobalObject, subclassStructure, WTFMove(arrayBuffer), 0, length);
+        RELEASE_AND_RETURN(throwScope, JSC::JSValue::encode(uint8Array));
+
+    }
 }
 
 static inline JSC::EncodedJSValue jsBufferConstructorFunction_allocUnsafeSlowBody(JSC::JSGlobalObject* lexicalGlobalObject, JSC::CallFrame* callFrame)
@@ -891,6 +938,8 @@ static inline JSC::EncodedJSValue jsBufferPrototypeFunction_fillBody(JSC::JSGlob
         auto value_ = value.toInt32(lexicalGlobalObject) & 0xFF;
 
         auto value_uint8 = static_cast<uint8_t>(value_);
+        RETURN_IF_EXCEPTION(throwScope, JSC::JSValue::encode(jsUndefined()));
+        
         auto length = castedThis->byteLength();
         auto start = 0;
         auto end = length;
@@ -1363,7 +1412,6 @@ static inline JSC::EncodedJSValue jsBufferPrototypeFunction_writeBody(JSC::JSGlo
                 throwTypeError(lexicalGlobalObject, scope, "Invalid encoding"_s);
                 return JSC::JSValue::encode(jsUndefined());
             }
-
             encoding = encoded.value();
         }
     }
@@ -1371,20 +1419,38 @@ static inline JSC::EncodedJSValue jsBufferPrototypeFunction_writeBody(JSC::JSGlo
     if (UNLIKELY(length < offset)) {
         RELEASE_AND_RETURN(scope, JSC::JSValue::encode(JSC::jsNumber(0)));
     }
-
+    
     if (callFrame->argumentCount() > 2) {
         uint32_t arg_len = 0;
-        arg_len = callFrame->argument(2).toUInt32(lexicalGlobalObject);
-        length = std::min(arg_len, length - offset);
-    }
+        EnsureStillAliveScope arg2 = callFrame->argument(2);
+        if (arg2.value().isAnyInt()) {
+            arg_len = arg2.value().toUInt32(lexicalGlobalObject);
+            length = std::min(arg_len, length);
 
-    if (callFrame->argumentCount() > 2) {
-        std::optional<BufferEncodingType> parsedEncoding = parseEnumeration<BufferEncodingType>(*lexicalGlobalObject, callFrame->argument(3));
-        if (parsedEncoding.has_value()) {
-            encoding = parsedEncoding.value();
+            if (callFrame->argumentCount() > 3) {
+                EnsureStillAliveScope arg3 = callFrame->argument(3);
+                if (arg3.value().isString()) {
+                     std::optional<BufferEncodingType> encoded = parseEnumeration<BufferEncodingType>(*lexicalGlobalObject, arg3.value());
+                     if (!encoded) {
+                         throwTypeError(lexicalGlobalObject, scope, "Invalid encoding"_s);
+                         return JSC::JSValue::encode(jsUndefined());
+                     }
+                     encoding = encoded.value();
+                }
+            }
+        } else if (arg2.value().isString()) {
+            std::optional<BufferEncodingType> encoded = parseEnumeration<BufferEncodingType>(*lexicalGlobalObject, arg2.value());
+            if (!encoded) {
+                throwTypeError(lexicalGlobalObject, scope, "Invalid encoding"_s);
+                return JSC::JSValue::encode(jsUndefined());
+            }
+
+            encoding = encoded.value();
         }
     }
 
+    length = length - offset;
+
     auto view = str->tryGetValue(lexicalGlobalObject);
     int64_t written = 0;
 
@@ -1397,6 +1463,8 @@ static inline JSC::EncodedJSValue jsBufferPrototypeFunction_writeBody(JSC::JSGlo
     case WebCore::BufferEncodingType::base64:
     case WebCore::BufferEncodingType::base64url:
     case WebCore::BufferEncodingType::hex: {
+
+        
         if (view.is8Bit()) {
             written = Bun__encoding__writeLatin1(view.characters8(), view.length(), castedThis->typedVector() + offset, length, static_cast<uint8_t>(encoding));
         } else {
diff --git a/src/bun.js/webcore/encoding.zig b/src/bun.js/webcore/encoding.zig
index 76541e111..4f30aadae 100644
--- a/src/bun.js/webcore/encoding.zig
+++ b/src/bun.js/webcore/encoding.zig
@@ -860,12 +860,14 @@ pub const Encoder = struct {
 
         switch (comptime encoding) {
             JSC.Node.Encoding.buffer => {
+
                 const written = @min(len, to_len);
                 @memcpy(to, input, written);
 
                 return @intCast(i64, written);
             },
             .latin1, .ascii => {
+
                 const written = @min(len, to_len);
                 @memcpy(to, input, written);
 
@@ -887,12 +889,17 @@ pub const Encoder = struct {
 
                 if (std.mem.isAligned(@ptrToInt(to), @alignOf([*]u16))) {
                     var buf = input[0..len];
+
                     var output = @ptrCast([*]u16, @alignCast(@alignOf(u16), to))[0 .. to_len / 2];
-                    return strings.copyLatin1IntoUTF16([]u16, output, []const u8, buf).written;
+                    var written = strings.copyLatin1IntoUTF16([]u16, output, []const u8, buf).written;
+                    return written * 2;
                 } else {
                     var buf = input[0..len];
                     var output = @ptrCast([*]align(1) u16, to)[0 .. to_len / 2];
-                    return strings.copyLatin1IntoUTF16([]align(1) u16, output, []const u8, buf).written;
+
+                    var written = strings.copyLatin1IntoUTF16([]align(1) u16, output, []const u8, buf).written;
+                    return written * 2;
+                    
                 }
             },
 
@@ -901,6 +908,7 @@ pub const Encoder = struct {
             },
 
             JSC.Node.Encoding.base64url => {
+
                 var slice = strings.trim(input[0..len], "\r\n\t " ++ [_]u8{std.ascii.control_code.vt});
                 if (slice.len == 0)
                     return 0;
@@ -940,11 +948,11 @@ pub const Encoder = struct {
             },
 
             JSC.Node.Encoding.hex => {
-                return len * 2;
+                return len / 2; 
             },
 
             JSC.Node.Encoding.base64, JSC.Node.Encoding.base64url => {
-                return bun.base64.encodeLen(input[0..len]);
+                return bun.base64.decodeLen(input[0..len]);
             },
             // else => return &[_]u8{};
         }
@@ -958,12 +966,21 @@ pub const Encoder = struct {
             .utf8 => {
                 return @intCast(i32, strings.copyUTF16IntoUTF8(to[0..to_len], []const u16, input[0..len]).written);
             },
-            // string is already encoded, just need to copy the data
-            .latin1, JSC.Node.Encoding.ascii, JSC.Node.Encoding.ucs2, JSC.Node.Encoding.buffer, JSC.Node.Encoding.utf16le => {
+            .latin1, JSC.Node.Encoding.ascii, JSC.Node.Encoding.buffer => {
                 strings.copyU16IntoU8(to[0..to_len], []const u16, input[0..len]);
-
                 return @intCast(i64, @min(len, to_len));
             },
+            // string is already encoded, just need to copy the data
+            JSC.Node.Encoding.ucs2, JSC.Node.Encoding.utf16le => {
+                var bytes_input_len = len * 2;
+                var written = @min(bytes_input_len, to_len);
+                if (written < 2) return 0;
+
+                var fixed_len = (written/2) * 2;
+                var input_u8 = @ptrCast([*] const u8,  input);
+                strings.copyU16IntoU8(to[0..written], []const u8, input_u8[0..fixed_len]);
+                return @intCast(i64, written);
+            },
 
             JSC.Node.Encoding.hex => {
                 return @intCast(i64, strings.decodeHexToBytes(to[0..to_len], u16, input[0..len]));
@@ -999,11 +1016,11 @@ pub const Encoder = struct {
             },
 
             JSC.Node.Encoding.hex => {
-                return len;
+                return len / 2;
             },
 
             JSC.Node.Encoding.base64, JSC.Node.Encoding.base64url => {
-                return bun.base64.encodeLen(input[0..len]);
+                return bun.base64.decodeLenUpperBound(len);
             },
             // else => return &[_]u8{};
         }
diff --git a/src/deps/uws b/src/deps/uws
-Subproject a076c28a37ae2ffbcb4e2cec023056b13ba0518
+Subproject 665680ca11e09649b96e665a5eb97edc65cbb65
diff --git a/test/bun.js/buffer.test.js b/test/bun.js/buffer.test.js
index 193124b00..72cd8e10d 100644
--- a/test/bun.js/buffer.test.js
+++ b/test/bun.js/buffer.test.js
@@ -429,34 +429,98 @@ it("read", () => {
 });
 
 it("write", () => {
-  let buf = Buffer.alloc(16);
+
+  const resultMap = new Map([
+    ['utf8', Buffer.from([102, 111, 111, 0, 0, 0, 0, 0, 0])],
+    ['ucs2', Buffer.from([102, 0, 111, 0, 111, 0, 0, 0, 0])],
+    ['ascii', Buffer.from([102, 111, 111, 0, 0, 0, 0, 0, 0])],
+    ['latin1', Buffer.from([102, 111, 111, 0, 0, 0, 0, 0, 0])],
+    ['binary', Buffer.from([102, 111, 111, 0, 0, 0, 0, 0, 0])],
+    ['utf16le', Buffer.from([102, 0, 111, 0, 111, 0, 0, 0, 0])],
+    ['base64', Buffer.from([102, 111, 111, 0, 0, 0, 0, 0, 0])],
+    ['base64url', Buffer.from([102, 111, 111, 0, 0, 0, 0, 0, 0])],
+    ['hex', Buffer.from([102, 111, 111, 0, 0, 0, 0, 0, 0])],
+  ]);
+
+  let buf = Buffer.alloc(9);
   function reset() {
     new Uint8Array(buf.buffer).fill(0);
   }
 
-  expect(buf.write("hello", 8, 8)).toBe(5);
-  reset();
 
-  expect(buf.write("hello!", 3, 8)).toBe(6);
-  reset();
+  // utf8, ucs2, ascii, latin1, utf16le
+  const encodings = ['utf8', 'utf-8', 'ucs2', 'ucs-2', 'ascii', 'latin1',
+    'binary', 'utf16le', 'utf-16le'];
 
-  expect(buf.write("Foo Bar!", 4, 4)).toBe(4);
-  reset();
+  encodings
+    .reduce((es, e) => es.concat(e, e.toUpperCase()), [])
+    .forEach((encoding) => {
+      reset();
 
-  expect(buf.write("foo", 0, 1)).toBe(1);
-  reset();
+      const len = Buffer.byteLength('foo', encoding);
+      expect(buf.write('foo', 0, len, encoding)).toBe(len);
 
-  expect(buf.write("foo", 0, 2)).toBe(2);
-  reset();
+      if (encoding.includes('-'))
+        encoding = encoding.replace('-', '');
 
-  expect(buf.write("foo", 0)).toBe(3);
-  reset();
+      expect(buf).toStrictEqual(resultMap.get(encoding.toLowerCase()));
+    });
 
-  expect(buf.write("Foo Bar!", 4, 6)).toBe(6);
-  reset();
+  // base64
+  ['base64', 'BASE64', 'base64url', 'BASE64URL'].forEach((encoding) => {
+    reset()
 
-  expect(buf.write("Foo Bar!", 4, 7)).toBe(7);
-  reset();
+    const len = Buffer.byteLength('Zm9v', encoding);
+
+    expect(buf.write('Zm9v', 0, len, encoding)).toBe(len);
+    expect(buf).toStrictEqual(resultMap.get(encoding.toLowerCase()));
+  });
+
+
+  // hex
+  ['hex', 'HEX'].forEach((encoding) => {
+    reset();
+    const len = Buffer.byteLength('666f6f', encoding);
+
+    expect(buf.write('666f6f', 0, len, encoding)).toBe(len);
+    expect(buf).toStrictEqual(resultMap.get(encoding.toLowerCase()));
+  });
+
+
+  // UCS-2 overflow CVE-2018-12115
+  for (let i = 1; i < 4; i++) {
+    // Allocate two Buffers sequentially off the pool. Run more than once in case
+    // we hit the end of the pool and don't get sequential allocations
+    const x = Buffer.allocUnsafe(4).fill(0);
+    const y = Buffer.allocUnsafe(4).fill(1);
+    // Should not write anything, pos 3 doesn't have enough room for a 16-bit char
+    expect(x.write('ыыыыыы', 3, 'ucs2')).toBe(0);
+    // CVE-2018-12115 experienced via buffer overrun to next block in the pool
+    expect(Buffer.compare(y, Buffer.alloc(4, 1))).toBe(0);
+  }
+
+  // // Should not write any data when there is no space for 16-bit chars
+  const z = Buffer.alloc(4, 0);
+  expect(z.write('\u0001', 3, 'ucs2')).toBe(0);
+  expect(Buffer.compare(z, Buffer.alloc(4, 0))).toBe(0);
+  // Make sure longer strings are written up to the buffer end.
+  expect(z.write('abcd', 2)).toBe(2);
+  expect([...z]).toStrictEqual([0, 0, 0x61, 0x62]);
+
+  //Large overrun could corrupt the process with utf8
+  expect(Buffer.alloc(4).write('a'.repeat(100), 3, 'utf8')).toBe(1);
+
+  // Large overrun could corrupt the process
+  expect(Buffer.alloc(4).write('ыыыыыы'.repeat(100), 3, 'utf16le')).toBe(0);
+
+  {
+    // .write() does not affect the byte after the written-to slice of the Buffer.
+    // Refs: https://github.com/nodejs/node/issues/26422
+    const buf = Buffer.alloc(8);
+    expect(buf.write('ыы', 1, 'utf16le')).toBe(4);
+    expect([...buf]).toStrictEqual([0, 0x4b, 0x04, 0x4b, 0x04, 0, 0, 0]);
+
+  }
 });
 
 it("includes", () => {
@@ -679,7 +743,7 @@ it("Buffer.toString(base64)", () => {
 
 it("Buffer can be mocked", () => {
   function MockBuffer() {
-    const noop = function () {};
+    const noop = function () { };
     const res = Buffer.alloc(0);
     for (const op in Buffer.prototype) {
       if (typeof res[op] === "function") {
@@ -766,7 +830,7 @@ it("Buffer.from (Node.js test/test-buffer-from.js)", () => {
     new MyBadPrimitive(),
     Symbol(),
     5n,
-    (one, two, three) => {},
+    (one, two, three) => { },
     undefined,
     null,
   ].forEach((input) => {
@@ -808,7 +872,7 @@ it("new Buffer() (Node.js test/test-buffer-new.js)", () => {
   // Now test protecting users from doing stupid things
 
   expect(function () {
-    function AB() {}
+    function AB() { }
     Object.setPrototypeOf(AB, ArrayBuffer);
     Object.setPrototypeOf(AB.prototype, ArrayBuffer.prototype);
     Buffer.from(new AB());
author	Ciro Spaciari <ciro.spaciari@gmail.com>	2023-01-23 21:48:52 -0300
committer	GitHub <noreply@github.com>	2023-01-23 16:48:52 -0800
commit	b3533fb3a88b33da50e58ad9594a0559af575035 (patch)
tree	05c1a22ab6b967ce8ced970e4dcd783e056e50cc
parent	79c0b614ee04d06d9565cd52450b0de1f58fa87e (diff)
download	bun-b3533fb3a88b33da50e58ad9594a0559af575035.tar.gz bun-b3533fb3a88b33da50e58ad9594a0559af575035.tar.zst bun-b3533fb3a88b33da50e58ad9594a0559af575035.zip