diff options
-rw-r--r-- | src/string_immutable.zig | 63 | ||||
-rw-r--r-- | test/bun.js/buffer.test.js | 107 |
2 files changed, 113 insertions, 57 deletions
diff --git a/src/string_immutable.zig b/src/string_immutable.zig index 27f9ac5d7..db00508e1 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -956,32 +956,53 @@ pub inline fn copyU16IntoU8(output_: []u8, comptime InputType: type, input_: Inp if (comptime Environment.allow_assert) { std.debug.assert(input_.len <= output_.len); } - if (comptime !JSC.is_bindgen and Environment.isAarch64) { - // faster on aarch64 - // but it only uses SSE2 when it could use AVX2 - // so it's better to let llvm auto-vectorize it - JSC.WTF.copyLCharsFromUCharSource(output_.ptr, InputType, input_); - } else { - var output = output_; - var input = input_; - if (comptime Environment.allow_assert) { - std.debug.assert(input.len <= output.len); - } - // https://zig.godbolt.org/z/Y1qa9PTo1 - // https://github.com/ziglang/zig/issues/11830 - // this auto-vectorizes on x64 and aarch64 - var input_ptr = input.ptr; - var output_ptr = output.ptr; + var output = output_; + var input = input_; + if (comptime Environment.allow_assert) { + std.debug.assert(input.len <= output.len); + } - const last_input_ptr = input_ptr + @min(input.len, output.len); + // https://zig.godbolt.org/z/9rTn1orcY - while (last_input_ptr != input_ptr) { - output_ptr[0] = @truncate(u8, input_ptr[0]); - output_ptr += 1; - input_ptr += 1; + const group = @as(usize, 16); + // end at the last group of 16 bytes + var input_ptr = input.ptr; + var output_ptr = output.ptr; + + if (comptime Environment.enableSIMD) { + const last_vector_ptr = input.ptr + (@min(input.len, output.len) & ~(group - 1)); + while (last_vector_ptr != input_ptr) { + const input_vec1: @Vector(group, u16) = input_ptr[0..group].*; + output_ptr[0] = @truncate(u8, input_vec1[0]); + output_ptr[1] = @truncate(u8, input_vec1[1]); + output_ptr[2] = @truncate(u8, input_vec1[2]); + output_ptr[3] = @truncate(u8, input_vec1[3]); + output_ptr[4] = @truncate(u8, input_vec1[4]); + output_ptr[5] = @truncate(u8, input_vec1[5]); + output_ptr[6] = 
@truncate(u8, input_vec1[6]); + output_ptr[7] = @truncate(u8, input_vec1[7]); + output_ptr[8] = @truncate(u8, input_vec1[8]); + output_ptr[9] = @truncate(u8, input_vec1[9]); + output_ptr[10] = @truncate(u8, input_vec1[10]); + output_ptr[11] = @truncate(u8, input_vec1[11]); + output_ptr[12] = @truncate(u8, input_vec1[12]); + output_ptr[13] = @truncate(u8, input_vec1[13]); + output_ptr[14] = @truncate(u8, input_vec1[14]); + output_ptr[15] = @truncate(u8, input_vec1[15]); + + output_ptr += group; + input_ptr += group; } } + + const last_input_ptr = input_ptr + @min(input.len, output.len); + + while (last_input_ptr != input_ptr) { + output_ptr[0] = @truncate(u8, input_ptr[0]); + output_ptr += 1; + input_ptr += 1; + } } const strings = @This(); diff --git a/test/bun.js/buffer.test.js b/test/bun.js/buffer.test.js index 72cd8e10d..a1a3d325a 100644 --- a/test/bun.js/buffer.test.js +++ b/test/bun.js/buffer.test.js @@ -428,18 +428,49 @@ it("read", () => { reset(); }); -it("write", () => { +// this is for checking the simd code path +it("write long utf16 string works", () => { + const long = "ππππππ
ππ€£βΊοΈπππ".repeat(1000); + const buf = Buffer.alloc(long.length * 2); + buf.write(long, 0, "utf16le"); + expect(buf.toString("utf16le")).toBe(long); + for (let offset = 0; offset < long.length; offset += 48) { + expect(buf.toString("utf16le", offset, offset + 4)).toBe("π"); + expect(buf.toString("utf16le", offset, offset + 8)).toBe("ππ"); + expect(buf.toString("utf16le", offset, offset + 12)).toBe("πππ"); + expect(buf.toString("utf16le", offset, offset + 16)).toBe("ππππ"); + expect(buf.toString("utf16le", offset, offset + 20)).toBe("πππππ"); + expect(buf.toString("utf16le", offset, offset + 24)).toBe("ππππππ
"); + expect(buf.toString("utf16le", offset, offset + 28)).toBe("ππππππ
π"); + expect(buf.toString("utf16le", offset, offset + 32)).toBe( + "ππππππ
ππ€£", + ); + expect(buf.toString("utf16le", offset, offset + 36)).toBe( + "ππππππ
ππ€£βΊοΈ", + ); + expect(buf.toString("utf16le", offset, offset + 40)).toBe( + "ππππππ
ππ€£βΊοΈπ", + ); + expect(buf.toString("utf16le", offset, offset + 44)).toBe( + "ππππππ
ππ€£βΊοΈππ", + ); + expect(buf.toString("utf16le", offset, offset + 48)).toBe( + "ππππππ
ππ€£βΊοΈπππ", + ); + } +}); +it("write", () => { const resultMap = new Map([ - ['utf8', Buffer.from([102, 111, 111, 0, 0, 0, 0, 0, 0])], - ['ucs2', Buffer.from([102, 0, 111, 0, 111, 0, 0, 0, 0])], - ['ascii', Buffer.from([102, 111, 111, 0, 0, 0, 0, 0, 0])], - ['latin1', Buffer.from([102, 111, 111, 0, 0, 0, 0, 0, 0])], - ['binary', Buffer.from([102, 111, 111, 0, 0, 0, 0, 0, 0])], - ['utf16le', Buffer.from([102, 0, 111, 0, 111, 0, 0, 0, 0])], - ['base64', Buffer.from([102, 111, 111, 0, 0, 0, 0, 0, 0])], - ['base64url', Buffer.from([102, 111, 111, 0, 0, 0, 0, 0, 0])], - ['hex', Buffer.from([102, 111, 111, 0, 0, 0, 0, 0, 0])], + ["utf8", Buffer.from([102, 111, 111, 0, 0, 0, 0, 0, 0])], + ["ucs2", Buffer.from([102, 0, 111, 0, 111, 0, 0, 0, 0])], + ["ascii", Buffer.from([102, 111, 111, 0, 0, 0, 0, 0, 0])], + ["latin1", Buffer.from([102, 111, 111, 0, 0, 0, 0, 0, 0])], + ["binary", Buffer.from([102, 111, 111, 0, 0, 0, 0, 0, 0])], + ["utf16le", Buffer.from([102, 0, 111, 0, 111, 0, 0, 0, 0])], + ["base64", Buffer.from([102, 111, 111, 0, 0, 0, 0, 0, 0])], + ["base64url", Buffer.from([102, 111, 111, 0, 0, 0, 0, 0, 0])], + ["hex", Buffer.from([102, 111, 111, 0, 0, 0, 0, 0, 0])], ]); let buf = Buffer.alloc(9); @@ -447,46 +478,51 @@ it("write", () => { new Uint8Array(buf.buffer).fill(0); } - // utf8, ucs2, ascii, latin1, utf16le - const encodings = ['utf8', 'utf-8', 'ucs2', 'ucs-2', 'ascii', 'latin1', - 'binary', 'utf16le', 'utf-16le']; + const encodings = [ + "utf8", + "utf-8", + "ucs2", + "ucs-2", + "ascii", + "latin1", + "binary", + "utf16le", + "utf-16le", + ]; encodings .reduce((es, e) => es.concat(e, e.toUpperCase()), []) .forEach((encoding) => { reset(); - const len = Buffer.byteLength('foo', encoding); - expect(buf.write('foo', 0, len, encoding)).toBe(len); + const len = Buffer.byteLength("foo", encoding); + expect(buf.write("foo", 0, len, encoding)).toBe(len); - if (encoding.includes('-')) - encoding = encoding.replace('-', ''); + if (encoding.includes("-")) encoding = 
encoding.replace("-", ""); expect(buf).toStrictEqual(resultMap.get(encoding.toLowerCase())); }); // base64 - ['base64', 'BASE64', 'base64url', 'BASE64URL'].forEach((encoding) => { - reset() + ["base64", "BASE64", "base64url", "BASE64URL"].forEach((encoding) => { + reset(); - const len = Buffer.byteLength('Zm9v', encoding); + const len = Buffer.byteLength("Zm9v", encoding); - expect(buf.write('Zm9v', 0, len, encoding)).toBe(len); + expect(buf.write("Zm9v", 0, len, encoding)).toBe(len); expect(buf).toStrictEqual(resultMap.get(encoding.toLowerCase())); }); - // hex - ['hex', 'HEX'].forEach((encoding) => { + ["hex", "HEX"].forEach((encoding) => { reset(); - const len = Buffer.byteLength('666f6f', encoding); + const len = Buffer.byteLength("666f6f", encoding); - expect(buf.write('666f6f', 0, len, encoding)).toBe(len); + expect(buf.write("666f6f", 0, len, encoding)).toBe(len); expect(buf).toStrictEqual(resultMap.get(encoding.toLowerCase())); }); - // UCS-2 overflow CVE-2018-12115 for (let i = 1; i < 4; i++) { // Allocate two Buffers sequentially off the pool. Run more than once in case @@ -494,32 +530,31 @@ it("write", () => { const x = Buffer.allocUnsafe(4).fill(0); const y = Buffer.allocUnsafe(4).fill(1); // Should not write anything, pos 3 doesn't have enough room for a 16-bit char - expect(x.write('ыыыыыы', 3, 'ucs2')).toBe(0); + expect(x.write("ыыыыыы", 3, "ucs2")).toBe(0); // CVE-2018-12115 experienced via buffer overrun to next block in the pool expect(Buffer.compare(y, Buffer.alloc(4, 1))).toBe(0); } // // Should not write any data when there is no space for 16-bit chars const z = Buffer.alloc(4, 0); - expect(z.write('\u0001', 3, 'ucs2')).toBe(0); + expect(z.write("\u0001", 3, "ucs2")).toBe(0); expect(Buffer.compare(z, Buffer.alloc(4, 0))).toBe(0); // Make sure longer strings are written up to the buffer end. 
- expect(z.write('abcd', 2)).toBe(2); + expect(z.write("abcd", 2)).toBe(2); expect([...z]).toStrictEqual([0, 0, 0x61, 0x62]); //Large overrun could corrupt the process with utf8 - expect(Buffer.alloc(4).write('a'.repeat(100), 3, 'utf8')).toBe(1); + expect(Buffer.alloc(4).write("a".repeat(100), 3, "utf8")).toBe(1); // Large overrun could corrupt the process - expect(Buffer.alloc(4).write('ыыыыыы'.repeat(100), 3, 'utf16le')).toBe(0); + expect(Buffer.alloc(4).write("ыыыыыы".repeat(100), 3, "utf16le")).toBe(0); { // .write() does not affect the byte after the written-to slice of the Buffer. // Refs: https://github.com/nodejs/node/issues/26422 const buf = Buffer.alloc(8); - expect(buf.write('ыы', 1, 'utf16le')).toBe(4); + expect(buf.write("ыы", 1, "utf16le")).toBe(4); expect([...buf]).toStrictEqual([0, 0x4b, 0x04, 0x4b, 0x04, 0, 0, 0]); - } }); @@ -743,7 +778,7 @@ it("Buffer.toString(base64)", () => { it("Buffer can be mocked", () => { function MockBuffer() { - const noop = function () { }; + const noop = function () {}; const res = Buffer.alloc(0); for (const op in Buffer.prototype) { if (typeof res[op] === "function") { @@ -830,7 +865,7 @@ it("Buffer.from (Node.js test/test-buffer-from.js)", () => { new MyBadPrimitive(), Symbol(), 5n, - (one, two, three) => { }, + (one, two, three) => {}, undefined, null, ].forEach((input) => { @@ -872,7 +907,7 @@ it("new Buffer() (Node.js test/test-buffer-new.js)", () => { // Now test protecting users from doing stupid things expect(function () { - function AB() { } + function AB() {} Object.setPrototypeOf(AB, ArrayBuffer); Object.setPrototypeOf(AB.prototype, ArrayBuffer.prototype); Buffer.from(new AB()); |