aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Alex Lam S.L <alexlamsl@gmail.com> 2023-02-10 06:26:23 +0200
committer: GitHub <noreply@github.com> 2023-02-09 20:26:23 -0800
commit: 119120d21ccc3d2424a607f0637b89a5aae871ec (patch)
tree: c797613b4bca7f97c87a6be252657bda07e886d5
parent: 4bedd3833d4af65b0b87e09d0d471f4f0e983cbf (diff)
download: bun-119120d21ccc3d2424a607f0637b89a5aae871ec.tar.gz
bun-119120d21ccc3d2424a607f0637b89a5aae871ec.tar.zst
bun-119120d21ccc3d2424a607f0637b89a5aae871ec.zip
[simdutf] workaround validation OOB access (#2031)
-rw-r--r-- src/string_immutable.zig 23
-rw-r--r-- test/bun.js/buffer.test.js 10
2 files changed, 18 insertions, 15 deletions
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 24be341da..6a82e0dfd 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -1066,10 +1066,7 @@ pub fn toUTF16Alloc(allocator: std.mem.Allocator, bytes: []const u8, comptime fa
var first_non_ascii: ?u32 = null;
var output_: ?std.ArrayList(u16) = null;
- if (bun.FeatureFlags.use_simdutf and
- // workaround https://github.com/simdutf/simdutf/issues/213
- bytes.len > 4)
- {
+ if (comptime bun.FeatureFlags.use_simdutf) {
use_simdutf: {
if (bun.simdutf.validate.ascii(bytes))
return null;
@@ -1087,24 +1084,22 @@ pub fn toUTF16Alloc(allocator: std.mem.Allocator, bytes: []const u8, comptime fa
var out = try allocator.alloc(u16, out_length);
log("toUTF16 {d} UTF8 -> {d} UTF16", .{ bytes.len, out_length });
- const result = bun.simdutf.convert.utf8.to.utf16.with_errors.le(trimmed, out);
- switch (result.status) {
- .success => {
- return out;
- },
- else => {
- if (fail_if_invalid) {
+ // avoid `.with_errors.le()` due to https://github.com/simdutf/simdutf/issues/213
+ switch (bun.simdutf.convert.utf8.to.utf16.le(trimmed, out)) {
+ 0 => {
+ if (comptime fail_if_invalid) {
allocator.free(out);
return error.InvalidByteSequence;
}
- first_non_ascii = @truncate(u32, result.count);
- output_ = std.ArrayList(u16){
- .items = out[0..first_non_ascii.?],
+ first_non_ascii = 0;
+ output_ = .{
+ .items = out[0..0],
.capacity = out.len,
.allocator = allocator,
};
},
+ else => return out,
}
}
}
diff --git a/test/bun.js/buffer.test.js b/test/bun.js/buffer.test.js
index dba55ffe8..37484328b 100644
--- a/test/bun.js/buffer.test.js
+++ b/test/bun.js/buffer.test.js
@@ -2518,9 +2518,17 @@ test("Buffer.byteLength", () => {
});
it("should not crash on invalid UTF-8 byte sequence", () => {
- const buf = Buffer.from([0xc0, 0xfd]).toString();
+ const buf = Buffer.from([0xc0, 0xfd]);
expect(buf.length).toBe(2);
const str = buf.toString();
expect(str.length).toBe(2);
expect(str).toBe("\uFFFD\uFFFD");
});
+
+it("should not perform out-of-bound access on invalid UTF-8 byte sequence", () => {
+ const buf = Buffer.from([0x01, 0x9a, 0x84, 0x13, 0x12, 0x11, 0x10, 0x09]).subarray(2);
+ expect(buf.length).toBe(6);
+ const str = buf.toString();
+ expect(str.length).toBe(6);
+ expect(str).toBe("\uFFFD\x13\x12\x11\x10\x09");
+});