diff options
| author | 2022-12-05 12:06:24 -0800 | |
|---|---|---|
| committer | 2022-12-05 12:06:24 -0800 | |
| commit | 9e29159f445de2435bdbc0e680c6088304a9ac32 (patch) | |
| tree | 45401f1a3db23a173945baefd36b25d016da2f38 | |
| parent | e23832d1ce9dc21265ef627e2d5deedf74642738 (diff) | |
| download | bun-9e29159f445de2435bdbc0e680c6088304a9ac32.tar.gz bun-9e29159f445de2435bdbc0e680c6088304a9ac32.tar.zst bun-9e29159f445de2435bdbc0e680c6088304a9ac32.zip | |
More null checking for simdutf
| -rw-r--r-- | src/string_immutable.zig | 65 | 
1 file changed, 38 insertions, 27 deletions
| diff --git a/src/string_immutable.zig b/src/string_immutable.zig index 5e74bab2b..6ea880522 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -972,38 +972,49 @@ pub fn toUTF16Alloc(allocator: std.mem.Allocator, bytes: []const u8, comptime fa      if (bun.FeatureFlags.use_simdutf) {          if (bytes.len == 0)              return &[_]u16{}; +        use_simdutf: { +            const validated = bun.simdutf.validate.with_errors.ascii(bytes); +            if (validated.status == .success) +                return null; -        const validated = bun.simdutf.validate.with_errors.ascii(bytes); -        if (validated.status == .success) -            return null; +            const offset = @truncate(u32, validated.count); -        const offset = @truncate(u32, validated.count); +            const trimmed = bun.simdutf.trim.utf8(bytes[offset..]); +            if (trimmed.len == 0 and offset == 0) +                return &[_]u16{}; -        const trimmed = bun.simdutf.trim.utf8(bytes[offset..]); -        const out_length = bun.simdutf.length.utf16.from.utf8.le(trimmed); -        var out = try allocator.alloc(u16, out_length + offset); -        log("toUTF16 {d} UTF8 -> {d} UTF16", .{ bytes.len, out_length }); -        if (offset > 0) -            strings.copyU8IntoU16(out[0..offset], bytes[0..offset]); +            if (trimmed.len == 0) +                break :use_simdutf; -        const result = bun.simdutf.convert.utf8.to.utf16.with_errors.le(trimmed, out[offset..]); -        switch (result.status) { -            .success => { -                return out; -            }, -            else => { -                if (fail_if_invalid) { -                    allocator.free(out); -                    return error.InvalidByteSequence; -                } +            const out_length = bun.simdutf.length.utf16.from.utf8.le(trimmed); -                first_non_ascii = @truncate(u32, result.count) + offset; -                output_ = std.ArrayList(u16){ -    
                .items = out[0..first_non_ascii.?], -                    .capacity = out.len, -                    .allocator = allocator, -                }; -            }, +            if (out_length != trimmed.len) +                break :use_simdutf; + +            var out = try allocator.alloc(u16, out_length + offset); +            log("toUTF16 {d} UTF8 -> {d} UTF16", .{ bytes.len, out_length }); +            if (offset > 0) +                strings.copyU8IntoU16(out[0..offset], bytes[0..offset]); + +            const result = bun.simdutf.convert.utf8.to.utf16.with_errors.le(trimmed, out[offset..]); +            switch (result.status) { +                .success => { +                    return out; +                }, +                else => { +                    if (fail_if_invalid) { +                        allocator.free(out); +                        return error.InvalidByteSequence; +                    } + +                    first_non_ascii = @truncate(u32, result.count) + offset; +                    output_ = std.ArrayList(u16){ +                        .items = out[0..first_non_ascii.?], +                        .capacity = out.len, +                        .allocator = allocator, +                    }; +                }, +            }          }      } | 
