diff options
author | 2022-03-09 04:00:19 -0800 | |
---|---|---|
committer | 2022-03-09 04:00:19 -0800 | |
commit | 01b1fee2842b7be463f3861e177e2eba90eff308 (patch) | |
tree | 22a35a3208504d484a70e471aaff45c22c95b3a6 | |
parent | bdb5e4aa56fe48202f7c94982837ef72167b5408 (diff) | |
download | bun-01b1fee2842b7be463f3861e177e2eba90eff308.tar.gz bun-01b1fee2842b7be463f3861e177e2eba90eff308.tar.zst bun-01b1fee2842b7be463f3861e177e2eba90eff308.zip |
69% perf improvement for long string literals
-rw-r--r-- | src/js_lexer.zig | 22 | ||||
-rw-r--r-- | src/js_printer.zig | 96 | ||||
-rw-r--r-- | src/string_immutable.zig | 96 |
3 files changed, 132 insertions, 82 deletions
diff --git a/src/js_lexer.zig b/src/js_lexer.zig index f44d3b786..6e8563c70 100644 --- a/src/js_lexer.zig +++ b/src/js_lexer.zig @@ -4,17 +4,17 @@ const tables = @import("js_lexer_tables.zig"); const build_options = @import("build_options"); const js_ast = @import("js_ast.zig"); -const _global = @import("global.zig"); -const string = _global.string; -const Output = _global.Output; -const Global = _global.Global; -const Environment = _global.Environment; -const strings = _global.strings; -const CodePoint = _global.CodePoint; -const MutableString = _global.MutableString; -const stringZ = _global.stringZ; -const default_allocator = _global.default_allocator; -const C = _global.C; +const bun = @import("global.zig"); +const string = bun.string; +const Output = bun.Output; +const Global = bun.Global; +const Environment = bun.Environment; +const strings = bun.strings; +const CodePoint = bun.CodePoint; +const MutableString = bun.MutableString; +const stringZ = bun.stringZ; +const default_allocator = bun.default_allocator; +const C = bun.C; const FeatureFlags = @import("feature_flags.zig"); const JavascriptString = []const u16; diff --git a/src/js_printer.zig b/src/js_printer.zig index a7fe33609..9c271a15d 100644 --- a/src/js_printer.zig +++ b/src/js_printer.zig @@ -9,20 +9,20 @@ const runtime = @import("runtime.zig"); const Lock = @import("./lock.zig").Lock; const Api = @import("./api/schema.zig").Api; const fs = @import("fs.zig"); -const _global = @import("global.zig"); -const string = _global.string; -const Output = _global.Output; -const Global = _global.Global; -const Environment = _global.Environment; -const strings = _global.strings; -const MutableString = _global.MutableString; -const stringZ = _global.stringZ; -const default_allocator = _global.default_allocator; -const C = _global.C; +const bun = @import("global.zig"); +const string = bun.string; +const Output = bun.Output; +const Global = bun.Global; +const Environment = bun.Environment; +const strings = bun.strings; +const MutableString = bun.MutableString; +const stringZ = bun.stringZ; +const default_allocator = bun.default_allocator; +const C = bun.C; const Ref = @import("ast/base.zig").Ref; -const StoredFileDescriptorType = _global.StoredFileDescriptorType; -const FeatureFlags = _global.FeatureFlags; -const FileDescriptorType = _global.FileDescriptorType; +const StoredFileDescriptorType = bun.StoredFileDescriptorType; +const FeatureFlags = bun.FeatureFlags; +const FileDescriptorType = bun.FileDescriptorType; const expect = std.testing.expect; const ImportKind = importRecord.ImportKind; @@ -53,7 +53,7 @@ const first_low_surrogate = 0xDC00; const last_low_surrogate = 0xDFFF; const CodepointIterator = @import("./string_immutable.zig").UnsignedCodepointIterator; const assert = std.debug.assert; - +const ascii_only_always_on_unless_minifying = true; threadlocal var imported_module_ids_list: std.ArrayList(u32) = undefined; threadlocal var imported_module_ids_list_unset: bool = true; const ImportRecord = importRecord.ImportRecord; @@ -505,7 +505,7 @@ pub fn NewPrinter( comptime Writer: type, comptime Linker: type, comptime rewrite_esm_to_cjs: bool, - comptime bun: bool, + comptime is_bun_platform: bool, comptime is_inside_bundle: bool, comptime is_json: bool, comptime generate_source_map: bool, @@ -1066,7 +1066,7 @@ pub fn NewPrinter( // e(text.len) catch unreachable; - while (i < n) { + outer: while (i < n) { const CodeUnitType = u32; const c: CodeUnitType = text[i]; @@ -1086,32 +1086,6 @@ pub fn NewPrinter( } }, - '/', - 'a'...'z', - 'A'...'Z', - '0'...'9', - '_', - '-', - '(', - '[', - '{', - '<', - '>', - ')', - ']', - '}', - ',', - ':', - ';', - '.', - '?', - '!', - '@', - '#', - '%', - '*', - '+', - }, // Special-case the bell character since it may cause dumping this file to // the terminal to make a sound, which is undesirable. Note that we can't // use an octal literal to print this shorter since octal literals are not @@ -1120,12 +1094,16 @@ pub fn NewPrinter( e.print("\\x07"); }, 0x08 => { - e.print("\\b"); + if (quote == '`') + e.print(0x08) else e.print("\\f"); }, 0x0C => { - e.print("\\f"); + if (quote == '`') + e.print(0x000C) + else + e.print("\\f"); }, '\t' => { if (quote == '`') @@ -1198,11 +1176,20 @@ pub fn NewPrinter( else => { switch (c) { first_ascii...last_ascii => { + e.print(@intCast(u8, c)); + + // Fast path for printing long UTF-16 template literals + // this only applies to template literal strings + // but we print a template literal if there is a \n or a \r + // which is often if the string is long and UTF-16 + if (comptime quote == '`') { const remain = text[i..]; if (remain.len > 1 and remain[0] < last_ascii and remain[0] > first_ascii) { if (strings.@"nextUTF16NonASCIIOr$`\\"([]const u16, remain)) |count| { i += count; var ptr = e.writer.reserve(count) catch unreachable; + var to_copy = ptr[0..count]; + strings.copyU16IntoU8(to_copy, []const u16, remain[0..count]); e.writer.advance(count); @@ -1214,6 +1201,12 @@ pub fn NewPrinter( var ptr = e.writer.reserve(count) catch unreachable; var to_copy = ptr[0..count]; strings.copyU16IntoU8(to_copy, []const u16, remain); + e.writer.advance(count); + i += count; + } + } + } + }, first_high_surrogate...last_high_surrogate => { // Is there a next character? @@ -1258,8 +1251,7 @@ pub fn NewPrinter( e.writer.advance(6); }, else => { - // this extra branch should get compiled - if (ascii_only) { + if (ascii_only_always_on_unless_minifying) { if (c > 0xFF) { var ptr = e.writer.reserve(6) catch unreachable; // Write an unpaired high surrogate @@ -4381,12 +4373,12 @@ pub fn NewPrinter( pub fn NewWriter( comptime ContextType: type, - writeByte: fn (ctx: *ContextType, char: u8) anyerror!usize, - writeAllFn: fn (ctx: *ContextType, buf: anytype) anyerror!usize, - getLastByte: fn (ctx: *const ContextType) u8, - getLastLastByte: fn (ctx: *const ContextType) u8, - reserveNext: fn (ctx: *ContextType, count: u32) anyerror![*]u8, - advanceBy: fn (ctx: *ContextType, count: u32) void, + comptime writeByte: fn (ctx: *ContextType, char: u8) anyerror!usize, + comptime writeAllFn: fn (ctx: *ContextType, buf: anytype) anyerror!usize, + comptime getLastByte: fn (ctx: *const ContextType) u8, + comptime getLastLastByte: fn (ctx: *const ContextType) u8, + comptime reserveNext: fn (ctx: *ContextType, count: u32) anyerror![*]u8, + comptime advanceBy: fn (ctx: *ContextType, count: u32) void, ) type { return struct { const Self = @This(); diff --git a/src/string_immutable.zig b/src/string_immutable.zig index e551333ae..cf01a3188 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -730,14 +730,18 @@ pub inline fn copyU16IntoU8(output_: []u8, comptime InputType: type, input_: Inp std.debug.assert(input.len <= output.len); } - while (input.len >= 4) { - output[0] = @intCast(u8, input[0]); - output[1] = @intCast(u8, input[1]); - output[2] = @intCast(u8, input[2]); - output[3] = @intCast(u8, input[3]); + // on X64, this is 4 + // on WASM, this is 2 + const machine_word_length = comptime @sizeOf(usize) / @sizeOf(u16); + + while (input.len >= machine_word_length) { + comptime var machine_word_i: usize = 0; + inline while (machine_word_i > machine_word_length) : (machine_word_i += 1) { + output[machine_word_i] = @intCast(u8, input[machine_word_i]); + } - output = output[4..]; - input = input[4..]; + output = output[machine_word_length..]; + input = input[machine_word_length..]; } for (input) |c, i| { @@ -1370,8 +1374,10 @@ pub const AsciiVectorIntU16 = std.meta.Int(.unsigned, ascii_u16_vector_size); pub const max_16_ascii = @splat(ascii_vector_size, @as(u8, 127)); pub const min_16_ascii = @splat(ascii_vector_size, @as(u8, 0x20)); pub const max_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 127)); +pub const min_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 0x20)); pub const AsciiVector = std.meta.Vector(ascii_vector_size, u8); pub const AsciiVectorU1 = std.meta.Vector(ascii_vector_size, u1); +pub const AsciiVectorU16U1 = std.meta.Vector(ascii_u16_vector_size, u1); pub const AsciiU16Vector = std.meta.Vector(ascii_u16_vector_size, u16); pub const max_4_ascii = @splat(4, @as(u8, 127)); pub fn isAllASCII(slice: []const u8) bool { @@ -1676,41 +1682,93 @@ pub fn containsAnyBesidesChar(bytes: []const u8, char: u8) bool { } pub fn firstNonASCII16(comptime Slice: type, slice: Slice) ?u32 { + return firstNonASCII16CheckMin(Slice, slice, false); +} + +pub fn firstNonASCII16CheckMin(comptime Slice: type, slice: Slice, comptime check_min: bool) ?u32 { var remaining = slice; if (comptime Environment.isAarch64 or Environment.isX64) { - while (remaining.len >= 64) { - comptime var count: usize = 0; - inline while (count < 8) : (count += 1) { - const vec: AsciiU16Vector = remaining[(comptime count * ascii_u16_vector_size)..][0..ascii_u16_vector_size].*; + while (remaining.len >= ascii_u16_vector_size) { + const vec: AsciiU16Vector = remaining[0..ascii_u16_vector_size].*; + + if (comptime check_min) { + const cmp = vec > max_u16_ascii or vec < min_16_ascii; + const bitmask = @ptrCast(*const u16, &cmp).*; + const first = @ctz(u16, bitmask); + if (first < ascii_u16_vector_size) { + return @intCast(u32, @as(u32, first) + + @intCast(u32, slice.len - remaining.len)); + } + } + + if (comptime !check_min) { const cmp = vec > max_u16_ascii; const bitmask = @ptrCast(*const u16, &cmp).*; const first = @ctz(u16, bitmask); + if (first < ascii_u16_vector_size) { - return @intCast(u32, (comptime count * ascii_u16_vector_size) + - @as(u32, first) + + return @intCast(u32, @as(u32, first) + @intCast(u32, slice.len - remaining.len)); } } - remaining = remaining[comptime ascii_u16_vector_size * count..]; + + remaining = remaining[ascii_u16_vector_size..]; } + } + if (comptime check_min) { + for (remaining) |char, i| { + if (char > 127 or char < 0x20) { + return @truncate(u32, i + (slice.len - remaining.len)); + } + } + } else { + for (remaining) |char, i| { + if (char > 127) { + return @truncate(u32, i + (slice.len - remaining.len)); + } + } + } + + return null; +} + +/// Fast path for printing template literal strings +pub fn @"nextUTF16NonASCIIOr$`\\"( + comptime Slice: type, + slice: Slice, +) ?u32 { + var remaining = slice; + + if (comptime Environment.isAarch64 or Environment.isX64) { while (remaining.len >= ascii_u16_vector_size) { const vec: AsciiU16Vector = remaining[0..ascii_u16_vector_size].*; - const cmp = vec > max_u16_ascii; - const bitmask = @ptrCast(*const u16, &cmp).*; - const first = @ctz(u16, bitmask); + + const cmp = @bitCast(AsciiVectorU16U1, (vec > max_u16_ascii)) | + @bitCast(AsciiVectorU16U1, (vec < min_u16_ascii)) | + @bitCast(AsciiVectorU16U1, (vec == @splat(ascii_u16_vector_size, @as(u16, '$')))) | + @bitCast(AsciiVectorU16U1, (vec == @splat(ascii_u16_vector_size, @as(u16, '`')))) | + @bitCast(AsciiVectorU16U1, (vec == @splat(ascii_u16_vector_size, @as(u16, '\\')))); + + const bitmask = @ptrCast(*const u8, &cmp).*; + const first = @ctz(u8, bitmask); if (first < ascii_u16_vector_size) { return @intCast(u32, @as(u32, first) + @intCast(u32, slice.len - remaining.len)); } + remaining = remaining[ascii_u16_vector_size..]; } } for (remaining) |char, i| { - if (char > 127) { - return @truncate(u32, i + (slice.len - remaining.len)); + switch (char) { + '$', '`', '\\', 0...0x20 - 1, 128...std.math.maxInt(u16) => { + return @truncate(u32, i + (slice.len - remaining.len)); + }, + + else => {}, } } |