aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Jarred Sumner <jarred@jarredsumner.com> 2022-03-09 04:00:19 -0800
committerGravatar Jarred Sumner <jarred@jarredsumner.com> 2022-03-09 04:00:19 -0800
commit01b1fee2842b7be463f3861e177e2eba90eff308 (patch)
tree22a35a3208504d484a70e471aaff45c22c95b3a6
parentbdb5e4aa56fe48202f7c94982837ef72167b5408 (diff)
downloadbun-01b1fee2842b7be463f3861e177e2eba90eff308.tar.gz
bun-01b1fee2842b7be463f3861e177e2eba90eff308.tar.zst
bun-01b1fee2842b7be463f3861e177e2eba90eff308.zip
69% perf improvement for long string literals
-rw-r--r--src/js_lexer.zig22
-rw-r--r--src/js_printer.zig96
-rw-r--r--src/string_immutable.zig96
3 files changed, 132 insertions, 82 deletions
diff --git a/src/js_lexer.zig b/src/js_lexer.zig
index f44d3b786..6e8563c70 100644
--- a/src/js_lexer.zig
+++ b/src/js_lexer.zig
@@ -4,17 +4,17 @@ const tables = @import("js_lexer_tables.zig");
const build_options = @import("build_options");
const js_ast = @import("js_ast.zig");
-const _global = @import("global.zig");
-const string = _global.string;
-const Output = _global.Output;
-const Global = _global.Global;
-const Environment = _global.Environment;
-const strings = _global.strings;
-const CodePoint = _global.CodePoint;
-const MutableString = _global.MutableString;
-const stringZ = _global.stringZ;
-const default_allocator = _global.default_allocator;
-const C = _global.C;
+const bun = @import("global.zig");
+const string = bun.string;
+const Output = bun.Output;
+const Global = bun.Global;
+const Environment = bun.Environment;
+const strings = bun.strings;
+const CodePoint = bun.CodePoint;
+const MutableString = bun.MutableString;
+const stringZ = bun.stringZ;
+const default_allocator = bun.default_allocator;
+const C = bun.C;
const FeatureFlags = @import("feature_flags.zig");
const JavascriptString = []const u16;
diff --git a/src/js_printer.zig b/src/js_printer.zig
index a7fe33609..9c271a15d 100644
--- a/src/js_printer.zig
+++ b/src/js_printer.zig
@@ -9,20 +9,20 @@ const runtime = @import("runtime.zig");
const Lock = @import("./lock.zig").Lock;
const Api = @import("./api/schema.zig").Api;
const fs = @import("fs.zig");
-const _global = @import("global.zig");
-const string = _global.string;
-const Output = _global.Output;
-const Global = _global.Global;
-const Environment = _global.Environment;
-const strings = _global.strings;
-const MutableString = _global.MutableString;
-const stringZ = _global.stringZ;
-const default_allocator = _global.default_allocator;
-const C = _global.C;
+const bun = @import("global.zig");
+const string = bun.string;
+const Output = bun.Output;
+const Global = bun.Global;
+const Environment = bun.Environment;
+const strings = bun.strings;
+const MutableString = bun.MutableString;
+const stringZ = bun.stringZ;
+const default_allocator = bun.default_allocator;
+const C = bun.C;
const Ref = @import("ast/base.zig").Ref;
-const StoredFileDescriptorType = _global.StoredFileDescriptorType;
-const FeatureFlags = _global.FeatureFlags;
-const FileDescriptorType = _global.FileDescriptorType;
+const StoredFileDescriptorType = bun.StoredFileDescriptorType;
+const FeatureFlags = bun.FeatureFlags;
+const FileDescriptorType = bun.FileDescriptorType;
const expect = std.testing.expect;
const ImportKind = importRecord.ImportKind;
@@ -53,7 +53,7 @@ const first_low_surrogate = 0xDC00;
const last_low_surrogate = 0xDFFF;
const CodepointIterator = @import("./string_immutable.zig").UnsignedCodepointIterator;
const assert = std.debug.assert;
-
+const ascii_only_always_on_unless_minifying = true;
threadlocal var imported_module_ids_list: std.ArrayList(u32) = undefined;
threadlocal var imported_module_ids_list_unset: bool = true;
const ImportRecord = importRecord.ImportRecord;
@@ -505,7 +505,7 @@ pub fn NewPrinter(
comptime Writer: type,
comptime Linker: type,
comptime rewrite_esm_to_cjs: bool,
- comptime bun: bool,
+ comptime is_bun_platform: bool,
comptime is_inside_bundle: bool,
comptime is_json: bool,
comptime generate_source_map: bool,
@@ -1066,7 +1066,7 @@ pub fn NewPrinter(
// e(text.len) catch unreachable;
- while (i < n) {
+ outer: while (i < n) {
const CodeUnitType = u32;
const c: CodeUnitType = text[i];
@@ -1086,32 +1086,6 @@ pub fn NewPrinter(
}
},
- '/',
- 'a'...'z',
- 'A'...'Z',
- '0'...'9',
- '_',
- '-',
- '(',
- '[',
- '{',
- '<',
- '>',
- ')',
- ']',
- '}',
- ',',
- ':',
- ';',
- '.',
- '?',
- '!',
- '@',
- '#',
- '%',
- '*',
- '+',
- },
// Special-case the bell character since it may cause dumping this file to
// the terminal to make a sound, which is undesirable. Note that we can't
// use an octal literal to print this shorter since octal literals are not
@@ -1120,12 +1094,16 @@ pub fn NewPrinter(
e.print("\\x07");
},
0x08 => {
- e.print("\\b");
+ // Backspace (U+0008): written raw inside template literals,
+ // escaped as \b for ordinary quoted strings.
+ if (quote == '`')
+ e.print(0x08)
+ else
+ e.print("\\b");
},
0x0C => {
- e.print("\\f");
+ if (quote == '`')
+ e.print(0x000C)
+ else
+ e.print("\\f");
},
'\t' => {
if (quote == '`')
@@ -1198,11 +1176,20 @@ pub fn NewPrinter(
else => {
switch (c) {
first_ascii...last_ascii => {
+ e.print(@intCast(u8, c));
+
+ // Fast path for printing long UTF-16 template literals
+ // this only applies to template literal strings
+ // but we print a template literal if there is a \n or a \r
+ // which is often if the string is long and UTF-16
+ if (comptime quote == '`') {
const remain = text[i..];
if (remain.len > 1 and remain[0] < last_ascii and remain[0] > first_ascii) {
if (strings.@"nextUTF16NonASCIIOr$`\\"([]const u16, remain)) |count| {
i += count;
var ptr = e.writer.reserve(count) catch unreachable;
+ var to_copy = ptr[0..count];
+
strings.copyU16IntoU8(to_copy, []const u16, remain[0..count]);
e.writer.advance(count);
@@ -1214,6 +1201,12 @@ pub fn NewPrinter(
var ptr = e.writer.reserve(count) catch unreachable;
var to_copy = ptr[0..count];
strings.copyU16IntoU8(to_copy, []const u16, remain);
+ e.writer.advance(count);
+ i += count;
+ }
+ }
+ }
+ },
first_high_surrogate...last_high_surrogate => {
// Is there a next character?
@@ -1258,8 +1251,7 @@ pub fn NewPrinter(
e.writer.advance(6);
},
else => {
- // this extra branch should get compiled
- if (ascii_only) {
+ if (ascii_only_always_on_unless_minifying) {
if (c > 0xFF) {
var ptr = e.writer.reserve(6) catch unreachable;
// Write an unpaired high surrogate
@@ -4381,12 +4373,12 @@ pub fn NewPrinter(
pub fn NewWriter(
comptime ContextType: type,
- writeByte: fn (ctx: *ContextType, char: u8) anyerror!usize,
- writeAllFn: fn (ctx: *ContextType, buf: anytype) anyerror!usize,
- getLastByte: fn (ctx: *const ContextType) u8,
- getLastLastByte: fn (ctx: *const ContextType) u8,
- reserveNext: fn (ctx: *ContextType, count: u32) anyerror![*]u8,
- advanceBy: fn (ctx: *ContextType, count: u32) void,
+ comptime writeByte: fn (ctx: *ContextType, char: u8) anyerror!usize,
+ comptime writeAllFn: fn (ctx: *ContextType, buf: anytype) anyerror!usize,
+ comptime getLastByte: fn (ctx: *const ContextType) u8,
+ comptime getLastLastByte: fn (ctx: *const ContextType) u8,
+ comptime reserveNext: fn (ctx: *ContextType, count: u32) anyerror![*]u8,
+ comptime advanceBy: fn (ctx: *ContextType, count: u32) void,
) type {
return struct {
const Self = @This();
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index e551333ae..cf01a3188 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -730,14 +730,18 @@ pub inline fn copyU16IntoU8(output_: []u8, comptime InputType: type, input_: Inp
std.debug.assert(input.len <= output.len);
}
- while (input.len >= 4) {
- output[0] = @intCast(u8, input[0]);
- output[1] = @intCast(u8, input[1]);
- output[2] = @intCast(u8, input[2]);
- output[3] = @intCast(u8, input[3]);
+ // on X64, this is 4
+ // on WASM, this is 2
+ const machine_word_length = comptime @sizeOf(usize) / @sizeOf(u16);
+
+ while (input.len >= machine_word_length) {
+ // Unrolled at comptime: narrow machine_word_length u16 code units to
+ // u8 per pass. The counter starts at 0, so the bound must be `<`;
+ // with `>` the loop unrolls to nothing and no bytes are copied.
+ comptime var machine_word_i: usize = 0;
+ inline while (machine_word_i < machine_word_length) : (machine_word_i += 1) {
+ output[machine_word_i] = @intCast(u8, input[machine_word_i]);
+ }
- output = output[4..];
- input = input[4..];
+ output = output[machine_word_length..];
+ input = input[machine_word_length..];
}
for (input) |c, i| {
@@ -1370,8 +1374,10 @@ pub const AsciiVectorIntU16 = std.meta.Int(.unsigned, ascii_u16_vector_size);
pub const max_16_ascii = @splat(ascii_vector_size, @as(u8, 127));
pub const min_16_ascii = @splat(ascii_vector_size, @as(u8, 0x20));
pub const max_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 127));
+pub const min_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 0x20));
pub const AsciiVector = std.meta.Vector(ascii_vector_size, u8);
pub const AsciiVectorU1 = std.meta.Vector(ascii_vector_size, u1);
+pub const AsciiVectorU16U1 = std.meta.Vector(ascii_u16_vector_size, u1);
pub const AsciiU16Vector = std.meta.Vector(ascii_u16_vector_size, u16);
pub const max_4_ascii = @splat(4, @as(u8, 127));
pub fn isAllASCII(slice: []const u8) bool {
@@ -1676,41 +1682,93 @@ pub fn containsAnyBesidesChar(bytes: []const u8, char: u8) bool {
}
pub fn firstNonASCII16(comptime Slice: type, slice: Slice) ?u32 {
+ return firstNonASCII16CheckMin(Slice, slice, false);
+}
+
+pub fn firstNonASCII16CheckMin(comptime Slice: type, slice: Slice, comptime check_min: bool) ?u32 {
var remaining = slice;
if (comptime Environment.isAarch64 or Environment.isX64) {
- while (remaining.len >= 64) {
- comptime var count: usize = 0;
- inline while (count < 8) : (count += 1) {
- const vec: AsciiU16Vector = remaining[(comptime count * ascii_u16_vector_size)..][0..ascii_u16_vector_size].*;
+ while (remaining.len >= ascii_u16_vector_size) {
+ const vec: AsciiU16Vector = remaining[0..ascii_u16_vector_size].*;
+
+ if (comptime check_min) {
+ // Flag every lane that is above 127 or below 0x20. Vector
+ // comparisons produce bool vectors, which cannot be joined with
+ // `or`, so both results are reinterpreted as u1 lanes and OR-ed
+ // lane-wise. The lower bound must be the u16 splat
+ // (min_u16_ascii), not the u8 one (min_16_ascii).
+ const cmp = @bitCast(AsciiVectorU16U1, (vec > max_u16_ascii)) |
+ @bitCast(AsciiVectorU16U1, (vec < min_u16_ascii));
+ // Read the mask as an integer with exactly one bit per lane.
+ const bitmask = @ptrCast(*const AsciiVectorIntU16, &cmp).*;
+ const first = @ctz(AsciiVectorIntU16, bitmask);
+ if (first < ascii_u16_vector_size) {
+ return @intCast(u32, @as(u32, first) +
+ @intCast(u32, slice.len - remaining.len));
+ }
+ }
+
+ if (comptime !check_min) {
const cmp = vec > max_u16_ascii;
const bitmask = @ptrCast(*const u16, &cmp).*;
const first = @ctz(u16, bitmask);
+
if (first < ascii_u16_vector_size) {
- return @intCast(u32, (comptime count * ascii_u16_vector_size) +
- @as(u32, first) +
+ return @intCast(u32, @as(u32, first) +
@intCast(u32, slice.len - remaining.len));
}
}
- remaining = remaining[comptime ascii_u16_vector_size * count..];
+
+ remaining = remaining[ascii_u16_vector_size..];
}
+ }
+ if (comptime check_min) {
+ for (remaining) |char, i| {
+ if (char > 127 or char < 0x20) {
+ return @truncate(u32, i + (slice.len - remaining.len));
+ }
+ }
+ } else {
+ for (remaining) |char, i| {
+ if (char > 127) {
+ return @truncate(u32, i + (slice.len - remaining.len));
+ }
+ }
+ }
+
+ return null;
+}
+
+/// Fast path for printing template literal strings
+pub fn @"nextUTF16NonASCIIOr$`\\"(
+ comptime Slice: type,
+ slice: Slice,
+) ?u32 {
+ var remaining = slice;
+
+ if (comptime Environment.isAarch64 or Environment.isX64) {
while (remaining.len >= ascii_u16_vector_size) {
const vec: AsciiU16Vector = remaining[0..ascii_u16_vector_size].*;
- const cmp = vec > max_u16_ascii;
- const bitmask = @ptrCast(*const u16, &cmp).*;
- const first = @ctz(u16, bitmask);
+
+ const cmp = @bitCast(AsciiVectorU16U1, (vec > max_u16_ascii)) |
+ @bitCast(AsciiVectorU16U1, (vec < min_u16_ascii)) |
+ @bitCast(AsciiVectorU16U1, (vec == @splat(ascii_u16_vector_size, @as(u16, '$')))) |
+ @bitCast(AsciiVectorU16U1, (vec == @splat(ascii_u16_vector_size, @as(u16, '`')))) |
+ @bitCast(AsciiVectorU16U1, (vec == @splat(ascii_u16_vector_size, @as(u16, '\\'))));
+
+ const bitmask = @ptrCast(*const u8, &cmp).*;
+ const first = @ctz(u8, bitmask);
if (first < ascii_u16_vector_size) {
return @intCast(u32, @as(u32, first) +
@intCast(u32, slice.len - remaining.len));
}
+
remaining = remaining[ascii_u16_vector_size..];
}
}
for (remaining) |char, i| {
- if (char > 127) {
- return @truncate(u32, i + (slice.len - remaining.len));
+ switch (char) {
+ '$', '`', '\\', 0...0x20 - 1, 128...std.math.maxInt(u16) => {
+ return @truncate(u32, i + (slice.len - remaining.len));
+ },
+
+ else => {},
}
}