Fix all known string encoding bugs

author: Jarred Sumner <jarred@jarredsumner.com> 2021-10-25 00:52:07 -0700
committer: Jarred Sumner <jarred@jarredsumner.com> 2021-10-25 00:52:07 -0700
commit: 42c264bf7b45bdf7944d10260beeaf7c8b50a21a (patch)
tree: aaf099275cbafcf0b253a48dda30db2dc987ed66
parent: fe6564b5332a72116f68c1c95ae7da86fe2ca668 (diff)
download: bun-42c264bf7b45bdf7944d10260beeaf7c8b50a21a.tar.gz
bun-42c264bf7b45bdf7944d10260beeaf7c8b50a21a.tar.zst
bun-42c264bf7b45bdf7944d10260beeaf7c8b50a21a.zip
9 files changed, 265 insertions, 122 deletions
diff --git a/Makefile b/Makefile
index 2e7600dbc..93e234ee1 100644
--- a/Makefile
+++ b/Makefile
@@ -719,4 +719,7 @@ run-unit:
 	@zig-out/bin/$(testname) -- fake
 	
 
-test: build-unit run-unit
-\ No newline at end of file
+test: build-unit run-unit
+
+integration-test-dev: 
+	USE_EXISTING_PROCESS=true node integration/scripts/browser.js
+\ No newline at end of file
diff --git a/build.zig b/build.zig
index b03781330..4ea04cbf8 100644
--- a/build.zig
+++ b/build.zig
@@ -286,7 +286,7 @@ pub fn build(b: *std.build.Builder) !void {
         obj.setBuildMode(mode);
         obj.linkLibC();
         obj.linkLibCpp();
-
+        obj.strip = false;
         obj.bundle_compiler_rt = true;
 
         if (target.getOsTag() == .linux) {
diff --git a/integration/scripts/browser.js b/integration/scripts/browser.js
index eae77291c..10a07b54e 100644
--- a/integration/scripts/browser.js
+++ b/integration/scripts/browser.js
@@ -6,35 +6,38 @@ const fs = require("fs");
 const child_process = require("child_process");
 const snippetsDir = path.resolve(__dirname, "../snippets");
 const serverURL = process.env.TEST_SERVER_URL || "http://localhost:8080";
-
+const USE_EXISTING_PROCESS = process.env.USE_EXISTING_PROCESS || false;
 const DISABLE_HMR = !!process.env.DISABLE_HMR;
 const bunFlags = [
   `--origin=${serverURL}`,
   DISABLE_HMR && "--disable-hmr",
 ].filter(Boolean);
 const bunExec = process.env.BUN_BIN || "bun";
-const bunProcess = child_process.spawn(bunExec, bunFlags, {
-  cwd: snippetsDir,
-  stdio: "pipe",
-  env: {
-    ...process.env,
-    DISABLE_BUN_ANALYTICS: "1",
-  },
-
-  shell: false,
-});
-console.log("$", bunExec, bunFlags.join(" "));
-const isDebug = bunExec.endsWith("-debug");
 
-bunProcess.stderr.pipe(process.stderr);
-bunProcess.stdout.pipe(process.stdout);
-bunProcess.once("error", (err) => {
-  console.error("❌ bun error", err);
-  process.exit(1);
-});
-process.on("beforeExit", () => {
-  bunProcess?.kill(0);
-});
+var bunProcess;
+if (!USE_EXISTING_PROCESS) {
+  bunProcess = child_process.spawn(bunExec, bunFlags, {
+    cwd: snippetsDir,
+    stdio: "pipe",
+    env: {
+      ...process.env,
+      DISABLE_BUN_ANALYTICS: "1",
+    },
+
+    shell: false,
+  });
+  console.log("$", bunExec, bunFlags.join(" "));
+  bunProcess.stderr.pipe(process.stderr);
+  bunProcess.stdout.pipe(process.stdout);
+  bunProcess.once("error", (err) => {
+    console.error("❌ bun error", err);
+    process.exit(1);
+  });
+  process.on("beforeExit", () => {
+    bunProcess?.kill(0);
+  });
+}
+const isDebug = bunExec.endsWith("-debug");
 
 function writeSnapshot(name, code) {
   let file = path.join(__dirname, "../snapshots", name);
@@ -61,7 +64,8 @@ function writeSnapshot(name, code) {
 }
 
 async function main() {
-  const browser = await puppeteer.launch();
+  const launchOptions = USE_EXISTING_PROCESS ? { devtools: true } : undefined;
+  const browser = await puppeteer.launch(launchOptions);
   const promises = [];
   let allTestsPassed = true;
 
@@ -69,6 +73,13 @@ async function main() {
     var page;
     try {
       page = await browser.newPage();
+      if (USE_EXISTING_PROCESS) {
+        await page.evaluate(`
+        globalThis.BUN_DEBUG_MODE = true;
+      `);
+      }
+
+      var shouldClose = true;
       page.on("console", (obj) =>
         console.log(`[console.${obj.type()}] ${obj.text()}`)
       );
@@ -89,6 +100,7 @@ async function main() {
 
       console.log(`✅ ${key}`);
     } catch (e) {
+      if (USE_EXISTING_PROCESS) shouldClose = false;
       allTestsPassed = false;
       console.log(`❌ ${key}: ${(e && e.message) || e}`);
     } finally {
@@ -102,7 +114,7 @@ async function main() {
       }
     }
 
-    await page.close();
+    if (shouldClose) await page.close();
   }
 
   const tests = require("./snippets.json");
@@ -112,16 +124,18 @@ async function main() {
     await runPage(test);
   }
 
-  await browser.close();
-  bunProcess.kill(0);
+  if (!USE_EXISTING_PROCESS || (USE_EXISTING_PROCESS && allTestsPassed)) {
+    bunProcess && bunProcess.kill(0);
 
-  if (!allTestsPassed) {
-    console.error(`❌ browser test failed`);
-    process.exit(1);
-  } else {
-    console.log(`✅ browser test passed`);
-    bunProcess.kill(0);
-    process.exit(0);
+    if (!allTestsPassed) {
+      console.error(`❌ browser test failed`);
+      process.exit(1);
+    } else {
+      console.log(`✅ browser test passed`);
+      bunProcess && bunProcess.kill(0);
+      process.exit(0);
+    }
+    await browser.close();
   }
 }
 
diff --git a/integration/snippets/string-escapes.js b/integration/snippets/string-escapes.js
index dc5c7cff9..fd821f892 100644
--- a/integration/snippets/string-escapes.js
+++ b/integration/snippets/string-escapes.js
@@ -1,14 +1,18 @@
 var tab = "\t";
 var シ = "wow";
 var f = "";
+var f = "\u2087";
 var obj = {
   "\r\n": "\r\n",
   "\n": "\n",
   "\t": "\t",
+  "\f": "\f",
+  "\v": "\v",
   "\u2028": "\u2028",
   "\u2029": "\u2029",
   "😊": "😊",
   "😃": "😃",
+  "🕵🏽‍♂️": "🕵🏽‍♂️",
   "㋡": "㋡",
   "☺": "☺",
   シ: "シ",
diff --git a/src/http.zig b/src/http.zig
index b7c28d0b8..e1fb5a4d4 100644
--- a/src/http.zig
+++ b/src/http.zig
@@ -1762,6 +1762,17 @@ pub const RequestContext = struct {
                     threadlocal var buffer: MutableString = undefined;
                     threadlocal var has_loaded_buffer: bool = false;
 
+                    /// Asks the thread-local `buffer` to grow for `count` more bytes and
+                    /// returns a pointer to the first byte past its current contents.
+                    /// This function does not itself change the buffer's length; callers
+                    /// write through the returned pointer and then call `advanceBy`.
+                    /// Returns any error from `buffer.growIfNeeded`.
+                    pub fn reserveNext(rctx: *SocketPrinterInternal, count: u32) anyerror![*]u8 {
+                        try buffer.growIfNeeded(count);
+                        return @ptrCast([*]u8, &buffer.list.items.ptr[buffer.list.items.len]);
+                    }
+
+                    /// Extends the thread-local `buffer`'s item slice by `count` bytes,
+                    /// committing bytes that were written through a pointer obtained from
+                    /// `reserveNext`.
+                    pub fn advanceBy(rctx: *SocketPrinterInternal, count: u32) void {
+                        // Filling the buffer exactly to capacity is valid, so the bound is inclusive.
+                        if (comptime Environment.isDebug) std.debug.assert(buffer.list.items.len + count <= buffer.list.capacity);
+
+                        buffer.list.items = buffer.list.items.ptr[0 .. buffer.list.items.len + count];
+                    }
+
                     pub fn init(rctx: *RequestContext, _loader: Options.Loader) SocketPrinterInternal {
                         if (!has_loaded_buffer) {
                             buffer = MutableString.init(default_allocator, 0) catch unreachable;
@@ -1841,6 +1852,8 @@ pub const RequestContext = struct {
                     SocketPrinterInternal.writeAll,
                     SocketPrinterInternal.getLastByte,
                     SocketPrinterInternal.getLastLastByte,
+                    SocketPrinterInternal.reserveNext,
+                    SocketPrinterInternal.advanceBy,
                 );
                 const loader = ctx.bundler.options.loaders.get(result.file.input.name.ext) orelse .file;
 
diff --git a/src/js_ast.zig b/src/js_ast.zig
index e21281389..e926ae3b7 100644
--- a/src/js_ast.zig
+++ b/src/js_ast.zig
@@ -1097,15 +1097,15 @@ pub const E = struct {
         }
 
+        /// True when `utf8` is at least as long as `value`; this includes the
+        /// case where both are empty.
         pub inline fn isUTF8(s: *const String) bool {
-            return s.utf8.len > 0;
+            return @maximum(s.utf8.len, s.value.len) == s.utf8.len;
         }
 
+        /// True when both `utf8` and `value` have length zero.
         pub inline fn isBlank(s: *const String) bool {
-            return std.math.max(s.utf8.len, s.value.len) == 0;
+            return @maximum(s.utf8.len, s.value.len) == 0;
         }
 
+        /// True when at least one of `utf8` and `value` is non-empty.
         pub inline fn isPresent(s: *const String) bool {
-            return std.math.max(s.utf8.len, s.value.len) > 0;
+            return @maximum(s.utf8.len, s.value.len) > 0;
         }
 
         pub fn eql(s: *const String, comptime _t: type, other: anytype) bool {
diff --git a/src/js_lexer.zig b/src/js_lexer.zig
index 351adbfc6..a966358b8 100644
--- a/src/js_lexer.zig
+++ b/src/js_lexer.zig
@@ -137,7 +137,7 @@ pub fn NewLexer(comptime json_options: JSONOptions) type {
             };
         }
 
-        pub fn loc(self: *LexerType) logger.Loc {
+        /// Converts the lexer's `start` offset into a `logger.Loc` via `logger.usize2Loc`.
+        pub inline fn loc(self: *const LexerType) logger.Loc {
             return logger.usize2Loc(self.start);
         }
 
@@ -222,7 +222,7 @@ pub fn NewLexer(comptime json_options: JSONOptions) type {
 
         pub fn deinit(this: *LexerType) void {}
 
-        pub fn decodeEscapeSequences(lexer: *LexerType, start: usize, text: string, comptime BufType: type, buf_: *BufType) !void {
+        fn decodeEscapeSequences(lexer: *LexerType, start: usize, text: string, comptime BufType: type, buf_: *BufType) !void {
             var buf = buf_.*;
             defer buf_.* = buf;
             if (comptime is_json) lexer.is_ascii_only = false;
@@ -259,34 +259,36 @@ pub fn NewLexer(comptime json_options: JSONOptions) type {
                         const c2 = iter.c;
 
                         const width2 = iter.width;
-                        switch (iter.c) {
+                        switch (c2) {
+                            // https://mathiasbynens.be/notes/javascript-escapes#single
                             'b' => {
-                                buf.append(std.mem.readIntNative(u16, "\\b")) catch unreachable;
+                                buf.append(8) catch unreachable;
                                 continue;
                             },
                             'f' => {
-                                buf.append(std.mem.readIntNative(u16, "\\f")) catch unreachable;
+                                buf.append(9) catch unreachable;
                                 continue;
                             },
                             'n' => {
-                                buf.append(std.mem.readIntNative(u16, "\\n")) catch unreachable;
+                                buf.append(10) catch unreachable;
                                 continue;
                             },
-                            'r' => {
-                                buf.append(std.mem.readIntNative(u16, "\\r")) catch unreachable;
+                            'v' => {
+                                // Vertical tab is invalid JSON
+                                // We're going to allow it.
+                                // if (comptime is_json) {
+                                //     lexer.end = start + iter.i - width2;
+                                //     try lexer.syntaxError();
+                                // }
+                                buf.append(11) catch unreachable;
                                 continue;
                             },
                             't' => {
-                                buf.append(std.mem.readIntNative(u16, "\\t")) catch unreachable;
+                                buf.append(12) catch unreachable;
                                 continue;
                             },
-                            'v' => {
-                                if (comptime is_json) {
-                                    lexer.end = start + iter.i - width2;
-                                    try lexer.syntaxError();
-                                }
-
-                                buf.append(std.mem.readIntNative(u16, "\\v")) catch unreachable;
+                            'r' => {
+                                buf.append(13) catch unreachable;
                                 continue;
                             },
 
@@ -575,7 +577,8 @@ pub fn NewLexer(comptime json_options: JSONOptions) type {
                         }
 
                         switch (lexer.code_point) {
-                            'f', 't', 'r', 'n', '`', '\'', '0', '"', 0x2028, 0x2029 => {
+                            // 0 cannot be in this list because it may be a legacy octal literal
+                            'v', 'f', 't', 'r', 'n', '`', '\'', '"', 0x2028, 0x2029 => {
                                 try lexer.step();
                                 continue :stringLiteral;
                             },
@@ -673,7 +676,7 @@ pub fn NewLexer(comptime json_options: JSONOptions) type {
 
             // Reset string literal
             const base = if (comptime quote == 0) lexer.start else lexer.start + 1;
-            lexer.string_literal_slice = lexer.source.contents[base..@minimum(lexer.source.contents.len, lexer.end - string_literal_details.suffix_len)];
+            lexer.string_literal_slice = lexer.source.contents[base..@minimum(lexer.source.contents.len, lexer.end - @as(usize, string_literal_details.suffix_len))];
             lexer.string_literal_is_ascii = !string_literal_details.needs_slow_path;
             lexer.string_literal_buffer.shrinkRetainingCapacity(0);
             if (string_literal_details.needs_slow_path) {
@@ -2278,7 +2281,7 @@ pub fn NewLexer(comptime json_options: JSONOptions) type {
             // them. <CR><LF> and <CR> LineTerminatorSequences are normalized to
             // <LF> for both TV and TRV. An explicit EscapeSequence is needed to
             // include a <CR> or <CR><LF> sequence.
-            var bytes = MutableString.initCopy(lexer.allocator, text) catch unreachable;
+            var bytes = MutableString.init(lexer.allocator, text.len) catch unreachable;
             var end: usize = 0;
             var i: usize = 0;
             var c: u8 = '0';
diff --git a/src/js_printer.zig b/src/js_printer.zig
index a3c439ca3..868757b7d 100644
--- a/src/js_printer.zig
+++ b/src/js_printer.zig
@@ -265,6 +265,29 @@ pub fn NewPrinter(
             }
         }
 
+        /// Formats `args` according to `str` directly into the writer's buffer.
+        /// The output length is measured with `std.fmt.count`, that many bytes are
+        /// reserved on the writer, `std.fmt.bufPrint` fills the reservation, and the
+        /// writer is advanced by the number of bytes produced.
+        /// Returns any error from `writer.reserve`.
+        fn fmt(p: *Printer, comptime str: string, args: anytype) !void {
+            // `reserve` and `advance` take a u32; `std.fmt.count` yields a wider integer.
+            const len = @intCast(u32, @call(
+                .{
+                    .modifier = .always_inline,
+                },
+                std.fmt.count,
+                .{ str, args },
+            ));
+            const ptr = try p.writer.reserve(len);
+
+            // Cannot fail: the destination is exactly as long as `count` reported.
+            const written = @call(
+                .{
+                    .modifier = .always_inline,
+                },
+                std.fmt.bufPrint,
+                .{ ptr[0..len], str, args },
+            ) catch unreachable;
+
+            p.writer.advance(@intCast(u32, written.len));
+        }
+
         pub fn print(p: *Printer, str: anytype) void {
             switch (@TypeOf(str)) {
                 comptime_int, u16, u8 => {
@@ -538,12 +561,6 @@ pub fn NewPrinter(
                             backtick_cost += 1;
                         }
                     },
-                    '\r', '\n' => {
-                        if (comptime isDebug) {
-                            std.debug.assert(allow_backtick);
-                        }
-                        return '`';
-                    },
                     else => {},
                 }
                 i += 1;
@@ -578,22 +595,22 @@ pub fn NewPrinter(
         }
 
         pub fn printQuotedUTF16(e: *Printer, text: []const u16, quote: u8) void {
-            // utf-8 is a max of 4 bytes
-            // we leave two extra chars for "\" and "u"
-            var temp = [6]u8{ 0, 0, 0, 0, 0, 0 };
             var i: usize = 0;
             const n: usize = text.len;
 
             // e(text.len) catch unreachable;
 
             while (i < n) {
-                const c = @as(u21, text[i]);
+                const CodeUnitType = u21;
+
+                const c = @as(CodeUnitType, text[i]);
                 i += 1;
-                var r: u21 = 0;
+                var r: CodeUnitType = 0;
                 var width: u3 = 0;
 
                 // TODO: here
                 switch (c) {
+
                     // Special-case the null character since it may mess with code written in C
                     // that treats null characters as the end of the string.
                     0x00 => {
@@ -605,6 +622,38 @@ pub fn NewPrinter(
                         }
                     },
 
+                    'a'...'z',
+                    'A'...'Z',
+                    '0'...'9',
+                    '_',
+                    '-',
+                    '(',
+                    '[',
+                    '{',
+                    '<',
+                    '>',
+                    ')',
+                    ']',
+                    '}',
+                    ',',
+                    ':',
+                    ';',
+                    '.',
+                    '?',
+                    '!',
+                    '@',
+                    '#',
+                    '%',
+                    '^',
+                    '&',
+                    '*',
+                    '+',
+                    '=',
+                    ' ',
+                    => {
+                        e.print(@intCast(u8, c));
+                    },
+
                     // Special-case the bell character since it may cause dumping this file to
                     // the terminal to make a sound, which is undesirable. Note that we can't
                     // use an octal literal to print this shorter since octal literals are not
@@ -620,7 +669,7 @@ pub fn NewPrinter(
                     },
                     '\n' => {
                         if (quote == '`') {
-                            e.print("\n");
+                            e.print('\n');
                         } else {
                             e.print("\\n");
                         }
@@ -633,32 +682,34 @@ pub fn NewPrinter(
                         e.print("\\v");
                     },
                     // "\\"
-                    92 => {
+                    '\\' => {
                         e.print("\\\\");
                     },
+
                     '\'' => {
                         if (quote == '\'') {
-                            e.print("\\");
+                            e.print('\\');
                         }
                         e.print("'");
                     },
+
                     '"' => {
                         if (quote == '"') {
-                            e.print("\\");
+                            e.print('\\');
                         }
 
                         e.print("\"");
                     },
                     '`' => {
                         if (quote == '`') {
-                            e.print("\\");
+                            e.print('\\');
                         }
 
                         e.print("`");
                     },
                     '$' => {
                         if (quote == '`' and i < n and text[i] == '{') {
-                            e.print("\\");
+                            e.print('\\');
                         }
 
                         e.print('$');
@@ -672,19 +723,16 @@ pub fn NewPrinter(
                     0xFEFF => {
                         e.print("\\uFEFF");
                     },
+
                     else => {
                         switch (c) {
-                            // Common case: just append a single byte
-                            // we know it's not 0 since we already checked
-                            1...last_ascii => {
-                                e.print(@intCast(u8, c));
-                            },
+                            
                             first_high_surrogate...last_high_surrogate => {
 
                                 // Is there a next character?
 
                                 if (i < n) {
-                                    const c2 = text[i];
+                                    const c2: CodeUnitType = @as(CodeUnitType, text[i]);
 
                                     if (c2 >= first_high_surrogate and c2 <= last_low_surrogate) {
                                         // this is some magic to me
@@ -692,48 +740,62 @@ pub fn NewPrinter(
                                         i += 1;
                                         // Escape this character if UTF-8 isn't allowed
                                         if (ascii_only) {
-                                            // this is more magic!!
-                                            const bytes = [_]u8{
+                                            var ptr = e.writer.reserve(12) catch unreachable;
+                                            ptr[0..12].* = [_]u8{
                                                 '\\', 'u', hex_chars[c >> 12],  hex_chars[(c >> 8) & 15],  hex_chars[(c >> 4) & 15],  hex_chars[c & 15],
                                                 '\\', 'u', hex_chars[c2 >> 12], hex_chars[(c2 >> 8) & 15], hex_chars[(c2 >> 4) & 15], hex_chars[c2 & 15],
                                             };
-                                            e.print(&bytes);
+                                            e.writer.advance(12);
 
                                             continue;
                                             // Otherwise, encode to UTF-8
                                         } else {
-                                            width = std.unicode.utf8Encode(r, &temp) catch unreachable;
-                                            e.print(temp[0..width]);
+                                            var ptr = e.writer.reserve(4) catch unreachable;
+                                            e.writer.advance(strings.encodeWTF8RuneT(ptr[0..4], CodeUnitType, r));
                                             continue;
                                         }
                                     }
                                 }
 
-                                // Write an unpaired high surrogate
-                                temp = [_]u8{ '\\', 'u', hex_chars[c >> 12], hex_chars[(c >> 8) & 15], hex_chars[(c >> 4) & 15], hex_chars[c & 15] };
-                                e.print(&temp);
+                                {
+                                    // Write an unpaired high surrogate
+                                    var ptr = e.writer.reserve(6) catch unreachable;
+                                    ptr[0..6].* = [_]u8{ '\\', 'u', hex_chars[c >> 12], hex_chars[(c >> 8) & 15], hex_chars[(c >> 4) & 15], hex_chars[c & 15] };
+                                    e.writer.advance(6);
+                                }
                             },
                             // Is this an unpaired low surrogate or four-digit hex escape?
                             first_low_surrogate...last_low_surrogate => {
                                 // Write an unpaired high surrogate
-                                temp = [_]u8{ '\\', 'u', hex_chars[c >> 12], hex_chars[(c >> 8) & 15], hex_chars[(c >> 4) & 15], hex_chars[c & 15] };
-                                e.print(&temp);
+                                var ptr = e.writer.reserve(6) catch unreachable;
+                                ptr[0..6].* = [_]u8{ '\\', 'u', hex_chars[c >> 12], hex_chars[(c >> 8) & 15], hex_chars[(c >> 4) & 15], hex_chars[c & 15] };
+                                e.writer.advance(6);
                             },
                             else => {
                                 // this extra branch should get compiled
                                 if (ascii_only) {
                                     if (c > 0xFF) {
+                                        var ptr = e.writer.reserve(6) catch unreachable;
                                         // Write an unpaired high surrogate
-                                        temp = [_]u8{ '\\', 'u', hex_chars[c >> 12], hex_chars[(c >> 8) & 15], hex_chars[(c >> 4) & 15], hex_chars[c & 15] };
-                                        e.print(&temp);
+                                        ptr[0..6].* = [_]u8{ '\\', 'u', hex_chars[c >> 12], hex_chars[(c >> 8) & 15], hex_chars[(c >> 4) & 15], hex_chars[c & 15] };
+                                        e.writer.advance(6);
                                     } else {
                                         // Can this be a two-digit hex escape?
-                                        const quad = [_]u8{ '\\', 'x', hex_chars[c >> 4], hex_chars[c & 15] };
-                                        e.print(&quad);
+                                        var ptr = e.writer.reserve(4) catch unreachable;
+                                        ptr[0..4].* = [_]u8{ '\\', 'x', hex_chars[c >> 4], hex_chars[c & 15] };
+                                        e.writer.advance(4);
                                     }
                                 } else {
-                                    width = std.unicode.utf8Encode(c, &temp) catch unreachable;
-                                    e.print(temp[0..width]);
+                                    // chars < 255 as two digit hex escape
+                                    if (c < 0xFF) {
+                                        var ptr = e.writer.reserve(4) catch unreachable;
+                                        ptr[0..4].* = [_]u8{ '\\', 'x', hex_chars[c >> 4], hex_chars[c & 15] };
+                                        e.writer.advance(4);
+                                        continue;
+                                    }
+
+                                    var ptr = e.writer.reserve(4) catch return;
+                                    e.writer.advance(strings.encodeWTF8RuneT(ptr[0..4], CodeUnitType, c));
                                 }
                             },
                         }
@@ -3849,6 +3911,8 @@ pub fn NewWriter(
     writeAllFn: fn (ctx: *ContextType, buf: anytype) anyerror!usize,
     getLastByte: fn (ctx: *const ContextType) u8,
     getLastLastByte: fn (ctx: *const ContextType) u8,
+    reserveNext: fn (ctx: *ContextType, count: u32) anyerror![*]u8,
+    advanceBy: fn (ctx: *ContextType, count: u32) void,
 ) type {
     return struct {
         const Self = @This();
@@ -3900,6 +3964,15 @@ pub fn NewWriter(
             return @call(.{ .modifier = .always_inline }, getLastLastByte, .{&writer.ctx});
         }
 
+        /// Forwards to the `reserveNext` callback supplied to `NewWriter`, passing
+        /// this writer's context, and returns the pointer (or error) it produces.
+        pub fn reserve(writer: *Self, count: u32) anyerror![*]u8 {
+            const ctx = &writer.ctx;
+            return reserveNext(ctx, count);
+        }
+
+        /// Invokes the `advanceBy` callback supplied to `NewWriter` with this
+        /// writer's context, then adds `count` to the writer's `written` tally.
+        pub fn advance(writer: *Self, count: u32) void {
+            const ctx = &writer.ctx;
+            advanceBy(ctx, count);
+
+            const delta = @intCast(i32, count);
+            writer.written += delta;
+        }
+
         pub const Error = error{FormatError};
 
         pub fn writeAll(writer: *Self, bytes: anytype) Error!usize {
@@ -4015,6 +4088,16 @@ const FileWriterInternal = struct {
         return if (buffer.list.items.len > 1) buffer.list.items[buffer.list.items.len - 2] else 0;
     }
 
+    /// Asks the shared `buffer` to grow for `count` more bytes, then returns a
+    /// pointer to the byte just past its current contents.
+    pub fn reserveNext(ctx: *FileWriterInternal, count: u32) anyerror![*]u8 {
+        try buffer.growIfNeeded(count);
+        const used = buffer.list.items.len;
+        return buffer.list.items.ptr + used;
+    }
+    /// Extends the shared `buffer`'s item slice by `count` bytes, committing
+    /// bytes that were written through a pointer obtained from `reserveNext`.
+    pub fn advanceBy(ctx: *FileWriterInternal, count: u32) void {
+        // Filling the buffer exactly to capacity is valid, so the bound is inclusive.
+        if (comptime Environment.isDebug) std.debug.assert(buffer.list.items.len + count <= buffer.list.capacity);
+
+        buffer.list.items = buffer.list.items.ptr[0 .. buffer.list.items.len + count];
+    }
+
     pub fn done(
         ctx: *FileWriterInternal,
     ) anyerror!void {
@@ -4101,6 +4184,16 @@ pub const BufferWriter = struct {
         return if (ctx.buffer.list.items.len > 1) ctx.buffer.list.items[ctx.buffer.list.items.len - 2] else 0;
     }
 
+    /// Asks `ctx.buffer` to grow for `count` more bytes, then returns a pointer
+    /// to the byte just past its current contents.
+    pub fn reserveNext(ctx: *BufferWriter, count: u32) anyerror![*]u8 {
+        try ctx.buffer.growIfNeeded(count);
+        const used = ctx.buffer.list.items.len;
+        return ctx.buffer.list.items.ptr + used;
+    }
+    /// Extends `ctx.buffer`'s item slice by `count` bytes, committing bytes that
+    /// were written through a pointer obtained from `reserveNext`.
+    pub fn advanceBy(ctx: *BufferWriter, count: u32) void {
+        // Filling the buffer exactly to capacity is valid, so the bound is inclusive.
+        if (comptime Environment.isDebug) std.debug.assert(ctx.buffer.list.items.len + count <= ctx.buffer.list.capacity);
+
+        ctx.buffer.list.items = ctx.buffer.list.items.ptr[0 .. ctx.buffer.list.items.len + count];
+    }
+
     pub fn reset(ctx: *BufferWriter) void {
         ctx.buffer.reset();
         ctx.approximate_newline_count = 0;
@@ -4127,8 +4220,18 @@ pub const BufferPrinter = NewWriter(
     BufferWriter.writeAll,
     BufferWriter.getLastByte,
     BufferWriter.getLastLastByte,
+    BufferWriter.reserveNext,
+    BufferWriter.advanceBy,
+);
+pub const FileWriter = NewWriter(
+    FileWriterInternal,
+    FileWriterInternal.writeByte,
+    FileWriterInternal.writeAll,
+    FileWriterInternal.getLastByte,
+    FileWriterInternal.getLastLastByte,
+    FileWriterInternal.reserveNext,
+    FileWriterInternal.advanceBy,
 );
-pub const FileWriter = NewWriter(FileWriterInternal, FileWriterInternal.writeByte, FileWriterInternal.writeAll, FileWriterInternal.getLastByte, FileWriterInternal.getLastLastByte);
 pub fn NewFileWriter(file: std.fs.File) FileWriter {
     var internal = FileWriterInternal.init(file);
     return FileWriter.init(internal);
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 0030b8708..9bfd8df77 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -549,39 +549,52 @@ pub fn utf16EqlString(text: []const u16, str: string) bool {
 // This is a clone of golang's "utf8.EncodeRune" that has been modified to encode using
 // WTF-8 instead. See https://simonsapin.github.io/wtf-8/ for more info.
 pub fn encodeWTF8Rune(p: []u8, r: i32) u3 {
-    // Negative values are erroneous. Making it unsigned addresses the problem.
-    const i = @intCast(u32, r);
-    switch (i) {
+    // Thin wrapper: casts `r` to u32 and forwards to `encodeWTF8RuneT`,
+    // requesting that the call always be inlined. Returns the callee's result.
+    return @call(
+        .{
+            .modifier = .always_inline,
+        },
+        encodeWTF8RuneT,
+        .{
+            p,
+            u32,
+            @intCast(u32, r),
+        },
+    );
+}
+
+/// Writes `r` into the front of `p` using one to four bytes, chosen by the
+/// magnitude of `r`, and returns how many bytes were written.
+/// `R` is the integer type of `r`. `p` must be long enough for the bytes
+/// written (up to 4). Values in 0x800...0xFFFF always take the three-byte
+/// form; surrogate values are not special-cased.
+pub fn encodeWTF8RuneT(p: []u8, comptime R: type, r: R) u3 {
+    switch (r) {
         0...0x7F => {
             p[0] = @intCast(u8, r);
             return 1;
         },
         (0x7F + 1)...0x7FF => {
-            p[0] = 0xC0 | @intCast(u8, r >> 6);
-            p[1] = 0x80 | @intCast(u8, r) & 0x3F;
+            p[0] = @truncate(u8, 0xC0 | ((r >> 6)));
+            p[1] = @truncate(u8, 0x80 | (r & 0x3F));
             return 2;
         },
         (0x7FF + 1)...0xFFFF => {
-            p[0] = 0xE0 | @intCast(u8, r >> 12);
-            p[1] = 0x80 | @intCast(u8, r >> 6) & 0x3F;
-            p[2] = 0x80 | @intCast(u8, r) & 0x3F;
+            p[0] = @truncate(u8, 0xE0 | ((r >> 12)));
+            p[1] = @truncate(u8, 0x80 | ((r >> 6) & 0x3F));
+            p[2] = @truncate(u8, 0x80 | (r & 0x3F));
             return 3;
         },
+        // Everything above 0xFFFF is written in the four-byte form.
         else => {
-            p[0] = 0xF0 | @intCast(u8, r >> 18);
-            p[1] = 0x80 | @intCast(u8, r >> 12) & 0x3F;
-            p[2] = 0x80 | @intCast(u8, r >> 6) & 0x3F;
-            p[3] = 0x80 | @intCast(u8, r) & 0x3F;
+            p[0] = @truncate(u8, 0xF0 | ((r >> 18)));
+            p[1] = @truncate(u8, 0x80 | ((r >> 12) & 0x3F));
+            p[2] = @truncate(u8, 0x80 | ((r >> 6) & 0x3F));
+            p[3] = @truncate(u8, 0x80 | (r & 0x3F));
             return 4;
         },
     }
 }
 
 pub fn containsNonBmpCodePoint(text: string) bool {
-    var iter = std.unicode.Utf8Iterator{ .bytes = text, .i = 0 };
+    var iter = CodepointIterator.init(text);
+    var curs = CodepointIterator.Cursor{};
 
-    while (iter.nextCodepoint()) |codepoint| {
-        if (codepoint > 0xFFFF) {
+    while (iter.next(&curs)) {
+        if (curs.c > 0xFFFF) {
             return true;
         }
     }
@@ -668,16 +681,6 @@ pub inline fn utf8ByteSequenceLength(first_byte: u8) u3 {
     };
 }
 
-pub inline fn utf8ByteSequenceLength32(first_byte: u8) u32 {
-    return switch (first_byte) {
-        0b0000_0000...0b0111_1111 => 1,
-        0b1100_0000...0b1101_1111 => 2,
-        0b1110_0000...0b1110_1111 => 3,
-        0b1111_0000...0b1111_0111 => 4,
-        else => 0,
-    };
-}
-
 pub fn NewCodePointIterator(comptime CodePointType: type, comptime zeroValue: comptime_int) type {
     return struct {
         const Iterator = @This();
author	Jarred Sumner <jarred@jarredsumner.com>	2021-10-25 00:52:07 -0700
committer	Jarred Sumner <jarred@jarredsumner.com>	2021-10-25 00:52:07 -0700
commit	42c264bf7b45bdf7944d10260beeaf7c8b50a21a (patch)
tree	aaf099275cbafcf0b253a48dda30db2dc987ed66
parent	fe6564b5332a72116f68c1c95ae7da86fe2ca668 (diff)
download	bun-42c264bf7b45bdf7944d10260beeaf7c8b50a21a.tar.gz bun-42c264bf7b45bdf7944d10260beeaf7c8b50a21a.tar.zst bun-42c264bf7b45bdf7944d10260beeaf7c8b50a21a.zip