diff options
author | 2023-02-17 14:48:10 -0800 | |
---|---|---|
committer | 2023-02-17 14:48:10 -0800 | |
commit | d37daeb76ad078d4a2254419cfebbc0602a50670 (patch) | |
tree | 9e2b1f72f16818a56376a786100f8aa403946125 | |
parent | 79f7d29d034586b352a44a71bd10ffc47112545f (diff) | |
download | bun-d37daeb76ad078d4a2254419cfebbc0602a50670.tar.gz bun-d37daeb76ad078d4a2254419cfebbc0602a50670.tar.zst bun-d37daeb76ad078d4a2254419cfebbc0602a50670.zip |
Fix #2005 (#2096)
-rw-r--r-- | src/js_printer.zig | 49 | ||||
-rw-r--r-- | test/bun.js/repro_2005.test.js | 12 |
2 files changed, 60 insertions, 1 deletions
diff --git a/src/js_printer.zig b/src/js_printer.zig index 48e4c6260..ce4138cd4 100644 --- a/src/js_printer.zig +++ b/src/js_printer.zig @@ -2629,7 +2629,54 @@ pub fn NewPrinter( p.print(" "); } - p.print(e.value); + if (comptime is_bun_platform) { + // Translate any non-ASCII to unicode escape sequences + var ascii_start: usize = 0; + var is_ascii = false; + var iter = CodepointIterator.init(e.value); + var cursor = CodepointIterator.Cursor{}; + while (iter.next(&cursor)) { + switch (cursor.c) { + first_ascii...last_ascii => { + if (!is_ascii) { + ascii_start = cursor.i; + is_ascii = true; + } + }, + else => { + if (is_ascii) { + p.print(e.value[ascii_start..cursor.i]); + is_ascii = false; + } + + switch (cursor.c) { + 0...0xFFFF => { + p.print([_]u8{ + '\\', + 'u', + hex_chars[cursor.c >> 12], + hex_chars[(cursor.c >> 8) & 15], + hex_chars[(cursor.c >> 4) & 15], + hex_chars[cursor.c & 15], + }); + }, + else => { + p.print("\\u{"); + std.fmt.formatInt(cursor.c, 16, .lower, .{}, p) catch unreachable; + p.print("}"); + }, + } + }, + } + } + + if (is_ascii) { + p.print(e.value[ascii_start..]); + } + } else { + // UTF8 sequence is fine + p.print(e.value); + } // Need a space before the next identifier to avoid it turning into flags p.prev_reg_exp_end = p.writer.written; diff --git a/test/bun.js/repro_2005.test.js b/test/bun.js/repro_2005.test.js new file mode 100644 index 000000000..bd80ab7dd --- /dev/null +++ b/test/bun.js/repro_2005.test.js @@ -0,0 +1,12 @@ +import { it, expect } from "bun:test"; + +it("regex literal with non-Latin1 should work", () => { + const text = "这是一段要替换的文字"; + + //Correct results: 这是一段的文字 + expect(text.replace(new RegExp("要替换"), "")).toBe("这是一段的文字"); + + //Incorrect result: 这是一段要替换的文字 + expect(text.replace(/要替换/, "")).toBe("这是一段的文字"); + +}); |