about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> 2022-07-15 21:07:27 -0700
committer: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> 2022-07-15 21:07:41 -0700
commit: fd4a210b84da0c7e5f55ae31a7e8af805b81abaa (patch)
tree: 7f1a4e30cb1aa3e69309d61d396779592e311147
parent: 9a7874a680dc8846628b11b923f0096e7d3dadfe (diff)
download: bun-fd4a210b84da0c7e5f55ae31a7e8af805b81abaa.tar.gz
bun-fd4a210b84da0c7e5f55ae31a7e8af805b81abaa.tar.zst
bun-fd4a210b84da0c7e5f55ae31a7e8af805b81abaa.zip
[bun.js] Fix non-ascii latin1 string handling in console.log
Closes https://github.com/oven-sh/bun/issues/738
Closes https://github.com/oven-sh/bun/issues/737
-rw-r--r-- src/bun.js/bindings/bindings.zig 15
-rw-r--r-- src/bun.js/bindings/exports.zig 86
-rw-r--r-- src/bun.js/javascript_core_c_api.zig 4
-rw-r--r-- src/bun.js/webcore/response.zig 2
-rw-r--r-- src/js_printer.zig 61
-rw-r--r-- src/string_immutable.zig 63
-rw-r--r-- test/bun.js/inspect.test.js 17
7 files changed, 175 insertions, 73 deletions
diff --git a/src/bun.js/bindings/bindings.zig b/src/bun.js/bindings/bindings.zig
index 7fd20081e..897b3c7b4 100644
--- a/src/bun.js/bindings/bindings.zig
+++ b/src/bun.js/bindings/bindings.zig
@@ -206,6 +206,12 @@ pub const ZigString = extern struct {
return ZigString{ .ptr = slice_.ptr, .len = slice_.len };
}
+ pub fn init16(slice_: []const u16) ZigString {
+ var out = ZigString{ .ptr = std.mem.sliceAsBytes(slice_).ptr, .len = slice_.len };
+ out.markUTF16();
+ return out;
+ }
+
pub fn from(slice_: JSC.C.JSValueRef, ctx: JSC.C.JSContextRef) ZigString {
return JSC.JSValue.fromRef(slice_).getZigString(ctx.ptr());
}
@@ -242,7 +248,7 @@ pub const ZigString = extern struct {
return shim.cppFn("toExternalU16", .{ ptr, len, global });
}
- pub fn isUTF8(this: *ZigString) bool {
+ pub fn isUTF8(this: ZigString) bool {
return (@ptrToInt(this.ptr) & (1 << 61)) != 0;
}
@@ -272,12 +278,17 @@ pub const ZigString = extern struct {
}
pub fn format(self: ZigString, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
+ if (self.isUTF8()) {
+ try writer.writeAll(self.slice());
+ return;
+ }
+
if (self.is16Bit()) {
try strings.formatUTF16(self.utf16Slice(), writer);
return;
}
- try writer.writeAll(self.slice());
+ try strings.formatLatin1(self.slice(), writer);
}
pub inline fn toRef(slice_: []const u8, global: *JSGlobalObject) C_API.JSValueRef {
diff --git a/src/bun.js/bindings/exports.zig b/src/bun.js/bindings/exports.zig
index e39d3ea12..e68204f9a 100644
--- a/src/bun.js/bindings/exports.zig
+++ b/src/bun.js/bindings/exports.zig
@@ -1393,6 +1393,7 @@ pub const ZigConsoleClient = struct {
var slice = slice_;
var i: u32 = 0;
var len: u32 = @truncate(u32, slice.len);
+ var any_non_ascii = false;
while (i < len) : (i += 1) {
switch (slice[i]) {
'%' => {
@@ -1411,7 +1412,11 @@ pub const ZigConsoleClient = struct {
// Flush everything up to the %
const end = slice[0 .. i - 1];
- writer.writeAll(end);
+ if (!any_non_ascii)
+ writer.writeAll(end)
+ else
+ writer.writeLatin1(end);
+ any_non_ascii = false;
slice = slice[@minimum(slice.len, i + 1)..];
i = 0;
len = @truncate(u32, slice.len);
@@ -1436,11 +1441,14 @@ pub const ZigConsoleClient = struct {
break;
if (slice[i] == '%') i += 2;
},
+ 128...255 => {
+ any_non_ascii = true;
+ },
else => {},
}
}
- if (slice.len > 0) writer.writeAll(slice);
+ if (slice.len > 0) writer.writeLatin1(slice);
}
pub fn WrappedWriter(comptime Writer: type) type {
@@ -1451,9 +1459,32 @@ pub const ZigConsoleClient = struct {
self.ctx.print(fmt, args) catch unreachable;
}
+ pub fn writeLatin1(self: *@This(), buf: []const u8) void {
+ var remain = buf;
+ while (remain.len > 0) {
+ if (strings.firstNonASCII(remain)) |i| {
+ if (i > 0) self.ctx.writeAll(remain[0..i]) catch unreachable;
+ self.ctx.writeAll(&strings.latin1ToCodepointBytesAssumeNotASCII(remain[i])) catch unreachable;
+ remain = remain[i + 1 ..];
+ } else {
+ break;
+ }
+ }
+
+ self.ctx.writeAll(remain) catch unreachable;
+ }
+
pub inline fn writeAll(self: *@This(), buf: []const u8) void {
self.ctx.writeAll(buf) catch unreachable;
}
+
+ pub inline fn writeString(self: *@This(), str: ZigString) void {
+ self.print("{}", .{str});
+ }
+
+ pub inline fn write16Bit(self: *@This(), input: []const u16) void {
+ strings.formatUTF16Type([]const u16, input, self.ctx) catch unreachable;
+ }
};
}
@@ -1594,18 +1625,18 @@ pub const ZigConsoleClient = struct {
return;
}
- JSPrinter.writeJSONString(str.slice(), Writer, writer_, false) catch unreachable;
+ JSPrinter.writeJSONString(str.slice(), Writer, writer_, .latin1) catch unreachable;
return;
}
- if (jsType == .RegExpObject) {
+ if (jsType == .RegExpObject and enable_ansi_colors) {
writer.print(comptime Output.prettyFmt("<r><red>", enable_ansi_colors), .{});
}
writer.print("{}", .{str});
- if (jsType == .RegExpObject) {
+ if (jsType == .RegExpObject and enable_ansi_colors) {
writer.print(comptime Output.prettyFmt("<r>", enable_ansi_colors), .{});
}
},
@@ -1629,11 +1660,7 @@ pub const ZigConsoleClient = struct {
var description = value.getDescription(this.globalThis);
if (description.len > 0) {
- var slice = description.toSlice(default_allocator);
- defer if (slice.allocated) slice.deinit();
- writer.print(comptime Output.prettyFmt("<r><cyan>Symbol<r><d>(<green>{}<r><d>)<r>", enable_ansi_colors), .{
- JSPrinter.formatJSONString(slice.slice()),
- });
+ writer.print(comptime Output.prettyFmt("<r><cyan>Symbol<r><d>(<green>{}<r><d>)<r>", enable_ansi_colors), .{description});
} else {
writer.print(comptime Output.prettyFmt("<r><cyan>Symbol<r>", enable_ansi_colors), .{});
}
@@ -1976,20 +2003,19 @@ pub const ZigConsoleClient = struct {
print_children: {
switch (tag.tag) {
.String => {
- var children_slice = children.toSlice(this.globalThis, default_allocator);
- defer if (children_slice.allocated) children_slice.deinit();
- if (children_slice.len == 0) break :print_children;
+ var children_string = children.getZigString(this.globalThis);
+ if (children_string.len == 0) break :print_children;
if (comptime enable_ansi_colors) writer.writeAll(comptime Output.prettyFmt("<r>", true));
writer.writeAll(">");
- if (children_slice.len < 128) {
- writer.writeAll(children_slice.slice());
+ if (children_string.len < 128) {
+ writer.writeString(children_string);
} else {
this.indent += 1;
writer.writeAll("\n");
this.writeIndent(Writer, writer_) catch unreachable;
this.indent -|= 1;
- writer.writeAll(children_slice.slice());
+ writer.writeString(children_string);
writer.writeAll("\n");
this.writeIndent(Writer, writer_) catch unreachable;
}
@@ -2093,25 +2119,43 @@ pub const ZigConsoleClient = struct {
defer CAPI.JSStringRelease(property_name_ref);
const len = CAPI.JSStringGetLength(property_name_ref);
if (len == 0) continue;
- var prop = CAPI.JSStringGetCharacters8Ptr(property_name_ref)[0..len];
+ const encoding = CAPI.JSStringEncoding(property_name_ref);
var property_value = CAPI.JSObjectGetProperty(this.globalThis.ref(), object, property_name_ref, null);
const tag = Tag.get(JSValue.fromRef(property_value), this.globalThis);
if (tag.cell.isHidden()) continue;
- const key = prop[0..@minimum(prop.len, 128)];
+ const key: ZigString = if (encoding == .char8)
+ ZigString.init((JSC.C.JSStringGetCharacters8Ptr(property_name_ref))[0..@minimum(len, 128)])
+ else
+ ZigString.init16(JSC.C.JSStringGetCharactersPtr(property_name_ref)[0..@minimum(len, 128)]);
// TODO: make this one pass?
- if (JSLexer.isLatin1Identifier(@TypeOf(key), key)) {
+ if (!key.is16Bit() and JSLexer.isLatin1Identifier(@TypeOf(key.slice()), key.slice())) {
writer.print(
- comptime Output.prettyFmt("{s}<d>:<r> ", enable_ansi_colors),
+ comptime Output.prettyFmt("{}<d>:<r> ", enable_ansi_colors),
.{key},
);
+ } else if (key.is16Bit()) {
+ var utf16Slice = key.utf16SliceAligned();
+ writer.writeAll("\"");
+
+ while (strings.indexOfAny16(utf16Slice, "\"")) |j| {
+ writer.write16Bit(utf16Slice[0..j]);
+ writer.writeAll("\\\"");
+ utf16Slice = utf16Slice[j + 1 ..];
+ }
+
+ writer.write16Bit(utf16Slice);
+ writer.print(
+ comptime Output.prettyFmt("\"<d>:<r> ", enable_ansi_colors),
+ .{},
+ );
} else {
writer.print(
comptime Output.prettyFmt("{s}<d>:<r> ", enable_ansi_colors),
- .{JSPrinter.formatJSONString(key)},
+ .{JSPrinter.formatJSONString(key.slice())},
);
}
diff --git a/src/bun.js/javascript_core_c_api.zig b/src/bun.js/javascript_core_c_api.zig
index 82145defc..00f35c45a 100644
--- a/src/bun.js/javascript_core_c_api.zig
+++ b/src/bun.js/javascript_core_c_api.zig
@@ -244,13 +244,13 @@ pub extern "c" fn JSContextGetGroup(ctx: JSContextRef) JSContextGroupRef;
pub extern "c" fn JSContextGetGlobalContext(ctx: JSContextRef) JSGlobalContextRef;
pub extern "c" fn JSGlobalContextCopyName(ctx: JSGlobalContextRef) JSStringRef;
pub extern "c" fn JSGlobalContextSetName(ctx: JSGlobalContextRef, name: JSStringRef) void;
-pub const JSChar = c_ushort;
+pub const JSChar = u16;
pub extern fn JSStringCreateWithCharacters(chars: [*c]const JSChar, numChars: usize) JSStringRef;
pub extern fn JSStringCreateWithUTF8CString(string: [*c]const u8) JSStringRef;
pub extern fn JSStringRetain(string: JSStringRef) JSStringRef;
pub extern fn JSStringRelease(string: JSStringRef) void;
pub extern fn JSStringGetLength(string: JSStringRef) usize;
-pub extern fn JSStringGetCharactersPtr(string: JSStringRef) [*c]const JSChar;
+pub extern fn JSStringGetCharactersPtr(string: JSStringRef) [*]const JSChar;
pub extern fn JSStringGetMaximumUTF8CStringSize(string: JSStringRef) usize;
pub extern fn JSStringGetUTF8CString(string: JSStringRef, buffer: [*c]u8, bufferSize: usize) usize;
pub extern fn JSStringIsEqual(a: JSStringRef, b: JSStringRef) bool;
diff --git a/src/bun.js/webcore/response.zig b/src/bun.js/webcore/response.zig
index 6171468b1..ab567c75a 100644
--- a/src/bun.js/webcore/response.zig
+++ b/src/bun.js/webcore/response.zig
@@ -208,7 +208,7 @@ pub const Response = struct {
try formatter.writeIndent(Writer, writer);
try writer.writeAll("statusText: ");
- try JSPrinter.writeJSONString(this.status_text, Writer, writer, false);
+ try JSPrinter.writeJSONString(this.status_text, Writer, writer, .ascii);
formatter.printComma(Writer, writer, enable_ansi_colors) catch unreachable;
try writer.writeAll("\n");
diff --git a/src/js_printer.zig b/src/js_printer.zig
index 945e0a382..e6f31aa85 100644
--- a/src/js_printer.zig
+++ b/src/js_printer.zig
@@ -226,30 +226,45 @@ const JSONFormatter = struct {
input: []const u8,
pub fn format(self: JSONFormatter, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
- try writeJSONString(self.input, @TypeOf(writer), writer, false);
+ try writeJSONString(self.input, @TypeOf(writer), writer, .latin1);
}
};
+/// Expects latin1
pub fn formatJSONString(text: []const u8) JSONFormatter {
return JSONFormatter{ .input = text };
}
-pub fn writeJSONString(text: []const u8, comptime Writer: type, writer: Writer, comptime ascii_only: bool) !void {
+pub fn writeJSONString(input: []const u8, comptime Writer: type, writer: Writer, comptime encoding: strings.Encoding) !void {
try writer.writeAll("\"");
- var i: usize = 0;
- var n: usize = text.len;
- while (i < n) {
- const width = strings.wtf8ByteSequenceLengthWithInvalid(text[i]);
- const c = strings.decodeWTF8RuneT(text.ptr[i .. i + 4][0..4], width, i32, 0);
- if (canPrintWithoutEscape(i32, c, ascii_only)) {
- const remain = text[i + @as(usize, width) ..];
+ var text = input;
+ const end = text.ptr + text.len;
+ if (comptime encoding == .utf16) {
+ @compileError("not implemented yet");
+ }
+
+ while (text.ptr != end) {
+ const width = if (comptime encoding == .latin1 or encoding == .ascii)
+ 1
+ else
+ strings.wtf8ByteSequenceLengthWithInvalid(text[0]);
+
+ const c: i32 = if (comptime encoding == .utf8)
+ strings.decodeWTF8RuneT(text.ptr[0..4], width, i32, 0)
+ else brk: {
+ const char = text[0];
+ if (char <= 0x7F) {
+ break :brk char;
+ } else break :brk strings.latin1ToCodepointAssumeNotASCII(char, i32);
+ };
+ if (canPrintWithoutEscape(i32, c, false)) {
+ const remain = text[@as(usize, width)..];
if (strings.indexOfNeedsEscape(remain)) |j| {
- try writer.writeAll(text[i .. i + j + @as(usize, width)]);
- i += j + @as(usize, width);
+ try writer.writeAll(text[0 .. j + @as(usize, width)]);
+ text = text[j + @as(usize, width) ..];
continue;
} else {
- try writer.writeAll(text[i..]);
- i = n;
+ try writer.writeAll(text);
break;
}
}
@@ -260,46 +275,46 @@ pub fn writeJSONString(text: []const u8, comptime Writer: type, writer: Writer,
// allowed in strict mode (or in template strings).
0x07 => {
try writer.writeAll("\\x07");
- i += 1;
+ text = text[1..];
},
0x08 => {
try writer.writeAll("\\b");
- i += 1;
+ text = text[1..];
},
0x0C => {
try writer.writeAll("\\f");
- i += 1;
+ text = text[1..];
},
'\n' => {
try writer.writeAll("\\n");
- i += 1;
+ text = text[1..];
},
std.ascii.control_code.CR => {
try writer.writeAll("\\r");
- i += 1;
+ text = text[1..];
},
// \v
std.ascii.control_code.VT => {
try writer.writeAll("\\v");
- i += 1;
+ text = text[1..];
},
// "\\"
'\\' => {
try writer.writeAll("\\\\");
- i += 1;
+ text = text[1..];
},
'"' => {
try writer.writeAll("\\\"");
- i += 1;
+ text = text[1..];
},
'\t' => {
try writer.writeAll("\\t");
- i += 1;
+ text = text[1..];
},
else => {
- i += @as(usize, width);
+ text = text[@as(usize, width)..];
if (c < 0xFFFF) {
const k = @intCast(usize, c);
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 479342025..ec3a2ecbe 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -7,6 +7,14 @@ const CodePoint = @import("string_types.zig").CodePoint;
const bun = @import("global.zig");
pub const joiner = @import("./string_joiner.zig");
const assert = std.debug.assert;
+
+pub const Encoding = enum {
+ ascii,
+ utf8,
+ latin1,
+ utf16,
+};
+
pub inline fn containsChar(self: string, char: u8) bool {
return indexOfChar(self, char) != null;
}
@@ -2189,8 +2197,7 @@ pub fn copyUTF16IntoUTF8(buf: []u8, comptime Type: type, utf16: Type) EncodeInto
while (firstNonASCII16(Type, utf16_remaining)) |i| {
const end = @minimum(i, remaining.len);
- const to_copy = utf16_remaining[0..end];
- copyU16IntoU8(remaining, Type, to_copy);
+ if (end > 0) copyU16IntoU8(remaining, Type, utf16_remaining[0..end]);
remaining = remaining[end..];
utf16_remaining = utf16_remaining[end..];
@@ -3133,35 +3140,43 @@ test "firstNonASCII16" {
}
}
-pub fn formatUTF16(slice_: []align(1) const u16, writer: anytype) !void {
+pub fn formatUTF16Type(comptime Slice: type, slice_: Slice, writer: anytype) !void {
var slice = slice_;
- var chunk: [512 + 4]u8 = undefined;
- var chunk_i: u16 = 0;
+ const chunk_size = 2048;
+ var chunk: [chunk_size + 4]u8 = undefined;
while (slice.len > 0) {
- if (chunk_i >= chunk.len - 5) {
- try writer.writeAll(chunk[0..chunk_i]);
- chunk_i = 0;
- }
+ const result = strings.copyUTF16IntoUTF8(&chunk, Slice, slice);
+ if (result.read == 0 or result.written == 0)
+ break;
+ try writer.writeAll(chunk[0..result.written]);
+ slice = slice[result.read..];
+ }
+}
- var cp: u32 = slice[0];
- slice = slice[1..];
- if (cp & ~@as(u32, 0x03ff) == 0xd800 and slice.len > 0) {
- cp = 0x10000 + (((cp & 0x03ff) << 10) | (slice[0] & 0x03ff));
- slice = slice[1..];
- }
+pub fn formatUTF16(slice_: []align(1) const u16, writer: anytype) !void {
+ return formatUTF16Type([]align(1) const u16, slice_, writer);
+}
- chunk_i += @as(
- u8,
- @call(
- .{ .modifier = .always_inline },
- encodeWTF8RuneT,
- .{ chunk[chunk_i..][0..4], u32, cp },
- ),
- );
+pub fn formatLatin1(slice_: []const u8, writer: anytype) !void {
+ var slice = slice_;
+ const chunk_size = 2048;
+ var chunk: [chunk_size + 4]u8 = undefined;
+
+ while (strings.firstNonASCII(slice)) |i| {
+ if (i > 0) {
+ try writer.writeAll(slice[0..i]);
+ slice = slice[i..];
+ }
+ const result = strings.copyLatin1IntoUTF8(&chunk, @TypeOf(slice), slice[0..@minimum(chunk.len, slice.len)]);
+ if (result.read == 0 or result.written == 0)
+ break;
+ try writer.writeAll(chunk[0..result.written]);
+ slice = slice[result.read..];
}
- try writer.writeAll(chunk[0..chunk_i]);
+ if (slice.len > 0)
+ try writer.writeAll(slice); // write the remaining bytes
}
test "print UTF16" {
diff --git a/test/bun.js/inspect.test.js b/test/bun.js/inspect.test.js
index aa38f5c2d..138b395db 100644
--- a/test/bun.js/inspect.test.js
+++ b/test/bun.js/inspect.test.js
@@ -1,5 +1,22 @@
import { it, expect } from "bun:test";
+it("utf16 property name", () => {
+ var { Database } = require("bun:sqlite");
+ const db = Database.open(":memory:");
+ expect(Bun.inspect(db.prepare("select '😀' as 笑").all())).toBe(
+ '[ { "笑": "😀" } ]'
+ );
+});
+
+it("latin1", () => {
+ expect(Bun.inspect("English")).toBe("English");
+ expect(Bun.inspect("Français")).toBe("Français");
+ expect(Bun.inspect("Ελληνική")).toBe("Ελληνική");
+ expect(Bun.inspect("日本語")).toBe("日本語");
+ expect(Bun.inspect("Emoji😎")).toBe("Emoji😎");
+ expect(Bun.inspect("Français / Ελληνική")).toBe("Français / Ελληνική");
+});
+
it("Request object", () => {
expect(Bun.inspect(new Request({ url: "https://example.com" })).trim()).toBe(
`