From ab04e82f55eb14347b08d9ec98399a1546b3b306 Mon Sep 17 00:00:00 2001
From: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>
Date: Sun, 5 Jun 2022 04:44:05 -0700
Subject: good enough for now
---
bench/snippets/escapeHTML.js | 56 +-
integration/bunjs-only-snippets/escapeHTML.test.js | 68 ++-
package.json | 4 +-
src/javascript/jsc/api/bun.zig | 57 ++-
src/javascript/jsc/bindings/bindings.cpp | 21 +-
src/javascript/jsc/test/jest.zig | 12 +
src/string_immutable.zig | 569 ++++++++++++++++-----
7 files changed, 601 insertions(+), 186 deletions(-)
diff --git a/bench/snippets/escapeHTML.js b/bench/snippets/escapeHTML.js
index 63e68861d..b6330b630 100644
--- a/bench/snippets/escapeHTML.js
+++ b/bench/snippets/escapeHTML.js
@@ -1,27 +1,10 @@
import { group } from "mitata";
import { bench, run } from "mitata";
+import { encode as htmlEntityEncode } from "html-entities";
+import { escape as heEscape } from "he";
var bunEscapeHTML_ = globalThis.escapeHTML || Bun.escapeHTML;
-var bunEscapeHTML = function (str) {
- if (str.length === 1) {
- switch (str.charCodeAt(0)) {
- case 34: // "
- return """;
- case 38: // &
- return "&";
- case 39: // '
- return "'"; // modified from escape-html; used to be '''
- case 60: // <
- return "<";
- case 62: // >
- return ">";
- default:
- return str;
- }
- }
-
- return bunEscapeHTML_(str);
-};
+var bunEscapeHTML = bunEscapeHTML_;
const matchHtmlRegExp = /["'&<>]/;
@@ -44,6 +27,11 @@ const FIXTURE = require("fs")
})
.join("");
+const FIXTURE_WITH_UNICODE = require("fs").readFileSync(
+ import.meta.dir + "/_fixture.txt",
+ "utf8"
+);
+
function reactEscapeHtml(string) {
const str = "" + string;
const match = matchHtmlRegExp.exec(str);
@@ -90,25 +78,29 @@ function reactEscapeHtml(string) {
}
for (let input of [
- // " ",
- // "",
- // "hello world",
- // "hello world",
- // "<",
- // ">",
- // `short value`,
- `nothing to escape `.repeat(99999),
+ "",
+ `long string, nothing to escape... `.repeat(9999),
+ `long utf16 string, no esc 🤔🤔🤔🤔🤔` + "tex".repeat(4000),
+ `smol`,
+ // `medium string with `,
+
FIXTURE,
+ // "[unicode]" + FIXTURE_WITH_UNICODE,
]) {
group(
{
summary: true,
- name: `"` + input.substring(0, Math.min(input.length, 32)) + `"`,
+ name:
+ `"` +
+ input.substring(0, Math.min(input.length, 32)) +
+ `"` +
+ ` (${input.length} chars)`,
},
() => {
- bench(`react's escapeHTML`, () => reactEscapeHtml(input));
-
- bench(`bun's escapeHTML`, () => bunEscapeHTML(input));
+ bench(`ReactDOM.escapeHTML`, () => reactEscapeHtml(input));
+ bench(`html-entities.encode`, () => htmlEntityEncode(input));
+ bench(`he.escape`, () => heEscape(input));
+ bench(`Bun.escapeHTML`, () => bunEscapeHTML(input));
}
);
}
diff --git a/integration/bunjs-only-snippets/escapeHTML.test.js b/integration/bunjs-only-snippets/escapeHTML.test.js
index 13ff138c9..6c709bf76 100644
--- a/integration/bunjs-only-snippets/escapeHTML.test.js
+++ b/integration/bunjs-only-snippets/escapeHTML.test.js
@@ -2,7 +2,19 @@ import { describe, it, expect } from "bun:test";
import { gcTick } from "./gc";
describe("escapeHTML", () => {
+ // The matrix of cases we need to test for:
+ // 1. Works with short strings
+ // 2. Works with long strings
+ // 3. Works with latin1 strings
+ // 4. Works with utf16 strings
+ // 5. Works when the text to escape is somewhere in the middle
+ // 6. Works when the text to escape is in the beginning
+ // 7. Works when the text to escape is in the end
+ // 8. Returns the same string when there's no need to escape
it("works", () => {
+ expect(escapeHTML("absolutely nothing to do here")).toBe(
+ "absolutely nothing to do here"
+ );
expect(escapeHTML("")).toBe(
"<script>alert(1)</script>"
);
@@ -18,16 +30,10 @@ describe("escapeHTML", () => {
expect(escapeHTML("\v")).toBe("\v");
expect(escapeHTML("\b")).toBe("\b");
expect(escapeHTML("\u00A0")).toBe("\u00A0");
+ expect(escapeHTML("" + "lalala")).toBe(
"lalala<script>alert(1)</script>lalala"
);
@@ -39,6 +45,13 @@ describe("escapeHTML", () => {
"lalala" + "<script>alert(1)</script>"
);
+ expect(escapeHTML("What does 😊 mean?")).toBe("What does 😊 mean?");
+ const output = escapeHTML("What does 😊 mean in text?")).toBe(
+ "<div>What does 😊 mean in text?"
+ );
+
expect(
escapeHTML(
("lalala" + "" + "lalala").repeat(900)
@@ -50,5 +63,42 @@ describe("escapeHTML", () => {
expect(
escapeHTML(("lalala" + "").repeat(900))
).toBe(("lalala" + "<script>alert(1)</script>").repeat(900));
+
+ // the positions of the unicode codepoint are important
+ // our simd code for U16 is at 8 bytes, so we need to especially check the boundaries
+ expect(
+ escapeHTML("😊lalala" + "" + "lalala")
+ ).toBe("😊lalala<script>alert(1)</script>lalala");
+ expect(escapeHTML("" + "lalala")).toBe(
+ "<script>😊alert(1)</script>lalala"
+ );
+ expect(escapeHTML("" + "lalala")).toBe(
+ "<script>alert(1)😊</script>lalala"
+ );
+ expect(escapeHTML("" + "😊lalala")).toBe(
+ "<script>alert(1)</script>😊lalala"
+ );
+ expect(escapeHTML("" + "lal😊ala")).toBe(
+ "<script>alert(1)</script>lal😊ala"
+ );
+ expect(
+ escapeHTML("" + "lal😊ala".repeat(10))
+ ).toBe("<script>alert(1)</script>" + "lal😊ala".repeat(10));
+
+ for (let i = 1; i < 10; i++)
+ expect(escapeHTML("" + "la😊".repeat(i))).toBe(
+ "<script>alert(1)</script>" + "la😊".repeat(i)
+ );
+
+ expect(escapeHTML("la😊" + "")).toBe(
+ "la😊" + "<script>alert(1)</script>"
+ );
+ expect(
+ escapeHTML(("lalala" + "😊").repeat(1))
+ ).toBe(("lalala" + "<script>alert(1)</script>😊").repeat(1));
+
+ expect(escapeHTML("😊".repeat(100))).toBe("😊".repeat(100));
+ expect(escapeHTML("😊<".repeat(100))).toBe("😊<".repeat(100));
+ expect(escapeHTML("<😊>".repeat(100))).toBe("<😊>".repeat(100));
});
});
diff --git a/package.json b/package.json
index 1e7a82c3f..8e6b1d517 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"dependencies": {
- "mitata": "^0.0.10",
+ "mitata": "^0.1.3",
"peechy": "0.4.32",
"react": "^17.0.2"
},
@@ -9,6 +9,8 @@
"build-fallback": "esbuild --target=esnext --bundle src/fallback.ts --format=iife --platform=browser --minify > src/fallback.out.js"
},
"devDependencies": {
+ "he": "^1.2.0",
+ "html-entities": "^2.3.3",
"prettier": "^2.4.1",
"typescript": "4.6.3"
},
diff --git a/src/javascript/jsc/api/bun.zig b/src/javascript/jsc/api/bun.zig
index 8228139de..72d61d2e9 100644
--- a/src/javascript/jsc/api/bun.zig
+++ b/src/javascript/jsc/api/bun.zig
@@ -1623,8 +1623,49 @@ pub export fn Bun__escapeHTML(
const input_value = arguments[0];
const zig_str = input_value.getZigString(globalObject);
+ if (zig_str.len == 0)
+ return ZigString.Empty.toValue(globalObject);
+
if (zig_str.is16Bit()) {
- return input_value;
+ var input_slice = zig_str.utf16SliceAligned();
+ var escaped_html = strings.escapeHTMLForUTF16Input(globalObject.bunVM().allocator, input_slice) catch {
+ globalObject.vm().throwError(globalObject, ZigString.init("Out of memory").toValue(globalObject));
+ return JSC.JSValue.jsUndefined();
+ };
+
+ if (escaped_html.ptr == input_slice.ptr and escaped_html.len == input_slice.len) {
+ return input_value;
+ }
+
+ if (input_slice.len == 1) {
+ // single character escaped strings are statically allocated
+ return ZigString.init(std.mem.sliceAsBytes(escaped_html)).to16BitValue(globalObject);
+ }
+
+ if (comptime Environment.allow_assert) {
+ // assert that re-encoding the string produces the same result
+ std.debug.assert(
+ std.mem.eql(
+ u16,
+ (strings.toUTF16Alloc(bun.default_allocator, strings.toUTF8Alloc(bun.default_allocator, escaped_html) catch unreachable, false) catch unreachable).?,
+ escaped_html,
+ ),
+ );
+
+ // assert we do not allocate a new string unnecessarily
+ std.debug.assert(
+ !std.mem.eql(
+ u16,
+ input_slice,
+ escaped_html,
+ ),
+ );
+
+ // the output should always be longer than the input
+ std.debug.assert(escaped_html.len > input_slice.len);
+ }
+
+ return ZigString.from16(escaped_html.ptr, escaped_html.len).toExternalValue(globalObject);
} else {
var input_slice = zig_str.slice();
var escaped_html = strings.escapeHTMLForLatin1Input(globalObject.bunVM().allocator, input_slice) catch {
@@ -1641,6 +1682,20 @@ pub export fn Bun__escapeHTML(
return ZigString.init(escaped_html).toValue(globalObject);
}
+ if (comptime Environment.allow_assert) {
+ // the output should always be longer than the input
+ std.debug.assert(escaped_html.len > input_slice.len);
+
+ // assert we do not allocate a new string unnecessarily
+ std.debug.assert(
+ !std.mem.eql(
+ u8,
+ input_slice,
+ escaped_html,
+ ),
+ );
+ }
+
return ZigString.init(escaped_html).toExternalValue(globalObject);
}
}
diff --git a/src/javascript/jsc/bindings/bindings.cpp b/src/javascript/jsc/bindings/bindings.cpp
index 7505a8a8f..a4435f06b 100644
--- a/src/javascript/jsc/bindings/bindings.cpp
+++ b/src/javascript/jsc/bindings/bindings.cpp
@@ -1101,23 +1101,26 @@ static void free_global_string(void* str, void* ptr, unsigned len)
JSC__JSValue ZigString__toExternalU16(const uint16_t* arg0, size_t len, JSC__JSGlobalObject* global)
{
- return JSC::JSValue::encode(JSC::JSValue(JSC::jsOwnedString(
- global->vm(),
- ExternalStringImpl::create(reinterpret_cast(arg0), len, nullptr, free_global_string))));
+ auto ref = String(ExternalStringImpl::create(reinterpret_cast(arg0), len, nullptr, free_global_string));
+
+ return JSC::JSValue::encode(JSC::JSValue(JSC::jsString(
+ global->vm(), WTFMove(ref))));
}
// This must be a globally allocated string
JSC__JSValue ZigString__toExternalValue(const ZigString* arg0, JSC__JSGlobalObject* arg1)
{
ZigString str = *arg0;
if (Zig::isTaggedUTF16Ptr(str.ptr)) {
- return JSC::JSValue::encode(JSC::JSValue(JSC::jsOwnedString(
+ auto ref = String(ExternalStringImpl::create(reinterpret_cast(Zig::untag(str.ptr)), str.len, nullptr, free_global_string));
+
+ return JSC::JSValue::encode(JSC::JSValue(JSC::jsString(
+ arg1->vm(), WTFMove(ref))));
+ } else {
+ auto ref = String(ExternalStringImpl::create(Zig::untag(str.ptr), str.len, nullptr, free_global_string));
+ return JSC::JSValue::encode(JSC::JSValue(JSC::jsString(
arg1->vm(),
- ExternalStringImpl::create(reinterpret_cast(Zig::untag(str.ptr)), str.len, nullptr, free_global_string))));
+ WTFMove(ref))));
}
-
- return JSC::JSValue::encode(JSC::JSValue(JSC::jsOwnedString(
- arg1->vm(),
- ExternalStringImpl::create(Zig::untag(str.ptr), str.len, nullptr, free_global_string))));
}
VirtualMachine* JSC__JSGlobalObject__bunVM(JSC__JSGlobalObject* arg0)
diff --git a/src/javascript/jsc/test/jest.zig b/src/javascript/jsc/test/jest.zig
index a07a4bcac..db6353d21 100644
--- a/src/javascript/jsc/test/jest.zig
+++ b/src/javascript/jsc/test/jest.zig
@@ -367,7 +367,16 @@ pub const Expect = struct {
this.scope.tests.items[this.test_id].counter.actual += 1;
const left = JSValue.fromRef(arguments[0]);
const right = JSValue.fromRef(this.value);
+
if (!left.isSameValue(right, ctx.ptr())) {
+ if (left.isString() and right.isString()) {
+ var left_slice = left.toSlice(ctx, getAllocator(ctx));
+ defer left_slice.deinit();
+ var right_slice = right.toSlice(ctx, getAllocator(ctx));
+ defer right_slice.deinit();
+ std.debug.assert(!strings.eqlLong(left_slice.slice(), right_slice.slice(), false));
+ }
+
var lhs_formatter: JSC.ZigConsoleClient.Formatter = JSC.ZigConsoleClient.Formatter{ .globalThis = ctx.ptr() };
var rhs_formatter: JSC.ZigConsoleClient.Formatter = JSC.ZigConsoleClient.Formatter{ .globalThis = ctx.ptr() };
@@ -381,8 +390,10 @@ pub const Expect = struct {
ctx,
exception,
);
+
return null;
}
+
return thisObject;
}
@@ -563,6 +574,7 @@ pub const ExpectPrototype = struct {
.scope = DescribeScope.active,
.test_id = DescribeScope.active.current_test_id,
};
+ expect_.value.?.value().ensureStillAlive();
return Expect.Class.make(ctx, expect_);
}
};
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index 849691ca2..dd5f28f8a 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -15,6 +15,10 @@ pub inline fn contains(self: string, str: string) bool {
return std.mem.indexOf(u8, self, str) != null;
}
+pub fn toUTF16Literal(comptime str: []const u8) []const u16 {
+ return comptime std.unicode.utf8ToUtf16LeStringLiteral(str);
+}
+
const OptionalUsize = std.meta.Int(.unsigned, @bitSizeOf(usize) - 1);
pub fn indexOfAny(self: string, comptime str: anytype) ?OptionalUsize {
for (self) |c, i| {
@@ -324,7 +328,7 @@ test "eqlComptimeCheckLen" {
}
test "eqlComptimeUTF16" {
- try std.testing.expectEqual(eqlComptimeUTF16(std.unicode.utf8ToUtf16LeStringLiteral("bun-darwin-aarch64.zip"), "bun-darwin-aarch64.zip"), true);
+ try std.testing.expectEqual(eqlComptimeUTF16(toUTF16Literal("bun-darwin-aarch64.zip"), "bun-darwin-aarch64.zip"), true);
const sizes = [_]u16{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 23, 22, 24 };
inline for (sizes) |size| {
var buf: [size]u16 = undefined;
@@ -542,7 +546,7 @@ pub fn eqlComptime(self: string, comptime alt: anytype) bool {
}
pub fn eqlComptimeUTF16(self: []const u16, comptime alt: []const u8) bool {
- return eqlComptimeCheckLenWithType(u16, self, comptime std.unicode.utf8ToUtf16LeStringLiteral(alt), true);
+ return eqlComptimeCheckLenWithType(u16, self, comptime toUTF16Literal(alt), true);
}
pub fn eqlComptimeIgnoreLen(self: string, comptime alt: anytype) bool {
@@ -703,7 +707,7 @@ pub fn index(self: string, str: string) i32 {
}
pub fn eqlUtf16(comptime self: string, other: []const u16) bool {
- return std.mem.eql(u16, std.unicode.utf8ToUtf16LeStringLiteral(self), other);
+ return std.mem.eql(u16, toUTF16Literal(self), other);
}
pub fn toUTF8Alloc(allocator: std.mem.Allocator, js: []const u16) !string {
@@ -1316,8 +1320,8 @@ pub fn elementLengthLatin1IntoUTF16(comptime Type: type, latin1_: Type) usize {
}
pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8) ![]const u8 {
- const Pusher = struct {
- const lengths: [std.math.maxInt(u8)]u4 = brk: {
+ const Scalar = struct {
+ pub const lengths: [std.math.maxInt(u8)]u4 = brk: {
var values: [std.math.maxInt(u8)]u4 = undefined;
for (values) |_, i| {
switch (i) {
@@ -1365,19 +1369,26 @@ pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8
else => unreachable,
};
}
- pub inline fn push(comptime c: anytype, chars: []const u8, allo: std.mem.Allocator) []const u8 {
+
+ pub inline fn push(comptime len: anytype, chars_: *const [len]u8, allo: std.mem.Allocator) []const u8 {
+ const chars = chars_.*;
var total: usize = 0;
- inline for (comptime bun.range(0, c)) |i| {
- total += @as(usize, lengths[chars[i]]);
+
+ comptime var remain_to_comp = len;
+ comptime var comp_i = 0;
+
+ inline while (remain_to_comp > 0) : (remain_to_comp -= 1) {
+ total += lengths[chars[comp_i]];
+ comp_i += 1;
}
- if (total == c) {
- return chars;
+ if (total == len) {
+ return chars_;
}
var output = allo.alloc(u8, total) catch unreachable;
var head = output.ptr;
- inline for (comptime bun.range(0, c)) |i| {
+ inline for (comptime bun.range(0, len)) |i| {
head += @This().append(head, chars[i]);
}
@@ -1417,19 +1428,38 @@ pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8
return strings.append(allocator, first, second);
},
- 3 => return Pusher.push(3, latin1, allocator),
- 4 => return Pusher.push(4, latin1, allocator),
- 5 => return Pusher.push(5, latin1, allocator),
- 6 => return Pusher.push(6, latin1, allocator),
- 7 => return Pusher.push(7, latin1, allocator),
- 8 => return Pusher.push(8, latin1, allocator),
- 9 => return Pusher.push(9, latin1, allocator),
- 10 => return Pusher.push(10, latin1, allocator),
- 11 => return Pusher.push(11, latin1, allocator),
- 12 => return Pusher.push(12, latin1, allocator),
- 13 => return Pusher.push(13, latin1, allocator),
- 14 => return Pusher.push(14, latin1, allocator),
- 15 => return Pusher.push(15, latin1, allocator),
+
+ // The simd implementation is slower for inputs less than 32 bytes.
+ 3 => return Scalar.push(3, latin1[0..3], allocator),
+ 4 => return Scalar.push(4, latin1[0..4], allocator),
+ 5 => return Scalar.push(5, latin1[0..5], allocator),
+ 6 => return Scalar.push(6, latin1[0..6], allocator),
+ 7 => return Scalar.push(7, latin1[0..7], allocator),
+ 8 => return Scalar.push(8, latin1[0..8], allocator),
+ 9 => return Scalar.push(9, latin1[0..9], allocator),
+ 10 => return Scalar.push(10, latin1[0..10], allocator),
+ 11 => return Scalar.push(11, latin1[0..11], allocator),
+ 12 => return Scalar.push(12, latin1[0..12], allocator),
+ 13 => return Scalar.push(13, latin1[0..13], allocator),
+ 14 => return Scalar.push(14, latin1[0..14], allocator),
+ 15 => return Scalar.push(15, latin1[0..15], allocator),
+ 16 => return Scalar.push(16, latin1[0..16], allocator),
+ 17 => return Scalar.push(17, latin1[0..17], allocator),
+ 18 => return Scalar.push(18, latin1[0..18], allocator),
+ 19 => return Scalar.push(19, latin1[0..19], allocator),
+ 20 => return Scalar.push(20, latin1[0..20], allocator),
+ 21 => return Scalar.push(21, latin1[0..21], allocator),
+ 22 => return Scalar.push(22, latin1[0..22], allocator),
+ 23 => return Scalar.push(23, latin1[0..23], allocator),
+ 24 => return Scalar.push(24, latin1[0..24], allocator),
+ 25 => return Scalar.push(25, latin1[0..25], allocator),
+ 26 => return Scalar.push(26, latin1[0..26], allocator),
+ 27 => return Scalar.push(27, latin1[0..27], allocator),
+ 28 => return Scalar.push(28, latin1[0..28], allocator),
+ 29 => return Scalar.push(29, latin1[0..29], allocator),
+ 30 => return Scalar.push(30, latin1[0..30], allocator),
+ 31 => return Scalar.push(31, latin1[0..31], allocator),
+ 32 => return Scalar.push(32, latin1[0..32], allocator),
else => {
var remaining = latin1;
@@ -1454,40 +1484,52 @@ pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8
std.debug.assert(!any_needs_escape);
}
const vec: AsciiVector = remaining[0..ascii_vector_size].*;
- if (@reduce(.Min, (vec ^ vecs[0]) &
- (vec ^ vecs[1]) &
- (vec ^ vecs[2]) &
- (vec ^ vecs[3]) &
- (vec ^ vecs[4])) == 0)
+ if (@reduce(.Max, @bitCast(AsciiVectorU1, (vec == vecs[0])) |
+ @bitCast(AsciiVectorU1, (vec == vecs[1])) |
+ @bitCast(AsciiVectorU1, (vec == vecs[2])) |
+ @bitCast(AsciiVectorU1, (vec == vecs[3])) |
+ @bitCast(AsciiVectorU1, (vec == vecs[4]))) == 1)
{
buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr);
@memcpy(buf.items.ptr, latin1.ptr, copy_len);
buf.items.len = copy_len;
any_needs_escape = true;
- var i: usize = 0;
- while (i < ascii_vector_size) : (i += 1) {
+ comptime var i: usize = 0;
+ inline while (i < ascii_vector_size) : (i += 1) {
switch (vec[i]) {
- '"', '&', '\'', '<', '>' => |c| {
- const result = switch (c) {
- '"' => """,
- '&' => "&",
- '\'' => "'",
- '<' => "<",
- '>' => ">",
- else => unreachable,
- };
-
- buf.appendSlice(result) catch unreachable;
- remaining = remaining[1..];
+ '"' => {
+ buf.ensureUnusedCapacity((ascii_vector_size - i) + """.len) catch unreachable;
+ buf.items.ptr[buf.items.len .. buf.items.len + """.len][0..""".len].* = """.*;
+ buf.items.len += """.len;
+ },
+ '&' => {
+ buf.ensureUnusedCapacity((ascii_vector_size - i) + "&".len) catch unreachable;
+ buf.items.ptr[buf.items.len .. buf.items.len + "&".len][0.."&".len].* = "&".*;
+ buf.items.len += "&".len;
+ },
+ '\'' => {
+ buf.ensureUnusedCapacity((ascii_vector_size - i) + "'".len) catch unreachable;
+ buf.items.ptr[buf.items.len .. buf.items.len + "'".len][0.."'".len].* = "'".*;
+ buf.items.len += "'".len;
+ },
+ '<' => {
+ buf.ensureUnusedCapacity((ascii_vector_size - i) + "<".len) catch unreachable;
+ buf.items.ptr[buf.items.len .. buf.items.len + "<".len][0.."<".len].* = "<".*;
+ buf.items.len += "<".len;
+ },
+ '>' => {
+ buf.ensureUnusedCapacity((ascii_vector_size - i) + ">".len) catch unreachable;
+ buf.items.ptr[buf.items.len .. buf.items.len + ">".len][0..">".len].* = ">".*;
+ buf.items.len += ">".len;
},
else => |c| {
- buf.append(c) catch unreachable;
- remaining = remaining[1..];
+ buf.appendAssumeCapacity(c);
},
}
}
+ remaining = remaining[ascii_vector_size..];
break :scan_and_allocate_lazily;
}
@@ -1500,33 +1542,43 @@ pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8
// so we'll go ahead and copy the buffer into a new buffer
while (remaining.len >= ascii_vector_size) {
const vec: AsciiVector = remaining[0..ascii_vector_size].*;
- if (@reduce(.Min, (vec ^ vecs[0]) &
- (vec ^ vecs[1]) &
- (vec ^ vecs[2]) &
- (vec ^ vecs[3]) &
- (vec ^ vecs[4])) == 0)
+ if (@reduce(.Max, @bitCast(AsciiVectorU1, (vec == vecs[0])) |
+ @bitCast(AsciiVectorU1, (vec == vecs[1])) |
+ @bitCast(AsciiVectorU1, (vec == vecs[2])) |
+ @bitCast(AsciiVectorU1, (vec == vecs[3])) |
+ @bitCast(AsciiVectorU1, (vec == vecs[4]))) == 1)
{
- buf.ensureUnusedCapacity(ascii_vector_size) catch unreachable;
- var i: usize = 0;
- while (i < ascii_vector_size) : (i += 1) {
+ buf.ensureUnusedCapacity(ascii_vector_size + 6) catch unreachable;
+ comptime var i: usize = 0;
+ inline while (i < ascii_vector_size) : (i += 1) {
switch (vec[i]) {
'"' => {
- buf.appendSlice(""") catch unreachable;
+ buf.ensureUnusedCapacity((ascii_vector_size - i) + """.len) catch unreachable;
+ buf.items.ptr[buf.items.len .. buf.items.len + """.len][0..""".len].* = """.*;
+ buf.items.len += """.len;
},
'&' => {
- buf.appendSlice("&") catch unreachable;
+ buf.ensureUnusedCapacity((ascii_vector_size - i) + "&".len) catch unreachable;
+ buf.items.ptr[buf.items.len .. buf.items.len + "&".len][0.."&".len].* = "&".*;
+ buf.items.len += "&".len;
},
'\'' => {
- buf.appendSlice("'") catch unreachable; // modified from escape-html; used to be '''
+ buf.ensureUnusedCapacity((ascii_vector_size - i) + "'".len) catch unreachable;
+ buf.items.ptr[buf.items.len .. buf.items.len + "'".len][0.."'".len].* = "'".*;
+ buf.items.len += "'".len;
},
'<' => {
- buf.appendSlice("<") catch unreachable;
+ buf.ensureUnusedCapacity((ascii_vector_size - i) + "<".len) catch unreachable;
+ buf.items.ptr[buf.items.len .. buf.items.len + "<".len][0.."<".len].* = "<".*;
+ buf.items.len += "<".len;
},
'>' => {
- buf.appendSlice(">") catch unreachable;
+ buf.ensureUnusedCapacity((ascii_vector_size - i) + ">".len) catch unreachable;
+ buf.items.ptr[buf.items.len .. buf.items.len + ">".len][0..">".len].* = ">".*;
+ buf.items.len += ">".len;
},
else => |c| {
- buf.append(c) catch unreachable;
+ buf.appendAssumeCapacity(c);
},
}
}
@@ -1542,94 +1594,343 @@ pub fn escapeHTMLForLatin1Input(allocator: std.mem.Allocator, latin1: []const u8
}
}
+ var ptr = remaining.ptr;
+ const end = remaining.ptr + remaining.len;
+
if (!any_needs_escape) {
- scan_and_allocate_lazily: while (remaining.len > 0) {
- switch (remaining[0]) {
- '"' => {
- const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr);
- buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
- @memcpy(buf.items.ptr, latin1.ptr, copy_len);
+ scan_and_allocate_lazily: while (ptr != end) : (ptr += 1) {
+ switch (ptr[0]) {
+ '"', '&', '\'', '<', '>' => |c| {
+ buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + @as(usize, Scalar.lengths[c]));
+ const copy_len = @ptrToInt(ptr) - @ptrToInt(latin1.ptr); // number of bytes that precede the first character needing an escape
+ @memcpy(buf.items.ptr, latin1.ptr, copy_len); // copy all of them: items.len is set to copy_len right below
+ buf.items.len = copy_len;
- buf.appendSlice(""") catch unreachable;
- remaining = remaining[1..];
any_needs_escape = true;
break :scan_and_allocate_lazily;
},
- '&' => {
- const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr);
- buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
- @memcpy(buf.items.ptr, latin1.ptr, copy_len);
- buf.items.len = copy_len;
- buf.appendSlice("&") catch unreachable;
- remaining = remaining[1..];
- any_needs_escape = true;
- break :scan_and_allocate_lazily;
- },
- '\'' => {
- const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr);
- buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
- @memcpy(buf.items.ptr, latin1.ptr, copy_len);
- buf.items.len = copy_len;
- buf.appendSlice("'") catch unreachable; // modified from escape-html; used to be '''
- remaining = remaining[1..];
- any_needs_escape = true;
- break :scan_and_allocate_lazily;
- },
- '<' => {
- const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr);
- buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
- @memcpy(buf.items.ptr, latin1.ptr, copy_len);
- buf.items.len = copy_len;
- buf.appendSlice("<") catch unreachable;
- remaining = remaining[1..];
+ else => {},
+ }
+ }
+ }
+
+ while (ptr != end) : (ptr += 1) {
+ switch (ptr[0]) {
+ '"' => {
+ buf.appendSlice(""") catch unreachable;
+ },
+ '&' => {
+ buf.appendSlice("&") catch unreachable;
+ },
+ '\'' => {
+ buf.appendSlice("'") catch unreachable; // modified from escape-html; used to be '''
+ },
+ '<' => {
+ buf.appendSlice("<") catch unreachable;
+ },
+ '>' => {
+ buf.appendSlice(">") catch unreachable;
+ },
+ else => |c| {
+ buf.append(c) catch unreachable;
+ },
+ }
+ }
+
+ if (!any_needs_escape) {
+ return latin1;
+ }
+
+ return buf.toOwnedSlice();
+ },
+ }
+}
+
+pub fn escapeHTMLForUTF16Input(allocator: std.mem.Allocator, utf16: []const u16) ![]const u16 {
+ const Scalar = struct {
+ pub const lengths: [std.math.maxInt(u8)]u4 = brk: {
+ var values: [std.math.maxInt(u8)]u4 = undefined;
+ for (values) |_, i| {
+ values[i] = switch (i) {
+ '"' => """.len,
+ '&' => "&".len,
+ '\'' => "'".len,
+ '<' => "<".len,
+ '>' => ">".len,
+ else => 1,
+ };
+ }
+
+ break :brk values;
+ };
+ };
+ switch (utf16.len) {
+ 0 => return &[_]u16{},
+ 1 => return switch (utf16[0]) {
+ '"' => toUTF16Literal("""),
+ '&' => toUTF16Literal("&"),
+ '\'' => toUTF16Literal("'"),
+ '<' => toUTF16Literal("<"),
+ '>' => toUTF16Literal(">"),
+ else => utf16,
+ },
+ 2 => {
+ const first = std.mem.sliceAsBytes(switch (utf16[0]) {
+ '"' => toUTF16Literal("""),
+ '&' => toUTF16Literal("&"),
+ '\'' => toUTF16Literal("'"),
+ '<' => toUTF16Literal("<"),
+ '>' => toUTF16Literal(">"),
+ else => @as([]const u16, utf16[0..1]),
+ });
+ const second = std.mem.sliceAsBytes(switch (utf16[1]) {
+ '"' => toUTF16Literal("""),
+ '&' => toUTF16Literal("&"),
+ '\'' => toUTF16Literal("'"),
+ '<' => toUTF16Literal("<"),
+ '>' => toUTF16Literal(">"),
+ else => @as([]const u16, utf16[1..2]),
+ });
+ if (first.len == @sizeOf(u16) and second.len == @sizeOf(u16)) { // `first`/`second` are byte slices: 2 bytes each means neither code unit was escaped
+ return utf16;
+ }
+ const outlen = first.len + second.len;
+ var buf = allocator.alloc(u16, outlen / 2) catch unreachable;
+ var buf_ = std.mem.sliceAsBytes(buf);
+ @memcpy(buf_.ptr, first.ptr, first.len);
+ @memcpy(buf_.ptr + first.len, second.ptr, second.len);
+ return buf;
+ },
+
+ else => {
+ var remaining = utf16;
+
+ var any_needs_escape = false;
+ var buf: std.ArrayList(u16) = undefined;
+
+ if (comptime Environment.isAarch64 or Environment.isX64) {
+ const vec_chars = "\"&'<>";
+ const vecs: [vec_chars.len]AsciiU16Vector = brk: {
+ var _vecs: [vec_chars.len]AsciiU16Vector = undefined;
+ for (vec_chars) |c, i| {
+ _vecs[i] = @splat(ascii_u16_vector_size, @as(u16, c));
+ }
+ break :brk _vecs;
+ };
+ // pass #1: scan for any characters that need escaping
+ // assume most strings won't need any escaping, so don't actually allocate the buffer
+ scan_and_allocate_lazily: while (remaining.len >= ascii_u16_vector_size) {
+ if (comptime Environment.allow_assert) {
+ std.debug.assert(!any_needs_escape);
+ }
+ const vec: AsciiU16Vector = remaining[0..ascii_u16_vector_size].*;
+ if (@reduce(.Max, @bitCast(AsciiVectorU16U1, vec > @splat(ascii_u16_vector_size, @as(u16, 127))) |
+ @bitCast(AsciiVectorU16U1, (vec == vecs[0])) |
+ @bitCast(AsciiVectorU16U1, (vec == vecs[1])) |
+ @bitCast(AsciiVectorU16U1, (vec == vecs[2])) |
+ @bitCast(AsciiVectorU16U1, (vec == vecs[3])) |
+ @bitCast(AsciiVectorU16U1, (vec == vecs[4]))) == 1)
+ {
+ var i: u16 = 0;
+ lazy: {
+ while (i < ascii_u16_vector_size) {
+ switch (remaining[i]) {
+ '"', '&', '\'', '<', '>' => {
+ any_needs_escape = true;
+ break :lazy;
+ },
+ 128...std.math.maxInt(u16) => {
+ const cp = utf16Codepoint([]const u16, remaining[i..]);
+ i += @as(u16, cp.len);
+ },
+ else => {
+ i += 1;
+ },
+ }
+ }
+ }
+
+ if (!any_needs_escape) {
+ remaining = remaining[i..];
+ continue :scan_and_allocate_lazily;
+ }
+
+ buf = try std.ArrayList(u16).initCapacity(allocator, utf16.len + 6);
+ std.debug.assert(@ptrToInt(remaining.ptr + i) >= @ptrToInt(utf16.ptr));
+ const to_copy = std.mem.sliceAsBytes(utf16)[0 .. @ptrToInt(remaining.ptr + i) - @ptrToInt(utf16.ptr)];
+ @memcpy(@ptrCast([*]align(2) u8, buf.items.ptr), to_copy.ptr, to_copy.len);
+ buf.items.len = std.mem.bytesAsSlice(u16, to_copy).len;
+
+ while (i < ascii_u16_vector_size) {
+ switch (remaining[i]) {
+ '"', '&', '\'', '<', '>' => |c| {
+ const result = switch (c) {
+ '"' => toUTF16Literal("""),
+ '&' => toUTF16Literal("&"),
+ '\'' => toUTF16Literal("'"),
+ '<' => toUTF16Literal("<"),
+ '>' => toUTF16Literal(">"),
+ else => unreachable,
+ };
+
+ buf.appendSlice(result) catch unreachable;
+ i += 1;
+ },
+ 128...std.math.maxInt(u16) => {
+ const cp = utf16Codepoint([]const u16, remaining[i..]);
+
+ buf.appendSlice(remaining[i..][0..@as(usize, cp.len)]) catch unreachable;
+ i += @as(u16, cp.len);
+ },
+ else => |c| {
+ i += 1;
+ buf.append(c) catch unreachable;
+ },
+ }
+ }
+
+ // edge case: a code point's width could carry i past ascii_u16_vector_size
+ remaining = remaining[i..];
+ break :scan_and_allocate_lazily;
+ }
+
+ remaining = remaining[ascii_u16_vector_size..];
+ }
+
+ if (any_needs_escape) {
+ // pass #2: we found something that needed an escape
+ // but there's still some more text to scan,
+ // so we'll go ahead and copy the buffer into a new buffer
+ while (remaining.len >= ascii_u16_vector_size) {
+ const vec: AsciiU16Vector = remaining[0..ascii_u16_vector_size].*;
+ if (@reduce(.Max, @bitCast(AsciiVectorU16U1, vec > @splat(ascii_u16_vector_size, @as(u16, 127))) |
+ @bitCast(AsciiVectorU16U1, (vec == vecs[0])) |
+ @bitCast(AsciiVectorU16U1, (vec == vecs[1])) |
+ @bitCast(AsciiVectorU16U1, (vec == vecs[2])) |
+ @bitCast(AsciiVectorU16U1, (vec == vecs[3])) |
+ @bitCast(AsciiVectorU16U1, (vec == vecs[4]))) == 1)
+ {
+ buf.ensureUnusedCapacity(ascii_u16_vector_size) catch unreachable;
+ var i: u16 = 0;
+ while (i < ascii_u16_vector_size) {
+ switch (remaining[i]) {
+ '"' => {
+ buf.appendSlice(toUTF16Literal(""")) catch unreachable;
+ i += 1;
+ },
+ '&' => {
+ buf.appendSlice(toUTF16Literal("&")) catch unreachable;
+ i += 1;
+ },
+ '\'' => {
+ buf.appendSlice(toUTF16Literal("'")) catch unreachable; // modified from escape-html; used to be '''
+ i += 1;
+ },
+ '<' => {
+ buf.appendSlice(toUTF16Literal("<")) catch unreachable;
+ i += 1;
+ },
+ '>' => {
+ buf.appendSlice(toUTF16Literal(">")) catch unreachable;
+ i += 1;
+ },
+ 128...std.math.maxInt(u16) => {
+ const cp = utf16Codepoint([]const u16, remaining[i..]);
+
+ buf.appendSlice(remaining[i..][0..@as(usize, cp.len)]) catch unreachable;
+ i += @as(u16, cp.len);
+ },
+ else => |c| {
+ buf.append(c) catch unreachable;
+ i += 1;
+ },
+ }
+ }
+
+ remaining = remaining[i..];
+ continue;
+ }
+
+ try buf.ensureUnusedCapacity(ascii_u16_vector_size);
+ buf.items.ptr[buf.items.len .. buf.items.len + ascii_u16_vector_size][0..ascii_u16_vector_size].* = remaining[0..ascii_u16_vector_size].*;
+ buf.items.len += ascii_u16_vector_size;
+ remaining = remaining[ascii_u16_vector_size..];
+ }
+ }
+ }
+
+ var ptr = remaining.ptr;
+ const end = remaining.ptr + remaining.len;
+
+ if (!any_needs_escape) {
+ scan_and_allocate_lazily: while (ptr != end) {
+ switch (ptr[0]) {
+ '"', '&', '\'', '<', '>' => |c| {
+ buf = try std.ArrayList(u16).initCapacity(allocator, utf16.len + @as(usize, Scalar.lengths[c]));
+ std.debug.assert(@ptrToInt(ptr) >= @ptrToInt(utf16.ptr));
+
+ const to_copy = std.mem.sliceAsBytes(utf16)[0 .. @ptrToInt(ptr) - @ptrToInt(utf16.ptr)];
+
+ @memcpy(
+ @ptrCast([*]align(2) u8, buf.items.ptr),
+ to_copy.ptr,
+ to_copy.len,
+ );
+
+ buf.items.len = std.mem.bytesAsSlice(u16, to_copy).len;
any_needs_escape = true;
break :scan_and_allocate_lazily;
},
- '>' => {
- const copy_len = @ptrToInt(remaining.ptr) - @ptrToInt(latin1.ptr);
- buf = try std.ArrayList(u8).initCapacity(allocator, latin1.len + 6);
- @memcpy(buf.items.ptr, latin1.ptr, copy_len);
- buf.items.len = copy_len;
- buf.appendSlice(">") catch unreachable;
- remaining = remaining[1..];
- any_needs_escape = true;
- break :scan_and_allocate_lazily;
+ 128...std.math.maxInt(u16) => {
+ const cp = utf16Codepoint([]const u16, ptr[0..2]);
+
+ // scan-only pass: `buf` has not been allocated yet, so step over the code point without copying it
+ ptr += @as(u16, cp.len);
+ },
else => {
- remaining = remaining[1..];
+ ptr += 1;
},
}
}
}
- if (remaining.len > 0) {
- std.debug.assert(any_needs_escape);
- for (remaining) |c| {
- switch (c) {
- '"' => {
- buf.appendSlice(""") catch unreachable;
- },
- '&' => {
- buf.appendSlice("&") catch unreachable;
- },
- '\'' => {
- buf.appendSlice("'") catch unreachable; // modified from escape-html; used to be '''
- },
- '<' => {
- buf.appendSlice("<") catch unreachable;
- },
- '>' => {
- buf.appendSlice(">") catch unreachable;
- },
- else => {
- buf.append(c) catch unreachable;
- },
- }
+ while (ptr != end) {
+ switch (ptr[0]) {
+ '"' => {
+ buf.appendSlice(toUTF16Literal(""")) catch unreachable;
+ ptr += 1;
+ },
+ '&' => {
+ buf.appendSlice(toUTF16Literal("&")) catch unreachable;
+ ptr += 1;
+ },
+ '\'' => {
+ buf.appendSlice(toUTF16Literal("'")) catch unreachable; // modified from escape-html; used to be '''
+ ptr += 1;
+ },
+ '<' => {
+ buf.appendSlice(toUTF16Literal("<")) catch unreachable;
+ ptr += 1;
+ },
+ '>' => {
+ buf.appendSlice(toUTF16Literal(">")) catch unreachable;
+ ptr += 1;
+ },
+ 128...std.math.maxInt(u16) => {
+ const cp = utf16Codepoint([]const u16, ptr[0..2]);
+
+ buf.appendSlice(ptr[0..@as(usize, cp.len)]) catch unreachable;
+ ptr += @as(u16, cp.len);
+ },
+
+ else => |c| {
+ buf.append(c) catch unreachable;
+ ptr += 1;
+ },
}
}
if (!any_needs_escape) {
- return latin1;
+ return utf16;
}
return buf.toOwnedSlice();
@@ -2483,27 +2784,27 @@ test "firstNonASCII" {
test "firstNonASCII16" {
// Exercises firstNonASCII16 on UTF-16 literals: the all-ASCII input must
// yield null, and each input containing 🙂 must yield the value asserted
// right after it.
// Diff note: every `-` line is the previous std.unicode.utf8ToUtf16LeStringLiteral
// call and the `+` line that follows is its toUTF16Literal replacement
// (toUTF16Literal is defined outside this hunk; the string argument appears unchanged).
// Raises the compile-time branch quota; presumably required because the long
// literal below is converted at comptime — TODO confirm.
@setEvalBranchQuota(99999);
// Case 1: only ASCII characters, so no non-ASCII position is expected.
- const yes = std.mem.span(std.unicode.utf8ToUtf16LeStringLiteral("aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123"));
+ const yes = std.mem.span(toUTF16Literal("aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123"));
try std.testing.expectEqual(true, firstNonASCII16(@TypeOf(yes), yes) == null);
{
// Case 2: 🙂 appears after 50 ASCII characters; the asserted result is 50.
@setEvalBranchQuota(99999);
- const no = std.mem.span(std.unicode.utf8ToUtf16LeStringLiteral("aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdoka🙂sdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123"));
+ const no = std.mem.span(toUTF16Literal("aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdoka🙂sdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123"));
try std.testing.expectEqual(@as(u32, 50), firstNonASCII16(@TypeOf(no), no).?);
}
{
// Case 3: 🙂 is the very first character; the asserted result is 0.
@setEvalBranchQuota(99999);
- const no = std.mem.span(std.unicode.utf8ToUtf16LeStringLiteral("🙂sdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123"));
+ const no = std.mem.span(toUTF16Literal("🙂sdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123"));
try std.testing.expectEqual(@as(u32, 0), firstNonASCII16(@TypeOf(no), no).?);
}
{
// Case 4: a single ASCII character precedes 🙂; the asserted result is 1.
@setEvalBranchQuota(99999);
- const no = std.mem.span(std.unicode.utf8ToUtf16LeStringLiteral("a🙂sdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123"));
+ const no = std.mem.span(toUTF16Literal("a🙂sdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123"));
try std.testing.expectEqual(@as(u32, 1), firstNonASCII16(@TypeOf(no), no).?);
}
{
// Case 5: 🙂 sits just before the final character of a long ASCII run;
// the asserted result is 366.
@setEvalBranchQuota(99999);
- const no = std.mem.span(std.unicode.utf8ToUtf16LeStringLiteral("aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd12312🙂3"));
+ const no = std.mem.span(toUTF16Literal("aspdokasdpokasdpokasd aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd123123aspdokasdpokasdpokasdaspdokasdpokasdpokasdaspdokasdpokasdpokasd12312🙂3"));
try std.testing.expectEqual(@as(u32, 366), firstNonASCII16(@TypeOf(no), no).?);
}
}
@@ -2541,7 +2842,7 @@ pub fn formatUTF16(slice_: []align(1) const u16, writer: anytype) !void {
test "print UTF16" {
// Smoke test: writes a UTF-16 literal to stderr through formatUTF16.
// There are no assertions; the test fails only if formatUTF16 returns an error.
var err = std.io.getStdErr();
// Diff note: the `-` line is the old std.unicode.utf8ToUtf16LeStringLiteral call,
// the `+` line its toUTF16Literal replacement (helper defined outside this hunk).
- const utf16 = comptime std.unicode.utf8ToUtf16LeStringLiteral("❌ ✅ opkay ");
+ const utf16 = comptime toUTF16Literal("❌ ✅ opkay ");
try formatUTF16(utf16, err.writer());
// std.unicode.fmtUtf16le(utf16le: []const u16)
}
--
cgit v1.2.3