From 662335d81aa39dc53b83f2759312ff0ff3a93af2 Mon Sep 17 00:00:00 2001 From: Brúnó Salomon <35275408+bru02@users.noreply.github.com> Date: Mon, 26 Jun 2023 01:48:03 +0200 Subject: chore: update lol-html version (#3356) * chore: update lol-html version * add tests --------- Co-authored-by: Jarred Sumner --- test/js/workerd/html-rewriter.test.js | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'test/js/workerd/html-rewriter.test.js') diff --git a/test/js/workerd/html-rewriter.test.js b/test/js/workerd/html-rewriter.test.js index 1ca92a567..b6131a09f 100644 --- a/test/js/workerd/html-rewriter.test.js +++ b/test/js/workerd/html-rewriter.test.js @@ -300,4 +300,19 @@ describe("HTMLRewriter", () => { .text(), ).toEqual("
"); }); + + it("it supports lastInTextNode", async () => { + let lastInTextNode; + + await new HTMLRewriter() + .on("p", { + text(text) { + lastInTextNode ??= text.lastInTextNode; + }, + }) + .transform(new Response("

<p>Lorem ipsum!</p>

")) + .text(); + + expect(lastInTextNode).toBeBoolean(); + }); }); -- cgit v1.2.3 From 116bcf424564a5f38ddf5f99f34453c175526fdc Mon Sep 17 00:00:00 2001 From: Jarred Sumner Date: Sun, 25 Jun 2023 18:49:16 -0700 Subject: Fixes #3334 (#3401) Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> --- src/bun.js/api/html_rewriter.zig | 97 +++++++++++++++++++++++------------ test/js/workerd/html-rewriter.test.js | 21 ++++++++ 2 files changed, 84 insertions(+), 34 deletions(-) (limited to 'test/js/workerd/html-rewriter.test.js') diff --git a/src/bun.js/api/html_rewriter.zig b/src/bun.js/api/html_rewriter.zig index d09c14e42..5e14d5503 100644 --- a/src/bun.js/api/html_rewriter.zig +++ b/src/bun.js/api/html_rewriter.zig @@ -106,7 +106,7 @@ pub const HTMLRewriter = struct { var selector = LOLHTML.HTMLSelector.parse(selector_slice) catch return throwLOLHTMLError(global); - var handler_ = ElementHandler.init(global, listener, exception); + var handler_ = ElementHandler.init(global, listener, exception) catch return .zero; if (exception.* != null) { selector.deinit(); return JSValue.fromRef(exception.*); @@ -154,7 +154,7 @@ pub const HTMLRewriter = struct { thisObject: JSC.C.JSObjectRef, exception: JSC.C.ExceptionRef, ) JSValue { - var handler_ = DocumentHandler.init(global, listener, exception); + var handler_ = DocumentHandler.init(global, listener, exception) catch return .zero; if (exception.* != null) { return JSValue.fromRef(exception.*); } @@ -446,10 +446,14 @@ pub const HTMLRewriter = struct { }, }; - result.body.init.headers = original.body.init.headers; result.body.init.method = original.body.init.method; result.body.init.status_code = original.body.init.status_code; + // https://github.com/oven-sh/bun/issues/3334 + if (original.body.init.headers) |headers| { + result.body.init.headers = headers.cloneThis(global); + } + result.url = bun.default_allocator.dupe(u8, original.url) catch unreachable; result.status_text = 
bun.default_allocator.dupe(u8, original.status_text) catch unreachable; @@ -723,29 +727,44 @@ const DocumentHandler = struct { "onEndCallback", ); - pub fn init(global: *JSGlobalObject, thisObject: JSValue, exception: JSC.C.ExceptionRef) DocumentHandler { + pub fn init(global: *JSGlobalObject, thisObject: JSValue, exception: JSC.C.ExceptionRef) !DocumentHandler { var handler = DocumentHandler{ .thisObject = thisObject, .global = global, }; - switch (thisObject.jsType()) { - .Object, .ProxyObject, .Cell, .FinalObject => {}, - else => |kind| { - JSC.throwInvalidArguments( - "Expected object but received {s}", - .{@as(string, @tagName(kind))}, - global, - exception, - ); - return undefined; - }, + if (!thisObject.isObject()) { + JSC.throwInvalidArguments( + "Expected object", + .{}, + global, + exception, + ); + return error.InvalidArguments; + } + + errdefer { + if (handler.onDocTypeCallback) |cb| { + cb.unprotect(); + } + + if (handler.onCommentCallback) |cb| { + cb.unprotect(); + } + + if (handler.onTextCallback) |cb| { + cb.unprotect(); + } + + if (handler.onEndCallback) |cb| { + cb.unprotect(); + } } if (thisObject.get(global, "doctype")) |val| { if (val.isUndefinedOrNull() or !val.isCell() or !val.isCallable(global.vm())) { JSC.throwInvalidArguments("doctype must be a function", .{}, global, exception); - return undefined; + return error.InvalidArguments; } JSC.C.JSValueProtect(global, val.asObjectRef()); handler.onDocTypeCallback = val; @@ -754,7 +773,7 @@ const DocumentHandler = struct { if (thisObject.get(global, "comments")) |val| { if (val.isUndefinedOrNull() or !val.isCell() or !val.isCallable(global.vm())) { JSC.throwInvalidArguments("comments must be a function", .{}, global, exception); - return undefined; + return error.InvalidArguments; } JSC.C.JSValueProtect(global, val.asObjectRef()); handler.onCommentCallback = val; @@ -763,7 +782,7 @@ const DocumentHandler = struct { if (thisObject.get(global, "text")) |val| { if (val.isUndefinedOrNull() or 
!val.isCell() or !val.isCallable(global.vm())) { JSC.throwInvalidArguments("text must be a function", .{}, global, exception); - return undefined; + return error.InvalidArguments; } JSC.C.JSValueProtect(global, val.asObjectRef()); handler.onTextCallback = val; @@ -772,7 +791,7 @@ const DocumentHandler = struct { if (thisObject.get(global, "end")) |val| { if (val.isUndefinedOrNull() or !val.isCell() or !val.isCallable(global.vm())) { JSC.throwInvalidArguments("end must be a function", .{}, global, exception); - return undefined; + return error.InvalidArguments; } JSC.C.JSValueProtect(global, val.asObjectRef()); handler.onEndCallback = val; @@ -863,29 +882,39 @@ const ElementHandler = struct { global: *JSGlobalObject, ctx: ?*HTMLRewriter.BufferOutputSink = null, - pub fn init(global: *JSGlobalObject, thisObject: JSValue, exception: JSC.C.ExceptionRef) ElementHandler { + pub fn init(global: *JSGlobalObject, thisObject: JSValue, exception: JSC.C.ExceptionRef) !ElementHandler { var handler = ElementHandler{ .thisObject = thisObject, .global = global, }; + errdefer { + if (handler.onCommentCallback) |cb| { + cb.unprotect(); + } - switch (thisObject.jsType()) { - .Object, .ProxyObject, .Cell, .FinalObject => {}, - else => |kind| { - JSC.throwInvalidArguments( - "Expected object but received {s}", - .{@as(string, @tagName(kind))}, - global, - exception, - ); - return undefined; - }, + if (handler.onElementCallback) |cb| { + cb.unprotect(); + } + + if (handler.onTextCallback) |cb| { + cb.unprotect(); + } + } + + if (!thisObject.isObject()) { + JSC.throwInvalidArguments( + "Expected object", + .{}, + global, + exception, + ); + return error.InvalidArguments; } if (thisObject.get(global, "element")) |val| { if (val.isUndefinedOrNull() or !val.isCell() or !val.isCallable(global.vm())) { JSC.throwInvalidArguments("element must be a function", .{}, global, exception); - return undefined; + return error.InvalidArguments; } JSC.C.JSValueProtect(global, val.asObjectRef()); 
handler.onElementCallback = val; @@ -894,7 +923,7 @@ const ElementHandler = struct { if (thisObject.get(global, "comments")) |val| { if (val.isUndefinedOrNull() or !val.isCell() or !val.isCallable(global.vm())) { JSC.throwInvalidArguments("comments must be a function", .{}, global, exception); - return undefined; + return error.InvalidArguments; } JSC.C.JSValueProtect(global, val.asObjectRef()); handler.onCommentCallback = val; @@ -903,7 +932,7 @@ const ElementHandler = struct { if (thisObject.get(global, "text")) |val| { if (val.isUndefinedOrNull() or !val.isCell() or !val.isCallable(global.vm())) { JSC.throwInvalidArguments("text must be a function", .{}, global, exception); - return undefined; + return error.InvalidArguments; } JSC.C.JSValueProtect(global, val.asObjectRef()); handler.onTextCallback = val; diff --git a/test/js/workerd/html-rewriter.test.js b/test/js/workerd/html-rewriter.test.js index b6131a09f..3f7b7493d 100644 --- a/test/js/workerd/html-rewriter.test.js +++ b/test/js/workerd/html-rewriter.test.js @@ -316,3 +316,24 @@ describe("HTMLRewriter", () => { expect(lastInTextNode).toBeBoolean(); }); }); + +// By not segfaulting, this test passes +it("#3334 regression", async () => { + for (let i = 0; i < 10; i++) { + const headers = new Headers({ + "content-type": "text/html", + }); + const response = new Response("
<div>content</div>
", { headers }); + + const result = await new HTMLRewriter() + .on("div", { + element(elem) { + elem.setInnerContent("new"); + }, + }) + .transform(response) + .text(); + expect(result).toEqual("
<div>new</div>
"); + } + Bun.gc(true); +}); -- cgit v1.2.3 From b05879e9e2e9d52359eb91a1305c1e694169c030 Mon Sep 17 00:00:00 2001 From: Jarred Sumner Date: Sun, 2 Jul 2023 01:06:40 -0700 Subject: Fixes #3489 (#3490) Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> --- src/bun.js/api/html_rewriter.zig | 35 ++++++++--------------------------- src/deps/lol-html.zig | 22 +++++++++++++++++++++- test/js/workerd/html-rewriter.test.js | 28 ++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 28 deletions(-) (limited to 'test/js/workerd/html-rewriter.test.js') diff --git a/src/bun.js/api/html_rewriter.zig b/src/bun.js/api/html_rewriter.zig index 5e14d5503..532e0ae1e 100644 --- a/src/bun.js/api/html_rewriter.zig +++ b/src/bun.js/api/html_rewriter.zig @@ -996,26 +996,14 @@ const getterWrap = JSC.getterWrap; const setterWrap = JSC.setterWrap; const wrap = JSC.wrapSync; -pub fn free_html_writer_string(_: ?*anyopaque, ptr: ?*anyopaque, len: usize) callconv(.C) void { - var str = LOLHTML.HTMLString{ .ptr = bun.cast([*]const u8, ptr.?), .len = len }; - str.deinit(); -} - fn throwLOLHTMLError(global: *JSGlobalObject) JSValue { - var err = LOLHTML.HTMLString.lastError(); - return ZigString.init(err.slice()).toErrorInstance(global); + const err = LOLHTML.HTMLString.lastError(); + defer err.deinit(); + return ZigString.fromUTF8(err.slice()).toErrorInstance(global); } fn htmlStringValue(input: LOLHTML.HTMLString, globalObject: *JSGlobalObject) JSValue { - var str = ZigString.init( - input.slice(), - ); - str.detectEncoding(); - - return str.toExternalValueWithCallback( - globalObject, - free_html_writer_string, - ); + return input.toJS(globalObject); } pub const TextChunk = struct { @@ -1328,7 +1316,7 @@ pub const Comment = struct { pub fn getText(this: *Comment, global: *JSGlobalObject) JSValue { if (this.comment == null) return JSValue.jsNull(); - return ZigString.init(this.comment.?.getText().slice()).withEncoding().toValueGC(global); + return 
this.comment.?.getText().toJS(global); } pub fn setText( @@ -1458,7 +1446,7 @@ pub const EndTag = struct { if (this.end_tag == null) return JSC.JSValue.jsUndefined(); - return ZigString.init(this.end_tag.?.getName().slice()).withEncoding().toValueGC(global); + return this.end_tag.?.getName().toJS(global); } pub fn setName( @@ -1696,19 +1684,12 @@ pub const Element = struct { var slice = name.toSlice(bun.default_allocator); defer slice.deinit(); - var attr = this.element.?.getAttribute(slice.slice()).slice(); + var attr = this.element.?.getAttribute(slice.slice()); if (attr.len == 0) return JSC.JSValue.jsNull(); - var str = ZigString.init( - attr, - ); - - return str.toExternalValueWithCallback( - globalObject, - free_html_writer_string, - ); + return attr.toJS(globalObject); } /// Returns a boolean indicating whether an attribute exists on the element. diff --git a/src/deps/lol-html.zig b/src/deps/lol-html.zig index 9c5345046..50dab9592 100644 --- a/src/deps/lol-html.zig +++ b/src/deps/lol-html.zig @@ -1,6 +1,6 @@ pub const Error = error{Fail}; const std = @import("std"); - +const bun = @import("root").bun; pub const MemorySettings = extern struct { preallocated_parsing_buffer_size: usize, max_allowed_memory_usage: usize, @@ -563,6 +563,26 @@ pub const HTMLString = extern struct { @setRuntimeSafety(false); return this.ptr[0..this.len]; } + + fn deinit_external(ctx: *anyopaque, ptr: *anyopaque, len: u32) callconv(.C) void { + _ = ctx; + auto_disable(); + lol_html_str_free(.{ .ptr = @ptrCast([*]const u8, ptr), .len = len }); + } + + pub fn toJS(this: HTMLString, globalThis: *bun.JSC.JSGlobalObject) bun.JSC.JSValue { + const bytes = this.slice(); + if (bun.strings.isAllASCII(bytes)) { + var external = bun.String.createExternal(bytes, true, @constCast(bytes.ptr), &deinit_external); + defer external.deref(); + return external.toJS(globalThis); + } + defer this.deinit(); + + var str = bun.String.create(bytes); + defer str.deref(); + return str.toJS(globalThis); + } }; 
pub const EndTag = opaque { diff --git a/test/js/workerd/html-rewriter.test.js b/test/js/workerd/html-rewriter.test.js index 3f7b7493d..aaf912aff 100644 --- a/test/js/workerd/html-rewriter.test.js +++ b/test/js/workerd/html-rewriter.test.js @@ -337,3 +337,31 @@ it("#3334 regression", async () => { } Bun.gc(true); }); + +it("#3489", async () => { + var el; + await new HTMLRewriter() + .on("p", { + element(element) { + el = element.getAttribute("id"); + }, + }) + .transform(new Response('
<p id="Šžõäöü"></p>
')) + .text(); + expect(el).toEqual("Šžõäöü"); +}); + +it("get attribute - ascii", async () => { + for (let i = 0; i < 10; i++) { + var el; + await new HTMLRewriter() + .on("p", { + element(element) { + el = element.getAttribute("id"); + }, + }) + .transform(new Response(`
<p id="asciii"></p>
`)) + .text(); + expect(el).toEqual("asciii"); + } +}); -- cgit v1.2.3 From a7f5a91cfbf93a31cc53974a6c27185e57a57f64 Mon Sep 17 00:00:00 2001 From: Jarred Sumner Date: Wed, 5 Jul 2023 00:08:59 -0700 Subject: Fixes #3520 (#3522) * Fixes #3520 * Update html_rewriter.zig --------- Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> --- src/bun.js/api/html_rewriter.zig | 26 ++++++++------------------ src/deps/lol-html.zig | 11 ++++++----- test/js/workerd/html-rewriter.test.js | 23 +++++++++++++++++++++++ 3 files changed, 37 insertions(+), 23 deletions(-) (limited to 'test/js/workerd/html-rewriter.test.js') diff --git a/src/bun.js/api/html_rewriter.zig b/src/bun.js/api/html_rewriter.zig index 532e0ae1e..b309e07d7 100644 --- a/src/bun.js/api/html_rewriter.zig +++ b/src/bun.js/api/html_rewriter.zig @@ -1558,27 +1558,16 @@ pub const AttributeIterator = struct { return JSC.JSValue.jsNull(); }; - // TODO: don't clone here const value = attribute.value(); const name = attribute.name(); - defer name.deinit(); - defer value.deinit(); - var strs = [2]ZigString{ - ZigString.init(name.slice()), - ZigString.init(value.slice()), - }; - - var valid_strs: []ZigString = strs[0..2]; - - var array = JSC.JSValue.createStringArray( + return bun.String.toJSArray( globalObject, - valid_strs.ptr, - valid_strs.len, - true, + &[_]bun.String{ + name.toString(), + value.toString(), + }, ); - - return array; } }; pub const Element = struct { @@ -1864,8 +1853,9 @@ pub const Element = struct { pub fn getNamespaceURI(this: *Element, globalObject: *JSGlobalObject) JSValue { if (this.element == null) return JSValue.jsUndefined(); - - return ZigString.init(std.mem.span(this.element.?.namespaceURI())).toValueGC(globalObject); + var str = bun.String.create(std.mem.span(this.element.?.namespaceURI())); + defer str.deref(); + return str.toJS(globalObject); } pub fn getAttributes(this: *Element, globalObject: *JSGlobalObject) JSValue { diff --git a/src/deps/lol-html.zig 
b/src/deps/lol-html.zig index 50dab9592..bac422fb0 100644 --- a/src/deps/lol-html.zig +++ b/src/deps/lol-html.zig @@ -570,16 +570,17 @@ pub const HTMLString = extern struct { lol_html_str_free(.{ .ptr = @ptrCast([*]const u8, ptr), .len = len }); } - pub fn toJS(this: HTMLString, globalThis: *bun.JSC.JSGlobalObject) bun.JSC.JSValue { + pub fn toString(this: HTMLString) bun.String { const bytes = this.slice(); if (bun.strings.isAllASCII(bytes)) { - var external = bun.String.createExternal(bytes, true, @constCast(bytes.ptr), &deinit_external); - defer external.deref(); - return external.toJS(globalThis); + return bun.String.createExternal(bytes, true, @constCast(bytes.ptr), &deinit_external); } defer this.deinit(); + return bun.String.create(bytes); + } - var str = bun.String.create(bytes); + pub fn toJS(this: HTMLString, globalThis: *bun.JSC.JSGlobalObject) bun.JSC.JSValue { + var str = this.toString(); defer str.deref(); return str.toJS(globalThis); } diff --git a/test/js/workerd/html-rewriter.test.js b/test/js/workerd/html-rewriter.test.js index aaf912aff..44961df3b 100644 --- a/test/js/workerd/html-rewriter.test.js +++ b/test/js/workerd/html-rewriter.test.js @@ -365,3 +365,26 @@ it("get attribute - ascii", async () => { expect(el).toEqual("asciii"); } }); + +it("#3520", async () => { + const pairs = []; + + await new HTMLRewriter() + .on("p", { + element(element) { + for (const pair of element.attributes) { + pairs.push(pair); + } + }, + }) + .transform(new Response('
<p šž="Õäöü" ab="Õäöü" šž="Õäöü" šž="dc" šž="🕵🏻"></p>
')) + .text(); + + expect(pairs).toEqual([ + ["šž", "Õäöü"], + ["ab", "Õäöü"], + ["šž", "Õäöü"], + ["šž", "dc"], + ["šž", "🕵🏻"], + ]); +}); -- cgit v1.2.3