aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Jarred Sumner <jarred@jarredsumner.com> 2023-07-21 23:27:28 -0700
committerGravatar GitHub <noreply@github.com> 2023-07-21 23:27:28 -0700
commit636cec03e10ab487b1df3057aaab60b4d2b02c99 (patch)
treefbf10b89dcd64270ba7fee1d78efba2b5caf2414
parent1ecd9f8a18da1af9f2090791b15a18ff3e68411d (diff)
downloadbun-636cec03e10ab487b1df3057aaab60b4d2b02c99.tar.gz
bun-636cec03e10ab487b1df3057aaab60b4d2b02c99.tar.zst
bun-636cec03e10ab487b1df3057aaab60b4d2b02c99.zip
Use WebKit's URL parser in fetch() and `bun install` (#3730)
* Use WebKit's URL parser in fetch() and `bun install` * Allocate less memory * Fix test --------- Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>
-rw-r--r--src/bun.js/bindings/BunString.cpp109
-rw-r--r--src/bun.js/bindings/bindings.zig83
-rw-r--r--src/bun.js/webcore/response.zig103
-rw-r--r--src/install/install.zig49
-rw-r--r--src/install/npm.zig2
-rw-r--r--src/string.zig8
-rw-r--r--src/url.zig28
-rw-r--r--test/js/node/async_hooks/AsyncLocalStorage.test.ts2
8 files changed, 321 insertions, 63 deletions
diff --git a/src/bun.js/bindings/BunString.cpp b/src/bun.js/bindings/BunString.cpp
index 0676dce19..714f10080 100644
--- a/src/bun.js/bindings/BunString.cpp
+++ b/src/bun.js/bindings/BunString.cpp
@@ -297,4 +297,113 @@ extern "C" void BunString__toWTFString(BunString* bunString)
bunString->impl.wtf = Zig::toStringStatic(bunString->impl.zig).impl();
bunString->tag = BunStringTag::WTFStringImpl;
}
+}
+
+extern "C" WTF::URL* URL__fromJS(EncodedJSValue encodedValue, JSC::JSGlobalObject* globalObject)
+{
+ auto throwScope = DECLARE_THROW_SCOPE(globalObject->vm());
+ JSC::JSValue value = JSC::JSValue::decode(encodedValue);
+ auto str = value.toWTFString(globalObject);
+ RETURN_IF_EXCEPTION(throwScope, nullptr);
+ if (str.isEmpty()) {
+ return nullptr;
+ }
+
+ auto url = WTF::URL(str);
+ if (!url.isValid() || url.isNull())
+ return nullptr;
+
+ return new WTF::URL(WTFMove(url));
+}
+
+extern "C" BunString URL__getHrefFromJS(EncodedJSValue encodedValue, JSC::JSGlobalObject* globalObject)
+{
+ auto throwScope = DECLARE_THROW_SCOPE(globalObject->vm());
+ JSC::JSValue value = JSC::JSValue::decode(encodedValue);
+ auto str = value.toWTFString(globalObject);
+ RETURN_IF_EXCEPTION(throwScope, { BunStringTag::Dead });
+ if (str.isEmpty()) {
+ return { BunStringTag::Dead };
+ }
+
+ auto url = WTF::URL(str);
+ if (!url.isValid() || url.isEmpty())
+ return { BunStringTag::Dead };
+
+ return Bun::toStringRef(url.string());
+}
+
+extern "C" BunString URL__getHref(BunString* input)
+{
+ auto str = Bun::toWTFString(*input);
+ auto url = WTF::URL(str);
+ if (!url.isValid() || url.isEmpty())
+ return { BunStringTag::Dead };
+
+ return Bun::toStringRef(url.string());
+}
+
+extern "C" WTF::URL* URL__fromString(BunString* input)
+{
+ auto str = Bun::toWTFString(*input);
+ auto url = WTF::URL(str);
+ if (!url.isValid())
+ return nullptr;
+
+ return new WTF::URL(WTFMove(url));
+}
+
+extern "C" BunString URL__protocol(WTF::URL* url)
+{
+ return Bun::toStringRef(url->protocol().toStringWithoutCopying());
+}
+
+extern "C" void URL__deinit(WTF::URL* url)
+{
+ delete url;
+}
+
+extern "C" BunString URL__href(WTF::URL* url)
+{
+ return Bun::toStringRef(url->string());
+}
+
+extern "C" BunString URL__username(WTF::URL* url)
+{
+ return Bun::toStringRef(url->user());
+}
+
+extern "C" BunString URL__password(WTF::URL* url)
+{
+ return Bun::toStringRef(url->password());
+}
+
+extern "C" BunString URL__search(WTF::URL* url)
+{
+ return Bun::toStringRef(url->query().toStringWithoutCopying());
+}
+
+extern "C" BunString URL__host(WTF::URL* url)
+{
+ return Bun::toStringRef(url->host().toStringWithoutCopying());
+}
+extern "C" BunString URL__hostname(WTF::URL* url)
+{
+ return Bun::toStringRef(url->hostAndPort());
+}
+
+extern "C" uint32_t URL__port(WTF::URL* url)
+{
+ auto port = url->port();
+
+ if (port.has_value()) {
+ return port.value();
+ }
+
+ return std::numeric_limits<uint32_t>::max();
+}
+
+extern "C" BunString URL__pathname(WTF::URL* url)
+{
+ return Bun::toStringRef(url->path().toStringWithoutCopying());
} \ No newline at end of file
diff --git a/src/bun.js/bindings/bindings.zig b/src/bun.js/bindings/bindings.zig
index 1d5517dac..edd1be1a9 100644
--- a/src/bun.js/bindings/bindings.zig
+++ b/src/bun.js/bindings/bindings.zig
@@ -5384,6 +5384,89 @@ pub fn untrackFunction(
return private.Bun__untrackFFIFunction(globalObject, value);
}
+pub const URL = opaque {
+ extern fn URL__fromJS(JSValue, *JSC.JSGlobalObject) ?*URL;
+ extern fn URL__fromString(*bun.String) ?*URL;
+ extern fn URL__protocol(*URL) String;
+ extern fn URL__href(*URL) String;
+ extern fn URL__username(*URL) String;
+ extern fn URL__password(*URL) String;
+ extern fn URL__search(*URL) String;
+ extern fn URL__host(*URL) String;
+ extern fn URL__hostname(*URL) String;
+ extern fn URL__port(*URL) String;
+ extern fn URL__deinit(*URL) void;
+ extern fn URL__pathname(*URL) String;
+ extern fn URL__getHrefFromJS(JSValue, *JSC.JSGlobalObject) String;
+ extern fn URL__getHref(*String) String;
+ pub fn hrefFromString(str: bun.String) String {
+ JSC.markBinding(@src());
+ var input = str;
+ return URL__getHref(&input);
+ }
+
+ /// This percent-encodes the URL, punycode-encodes the hostname, and returns the result
+ /// If it fails, the tag is marked Dead
+ pub fn hrefFromJS(value: JSValue, globalObject: *JSC.JSGlobalObject) String {
+ JSC.markBinding(@src());
+ return URL__getHrefFromJS(value, globalObject);
+ }
+
+ pub fn fromJS(value: JSValue, globalObject: *JSC.JSGlobalObject) ?*URL {
+ JSC.markBinding(@src());
+ return URL__fromJS(value, globalObject);
+ }
+
+ pub fn fromUTF8(input: []const u8) ?*URL {
+ return fromString(String.fromUTF8(input));
+ }
+ pub fn fromString(str: bun.String) ?*URL {
+ JSC.markBinding(@src());
+ var input = str;
+ return URL__fromString(&input);
+ }
+ pub fn protocol(url: *URL) String {
+ JSC.markBinding(@src());
+ return URL__protocol(url);
+ }
+ pub fn href(url: *URL) String {
+ JSC.markBinding(@src());
+ return URL__href(url);
+ }
+ pub fn username(url: *URL) String {
+ JSC.markBinding(@src());
+ return URL__username(url);
+ }
+ pub fn password(url: *URL) String {
+ JSC.markBinding(@src());
+ return URL__password(url);
+ }
+ pub fn search(url: *URL) String {
+ JSC.markBinding(@src());
+ return URL__search(url);
+ }
+ pub fn host(url: *URL) String {
+ JSC.markBinding(@src());
+ return URL__host(url);
+ }
+ pub fn hostname(url: *URL) String {
+ JSC.markBinding(@src());
+ return URL__hostname(url);
+ }
+ pub fn port(url: *URL) String {
+ JSC.markBinding(@src());
+ return URL__port(url);
+ }
+ pub fn deinit(url: *URL) void {
+ JSC.markBinding(@src());
+ return URL__deinit(url);
+ }
+ pub fn pathname(url: *URL) String {
+ JSC.markBinding(@src());
+ return URL__pathname(url);
+ }
+};
+
pub const URLSearchParams = opaque {
extern fn URLSearchParams__create(globalObject: *JSGlobalObject, *const ZigString) JSValue;
pub fn create(globalObject: *JSGlobalObject, init: ZigString) JSValue {
diff --git a/src/bun.js/webcore/response.zig b/src/bun.js/webcore/response.zig
index 5987f749b..ab2a6010d 100644
--- a/src/bun.js/webcore/response.zig
+++ b/src/bun.js/webcore/response.zig
@@ -1040,9 +1040,7 @@ pub const Fetch = struct {
if (first_arg.as(Request)) |request| {
request.ensureURL() catch unreachable;
- var url_slice = bun.default_allocator.dupe(u8, request.url) catch unreachable;
-
- if (url_slice.len == 0) {
+ if (request.url.len == 0) {
const err = JSC.toTypeError(.ERR_INVALID_ARG_VALUE, fetch_error_blank_url, .{}, ctx);
// clean hostname if any
if (hostname) |host| {
@@ -1051,8 +1049,19 @@ pub const Fetch = struct {
return JSPromise.rejectedPromiseValue(globalThis, err);
}
- url = ZigURL.parse(url_slice);
- url_proxy_buffer = url_slice;
+ url = ZigURL.fromUTF8(bun.default_allocator, request.url) catch {
+ const err = JSC.toTypeError(.ERR_INVALID_ARG_VALUE, "fetch() URL is invalid", .{}, ctx);
+ // clean hostname if any
+ if (hostname) |host| {
+ bun.default_allocator.free(host);
+ }
+
+ return JSPromise.rejectedPromiseValue(
+ globalThis,
+ err,
+ );
+ };
+ url_proxy_buffer = url.href;
if (args.nextEat()) |options| {
if (options.isObject() or options.jsType() == .DOMWrapper) {
@@ -1138,22 +1147,25 @@ pub const Fetch = struct {
if (options.get(globalThis, "proxy")) |proxy_arg| {
if (proxy_arg.isString() and proxy_arg.getLength(ctx) > 0) {
- defer bun.default_allocator.free(url_slice);
-
- var proxy_str = proxy_arg.toStringOrNull(globalThis) orelse return .zero;
- var proxy_url_zig = proxy_str.toSlice(globalThis, bun.default_allocator);
- defer proxy_url_zig.deinit();
-
- var buffer = getAllocator(ctx).alloc(u8, url_slice.len + proxy_url_zig.len) catch {
- JSC.JSError(bun.default_allocator, "Out of memory", .{}, ctx, exception);
+ var href = JSC.URL.hrefFromJS(proxy_arg, globalThis);
+ if (href.tag == .Dead) {
+ const err = JSC.toTypeError(.ERR_INVALID_ARG_VALUE, "fetch() proxy URL is invalid", .{}, ctx);
+ // clean hostname if any
+ if (hostname) |host| {
+ bun.default_allocator.free(host);
+ }
+ bun.default_allocator.free(url_proxy_buffer);
+
+ return JSPromise.rejectedPromiseValue(globalThis, err);
+ }
+ defer href.deref();
+ var buffer = std.fmt.allocPrint(bun.default_allocator, "{s}{}", .{ url_proxy_buffer, href }) catch {
+ globalThis.throwOutOfMemory();
return .zero;
};
- @memcpy(buffer[0..url_slice.len], url_slice);
- var proxy_url_slice = buffer[url_slice.len..];
- @memcpy(proxy_url_slice[0..proxy_url_zig.len], proxy_url_zig.ptr[0..proxy_url_zig.len]);
-
- url = ZigURL.parse(buffer[0..url_slice.len]);
- proxy = ZigURL.parse(proxy_url_slice);
+ url = ZigURL.parse(buffer[0..url.href.len]);
+ proxy = ZigURL.parse(buffer[url.href.len..]);
+ bun.default_allocator.free(url_proxy_buffer);
url_proxy_buffer = buffer;
}
}
@@ -1172,29 +1184,25 @@ pub const Fetch = struct {
signal = signal_;
}
}
- } else if (first_arg.toStringOrNull(globalThis)) |jsstring| {
-
- // Check the URL
- var url_slice = jsstring.toSlice(globalThis, bun.default_allocator).cloneIfNeeded(bun.default_allocator) catch {
+ } else if (bun.String.tryFromJS(first_arg, globalThis)) |str| {
+ if (str.isEmpty()) {
+ const err = JSC.toTypeError(.ERR_INVALID_ARG_VALUE, fetch_error_blank_url, .{}, ctx);
// clean hostname if any
if (hostname) |host| {
bun.default_allocator.free(host);
}
- JSC.JSError(bun.default_allocator, "Out of memory", .{}, ctx, exception);
- return .zero;
- };
+ return JSPromise.rejectedPromiseValue(globalThis, err);
+ }
- if (url_slice.len == 0) {
- const err = JSC.toTypeError(.ERR_INVALID_ARG_VALUE, fetch_error_blank_url, .{}, ctx);
+ url = ZigURL.fromString(bun.default_allocator, str) catch {
// clean hostname if any
if (hostname) |host| {
bun.default_allocator.free(host);
}
+ const err = JSC.toTypeError(.ERR_INVALID_ARG_VALUE, "fetch() URL is invalid", .{}, ctx);
return JSPromise.rejectedPromiseValue(globalThis, err);
- }
-
- url = ZigURL.parse(url_slice.slice());
- url_proxy_buffer = url_slice.slice();
+ };
+ url_proxy_buffer = url.href;
if (args.nextEat()) |options| {
if (options.isObject() or options.jsType() == .DOMWrapper) {
@@ -1274,22 +1282,25 @@ pub const Fetch = struct {
if (options.getTruthy(globalThis, "proxy")) |proxy_arg| {
if (proxy_arg.isString() and proxy_arg.getLength(globalThis) > 0) {
- defer url_slice.deinit();
-
- var proxy_str = proxy_arg.toStringOrNull(globalThis) orelse return .zero;
- var proxy_url_zig = proxy_str.toSlice(globalThis, bun.default_allocator);
- defer proxy_url_zig.deinit();
-
- var buffer = getAllocator(ctx).alloc(u8, url_slice.len + proxy_url_zig.len) catch {
- JSC.JSError(bun.default_allocator, "Out of memory", .{}, ctx, exception);
+ var href = JSC.URL.hrefFromJS(proxy_arg, globalThis);
+ if (href.tag == .Dead) {
+ const err = JSC.toTypeError(.ERR_INVALID_ARG_VALUE, "fetch() proxy URL is invalid", .{}, ctx);
+ // clean hostname if any
+ if (hostname) |host| {
+ bun.default_allocator.free(host);
+ }
+ bun.default_allocator.free(url_proxy_buffer);
+
+ return JSPromise.rejectedPromiseValue(globalThis, err);
+ }
+ defer href.deref();
+ var buffer = std.fmt.allocPrint(bun.default_allocator, "{s}{}", .{ url_proxy_buffer, href }) catch {
+ globalThis.throwOutOfMemory();
return .zero;
};
- @memcpy(buffer[0..url_slice.len], url_slice.ptr[0..url_slice.len]);
- var proxy_url_slice = buffer[url_slice.len..];
- @memcpy(proxy_url_slice[0..proxy_url_zig.len], proxy_url_zig.ptr[0..proxy_url_zig.len]);
-
- url = ZigURL.parse(buffer[0..url_slice.len]);
- proxy = ZigURL.parse(proxy_url_slice);
+ url = ZigURL.parse(buffer[0..url.href.len]);
+ proxy = ZigURL.parse(buffer[url.href.len..]);
+ bun.default_allocator.free(url_proxy_buffer);
url_proxy_buffer = buffer;
}
}
diff --git a/src/install/install.zig b/src/install/install.zig
index a91a3dd93..7b8d71bad 100644
--- a/src/install/install.zig
+++ b/src/install/install.zig
@@ -4475,7 +4475,23 @@ pub const PackageManager = struct {
for (scoped.scopes, 0..) |name, i| {
var registry = scoped.registries[i];
if (registry.url.len == 0) registry.url = base.url;
- try this.registries.put(allocator, Npm.Registry.Scope.hash(name), try Npm.Registry.Scope.fromAPI(name, registry, allocator, env));
+ try this.registries.put(allocator, Npm.Registry.Scope.hash(name), Npm.Registry.Scope.fromAPI(name, registry, allocator, env) catch |err| {
+ if (err == error.InvalidURL) {
+ log.addErrorFmt(
+ null,
+ logger.Loc.Empty,
+ allocator,
+ "{} is not a valid registry URL",
+ .{
+ strings.QuotedFormatter{
+ .text = registry.url,
+ },
+ },
+ ) catch unreachable;
+ }
+
+ return err;
+ });
}
}
@@ -4588,12 +4604,21 @@ pub const PackageManager = struct {
{
const prev_scope = this.scope;
var api_registry = std.mem.zeroes(Api.NpmRegistry);
- api_registry.url = registry_;
- api_registry.token = prev_scope.token;
- this.scope = try Npm.Registry.Scope.fromAPI("", api_registry, allocator, env);
- did_set = true;
- // stage1 bug: break inside inline is broken
- // break :load_registry;
+ var href = bun.JSC.URL.hrefFromString(bun.String.fromUTF8(registry_));
+ if (href.tag == .Dead) {
+ try log.addErrorFmt(null, logger.Loc.Empty, bun.default_allocator, "${s} has invalid URL {}", .{
+ registry_key, strings.QuotedFormatter{
+ .text = registry_,
+ },
+ });
+ } else {
+ defer href.deref();
+
+ api_registry.token = prev_scope.token;
+ this.scope = try Npm.Registry.Scope.fromAPI("", api_registry, allocator, env);
+ api_registry.url = try href.toOwnedSlice(bun.default_allocator);
+ did_set = true;
+ }
}
}
}
@@ -4626,7 +4651,15 @@ pub const PackageManager = struct {
if (cli.registry.len > 0 and strings.startsWith(cli.registry, "https://") or
strings.startsWith(cli.registry, "http://"))
{
- this.scope.url = URL.parse(cli.registry);
+ if (URL.fromUTF8(instance.allocator, cli.registry)) |url| {
+ this.scope.url = url;
+ } else |_| {
+ try log.addErrorFmt(null, logger.Loc.Empty, bun.default_allocator, "--registry has invalid URL {}", .{
+ strings.QuotedFormatter{
+ .text = cli.registry,
+ },
+ });
+ }
}
if (cli.exact) {
diff --git a/src/install/npm.zig b/src/install/npm.zig
index 0a25fe636..b135a9548 100644
--- a/src/install/npm.zig
+++ b/src/install/npm.zig
@@ -68,7 +68,7 @@ pub const Registry = struct {
pub fn fromAPI(name: string, registry_: Api.NpmRegistry, allocator: std.mem.Allocator, env: *DotEnv.Loader) !Scope {
var registry = registry_;
- var url = URL.parse(registry.url);
+ var url = try URL.fromUTF8(bun.default_allocator, registry.url);
var auth: string = "";
if (registry.token.len == 0) {
diff --git a/src/string.zig b/src/string.zig
index e592537c2..1e2ee752e 100644
--- a/src/string.zig
+++ b/src/string.zig
@@ -280,15 +280,15 @@ pub const String = extern struct {
switch (this.tag) {
.ZigString => return try this.value.ZigString.toOwnedSlice(allocator),
.WTFStringImpl => {
- var utf8_slice = this.value.WTFStringImpl.toUTF8(allocator);
+ var utf8_slice = this.value.WTFStringImpl.toUTF8WithoutRef(allocator);
if (utf8_slice.allocator.get()) |alloc| {
- if (isWTFAllocator(alloc)) {
- return @constCast((try utf8_slice.clone(allocator)).slice());
+ if (!isWTFAllocator(alloc)) {
+ return @constCast(utf8_slice.slice());
}
}
- return @constCast(utf8_slice.slice());
+ return @constCast((try utf8_slice.clone(allocator)).slice());
},
.StaticZigString => return try this.value.StaticZigString.toOwnedSlice(allocator),
.Empty => return &[_]u8{},
diff --git a/src/url.zig b/src/url.zig
index 605f438ee..dcb1e91cd 100644
--- a/src/url.zig
+++ b/src/url.zig
@@ -11,6 +11,7 @@ const MutableString = bun.MutableString;
const stringZ = bun.stringZ;
const default_allocator = bun.default_allocator;
const C = bun.C;
+const JSC = bun.JSC;
// This is close to WHATWG URL, but we don't want the validation errors
pub const URL = struct {
@@ -35,6 +36,29 @@ pub const URL = struct {
username: string = "",
port_was_automatically_set: bool = false,
+ pub fn fromJS(js_value: JSC.JSValue, globalObject: *JSC.JSGlobalObject, allocator: std.mem.Allocator) !URL {
+ var href = JSC.URL.hrefFromJS(globalObject, js_value);
+ if (href.tag == .Dead) {
+ return error.InvalidURL;
+ }
+
+ return URL.parse(try href.toOwnedSlice(allocator));
+ }
+
+ pub fn fromString(allocator: std.mem.Allocator, input: bun.String) !URL {
+ var href = JSC.URL.hrefFromString(input);
+ if (href.tag == .Dead) {
+ return error.InvalidURL;
+ }
+
+ defer href.deref();
+ return URL.parse(try href.toOwnedSlice(allocator));
+ }
+
+ pub fn fromUTF8(allocator: std.mem.Allocator, input: []const u8) !URL {
+ return fromString(allocator, bun.String.fromUTF8(input));
+ }
+
pub fn isLocalhost(this: *const URL) bool {
return this.hostname.len == 0 or strings.eqlComptime(this.hostname, "localhost") or strings.eqlComptime(this.hostname, "0.0.0.0");
}
@@ -197,8 +221,7 @@ pub const URL = struct {
}
}
- pub fn parse(base_: string) URL {
- const base = std.mem.trim(u8, base_, &std.ascii.whitespace);
+ pub fn parse(base: string) URL {
if (base.len == 0) return URL{};
var url = URL{};
url.href = base;
@@ -970,7 +993,6 @@ pub const FormData = struct {
.Multipart => |boundary| return toJSFromMultipartData(globalThis, input, boundary),
}
}
- const JSC = bun.JSC;
pub fn jsFunctionFromMultipartData(
globalThis: *JSC.JSGlobalObject,
diff --git a/test/js/node/async_hooks/AsyncLocalStorage.test.ts b/test/js/node/async_hooks/AsyncLocalStorage.test.ts
index 1a7aad050..48f4cf16d 100644
--- a/test/js/node/async_hooks/AsyncLocalStorage.test.ts
+++ b/test/js/node/async_hooks/AsyncLocalStorage.test.ts
@@ -264,7 +264,7 @@ describe("async context passes through", () => {
},
});
- const response = await fetch(server.hostname + ":" + server.port);
+ const response = await fetch("http://" + server.hostname + ":" + server.port);
expect(await response.text()).toBe("value");
expect(s.getStore()).toBe("value");