diff options
author | 2023-05-31 00:43:59 -0700 | |
---|---|---|
committer | 2023-05-31 00:43:59 -0700 | |
commit | 557aac6a34bcd06bba98060f67bb7295d6c462d9 (patch) | |
tree | 02d3581f08497dd245b6b6ea8e5b58cd29dc4784 | |
parent | 0aab11a95d1e81f786a25a03d730987181f4d661 (diff) | |
download | bun-557aac6a34bcd06bba98060f67bb7295d6c462d9.tar.gz bun-557aac6a34bcd06bba98060f67bb7295d6c462d9.tar.zst bun-557aac6a34bcd06bba98060f67bb7295d6c462d9.zip |
Support FormData & file uploads in `fetch` body (#3123)
* Fixes #2264
* fixup
* Don't leak HTTP headers
* Include the mime type. It's cleaner
---------
Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>
-rw-r--r-- | src/bun.js/webcore/blob.zig | 45 | ||||
-rw-r--r-- | src/bun.js/webcore/response.zig | 175 | ||||
-rw-r--r-- | src/url.zig | 14 | ||||
-rw-r--r-- | test/js/bun/http/fetch-file-upload.test.ts | 51 | ||||
-rw-r--r-- | test/js/bun/http/serve.test.ts | 52 | ||||
-rw-r--r-- | test/js/web/html/FormData.test.ts | 119 |
6 files changed, 406 insertions, 50 deletions
diff --git a/src/bun.js/webcore/blob.zig b/src/bun.js/webcore/blob.zig index dd19e1da0..a5d3c968d 100644 --- a/src/bun.js/webcore/blob.zig +++ b/src/bun.js/webcore/blob.zig @@ -85,6 +85,7 @@ pub const Blob = struct { store: ?*Store = null, content_type: string = "", content_type_allocated: bool = false, + content_type_was_set: bool = false, /// JavaScriptCore strings are either latin1 or UTF-16 /// When UTF-16, they're nearly always due to non-ascii characters @@ -111,6 +112,10 @@ pub const Blob = struct { return bun.FormData.AsyncFormData.init(this.allocator orelse bun.default_allocator, encoding) catch unreachable; } + pub fn hasContentTypeFromUser(this: *const Blob) bool { + return this.content_type_was_set or (this.store != null and this.store.?.data == .file); + } + const FormDataContext = struct { allocator: std.mem.Allocator, joiner: StringJoiner, @@ -228,6 +233,7 @@ pub const Blob = struct { var blob = Blob.initWithStore(store, globalThis); blob.content_type = store.mime_type.value; + blob.content_type_was_set = true; return blob; } @@ -268,6 +274,7 @@ pub const Blob = struct { var blob = Blob.initWithStore(store, globalThis); blob.content_type = std.fmt.allocPrint(allocator, "multipart/form-data; boundary=\"{s}\"", .{boundary}) catch unreachable; blob.content_type_allocated = true; + blob.content_type_was_set = true; return blob; } @@ -288,7 +295,7 @@ pub const Blob = struct { export fn Blob__dupe(ptr: *anyopaque) *Blob { var this = bun.cast(*Blob, ptr); var new = bun.default_allocator.create(Blob) catch unreachable; - new.* = this.dupe(); + new.* = this.dupeWithContentType(true); new.allocator = bun.default_allocator; return new; } @@ -2527,6 +2534,7 @@ pub const Blob = struct { blob.content_type = content_type; blob.content_type_allocated = content_type_was_allocated; + blob.content_type_was_set = this.content_type_was_set or content_type_was_allocated; var blob_ = allocator.create(Blob) catch unreachable; blob_.* = blob; @@ -2754,6 +2762,8 @@ pub const Blob = struct { if (!strings.isAllASCII(slice)) { break :inner; } + blob.content_type_was_set = true; + if (globalThis.bunVM().mimeType(slice)) |mime| { blob.content_type = mime.value; break :inner; @@ -2769,6 +2779,7 @@ pub const Blob = struct { if (blob.content_type.len == 0) { blob.content_type = ""; + blob.content_type_was_set = false; } }, } @@ -2870,8 +2881,33 @@ pub const Blob = struct { /// This creates a new view /// and increment the reference count pub fn dupe(this: *const Blob) Blob { + return this.dupeWithContentType(false); + } + + pub fn dupeWithContentType(this: *const Blob, include_content_type: bool) Blob { if (this.store != null) this.store.?.ref(); var duped = this.*; + if (duped.content_type_allocated and duped.allocator != null and !include_content_type) { + + // for now, we just want to avoid a use-after-free here + if (JSC.VirtualMachine.get().mimeType(duped.content_type)) |mime| { + duped.content_type = mime.value; + } else { + // TODO: fix this + // this is a bug. + // it means whenever + duped.content_type = ""; + } + + duped.content_type_allocated = false; + duped.content_type_was_set = false; + if (this.content_type_was_set) { + duped.content_type_was_set = duped.content_type.len > 0; + } + } else if (duped.content_type_allocated and duped.allocator != null and include_content_type) { + duped.content_type = bun.default_allocator.dupe(u8, this.content_type) catch @panic("Out of memory"); + } + duped.allocator = null; return duped; } @@ -3477,6 +3513,13 @@ pub const AnyBlob = union(enum) { // InlineBlob: InlineBlob, InternalBlob: InternalBlob, + pub fn hasContentTypeFromUser(this: AnyBlob) bool { + return switch (this) { + .Blob => this.Blob.hasContentTypeFromUser(), + .InternalBlob => false, + }; + } + pub fn toJSON(this: *AnyBlob, global: *JSGlobalObject, comptime lifetime: JSC.WebCore.Lifetime) JSValue { switch (this.*) { .Blob => return this.Blob.toJSON(global, lifetime), diff --git a/src/bun.js/webcore/response.zig b/src/bun.js/webcore/response.zig index 8d1bfb961..1ba0c82cc 100644 --- a/src/bun.js/webcore/response.zig +++ b/src/bun.js/webcore/response.zig @@ -961,6 +961,14 @@ pub const Fetch = struct { var url = ZigURL{}; var first_arg = args.nextEat().?; + + // We must always get the Body before the Headers That way, we can set + // the Content-Type header from the Blob if no Content-Type header is + // set in the Headers + // + // which is important for FormData. + // https://github.com/oven-sh/bun/issues/2264 + // var body: AnyBlob = AnyBlob{ .Blob = .{}, }; @@ -988,46 +996,45 @@ pub const Fetch = struct { method = request.method; } + if (options.fastGet(ctx.ptr(), .body)) |body__| { + if (Body.Value.fromJS(ctx.ptr(), body__)) |body_const| { + var body_value = body_const; + // TODO: buffer ReadableStream? + // we have to explicitly check for InternalBlob + body = body_value.useAsAnyBlob(); + } else { + // clean hostname if any + if (hostname) |host| { + bun.default_allocator.free(host); + } + // an error was thrown + return JSC.JSValue.jsUndefined(); + } + } else { + body = request.body.value.useAsAnyBlob(); + } + if (options.fastGet(ctx.ptr(), .headers)) |headers_| { if (headers_.as(FetchHeaders)) |headers__| { if (headers__.fastGet(JSC.FetchHeaders.HTTPHeaderName.Host)) |_hostname| { hostname = _hostname.toOwnedSliceZ(bun.default_allocator) catch unreachable; } - headers = Headers.from(headers__, bun.default_allocator) catch unreachable; + headers = Headers.from(headers__, bun.default_allocator, .{ .body = &body }) catch unreachable; // TODO: make this one pass } else if (FetchHeaders.createFromJS(ctx.ptr(), headers_)) |headers__| { if (headers__.fastGet(JSC.FetchHeaders.HTTPHeaderName.Host)) |_hostname| { hostname = _hostname.toOwnedSliceZ(bun.default_allocator) catch unreachable; } - headers = Headers.from(headers__, bun.default_allocator) catch unreachable; + headers = Headers.from(headers__, bun.default_allocator, .{ .body = &body }) catch unreachable; headers__.deref(); } else if (request.headers) |head| { if (head.fastGet(JSC.FetchHeaders.HTTPHeaderName.Host)) |_hostname| { hostname = _hostname.toOwnedSliceZ(bun.default_allocator) catch unreachable; } - headers = Headers.from(head, bun.default_allocator) catch unreachable; + headers = Headers.from(head, bun.default_allocator, .{ .body = &body }) catch unreachable; } } else if (request.headers) |head| { - headers = Headers.from(head, bun.default_allocator) catch unreachable; - } - - if (options.fastGet(ctx.ptr(), .body)) |body__| { - if (Body.Value.fromJS(ctx.ptr(), body__)) |body_const| { - var body_value = body_const; - // TODO: buffer ReadableStream? - // we have to explicitly check for InternalBlob - - body = body_value.useAsAnyBlob(); - } else { - // clean hostname if any - if (hostname) |host| { - bun.default_allocator.free(host); - } - // an error was thrown - return JSC.JSValue.jsUndefined(); - } - } else { - body = request.body.value.useAsAnyBlob(); + headers = Headers.from(head, bun.default_allocator, .{ .body = &body }) catch unreachable; } if (options.get(ctx, "timeout")) |timeout_value| { @@ -1100,13 +1107,13 @@ pub const Fetch = struct { } } else { method = request.method; + body = request.body.value.useAsAnyBlob(); if (request.headers) |head| { if (head.fastGet(JSC.FetchHeaders.HTTPHeaderName.Host)) |_hostname| { hostname = _hostname.toOwnedSliceZ(bun.default_allocator) catch unreachable; } - headers = Headers.from(head, bun.default_allocator) catch unreachable; + headers = Headers.from(head, bun.default_allocator, .{ .body = &body }) catch unreachable; } - body = request.body.value.useAsAnyBlob(); // no proxy only url url = ZigURL.parse(getAllocator(ctx).dupe(u8, request.url) catch unreachable); url_proxy_buffer = url.href; @@ -1124,19 +1131,35 @@ pub const Fetch = struct { method = Method.which(slice_.slice()) orelse .GET; } + if (options.fastGet(ctx.ptr(), .body)) |body__| { + if (Body.Value.fromJS(ctx.ptr(), body__)) |body_const| { + var body_value = body_const; + // TODO: buffer ReadableStream? + // we have to explicitly check for InternalBlob + body = body_value.useAsAnyBlob(); + } else { + // clean hostname if any + if (hostname) |host| { + bun.default_allocator.free(host); + } + // an error was thrown + return JSC.JSValue.jsUndefined(); + } + } + if (options.fastGet(ctx.ptr(), .headers)) |headers_| { if (headers_.as(FetchHeaders)) |headers__| { if (headers__.fastGet(JSC.FetchHeaders.HTTPHeaderName.Host)) |_hostname| { hostname = _hostname.toOwnedSliceZ(bun.default_allocator) catch unreachable; } - headers = Headers.from(headers__, bun.default_allocator) catch unreachable; + headers = Headers.from(headers__, bun.default_allocator, .{ .body = &body }) catch unreachable; // TODO: make this one pass } else if (FetchHeaders.createFromJS(ctx.ptr(), headers_)) |headers__| { defer headers__.deref(); if (headers__.fastGet(JSC.FetchHeaders.HTTPHeaderName.Host)) |_hostname| { hostname = _hostname.toOwnedSliceZ(bun.default_allocator) catch unreachable; } - headers = Headers.from(headers__, bun.default_allocator) catch unreachable; + headers = Headers.from(headers__, bun.default_allocator, .{ .body = &body }) catch unreachable; } else { // Converting the headers failed; return null and // let the set exception get thrown @@ -1144,22 +1167,6 @@ pub const Fetch = struct { } } - if (options.fastGet(ctx.ptr(), .body)) |body__| { - if (Body.Value.fromJS(ctx.ptr(), body__)) |body_const| { - var body_value = body_const; - // TODO: buffer ReadableStream? - // we have to explicitly check for InternalBlob - body = body_value.useAsAnyBlob(); - } else { - // clean hostname if any - if (hostname) |host| { - bun.default_allocator.free(host); - } - // an error was thrown - return JSC.JSValue.jsUndefined(); - } - } - if (options.get(ctx, "timeout")) |timeout_value| { if (timeout_value.isBoolean()) { disable_timeout = !timeout_value.asBoolean(); @@ -1324,6 +1331,47 @@ pub const Fetch = struct { return JSPromise.rejectedPromiseValue(globalThis, err); } + if (headers == null and body.size() > 0 and body.hasContentTypeFromUser()) { + headers = Headers.from( + null, + bun.default_allocator, + .{ .body = &body }, + ) catch unreachable; + } + + if (body.needsToReadFile()) { + // TODO: make this async + lazy + const res = JSC.Node.NodeFS.readFile( + globalThis.bunVM().nodeFS(), + .{ + .encoding = .buffer, + .path = body.Blob.store.?.data.file.pathlike, + .offset = body.Blob.offset, + .max_size = body.Blob.size, + }, + .sync, + ); + + switch (res) { + .err => |err| { + bun.default_allocator.free(url_proxy_buffer); + + const rejected_value = JSPromise.rejectedPromiseValue(globalThis, err.toJSC(globalThis)); + body.detach(); + if (headers) |*headers_| { + headers_.buf.deinit(bun.default_allocator); + headers_.entries.deinit(bun.default_allocator); + } + + return rejected_value; + }, + .result => |result| { + body.detach(); + body.from(std.ArrayList(u8).fromOwnedSlice(bun.default_allocator, @constCast(result.slice()))); + }, + } + } + // Only create this after we have validated all the input. // or else we will leak it var promise = JSPromise.Strong.init(globalThis); @@ -1376,15 +1424,31 @@ pub const Headers = struct { ""; } - pub fn from(headers_ref: *FetchHeaders, allocator: std.mem.Allocator) !Headers { + pub const Options = struct { + body: ?*const AnyBlob = null, + }; + + pub fn from(fetch_headers_ref: ?*FetchHeaders, allocator: std.mem.Allocator, options: Options) !Headers { var header_count: u32 = 0; var buf_len: u32 = 0; - headers_ref.count(&header_count, &buf_len); + if (fetch_headers_ref) |headers_ref| + headers_ref.count(&header_count, &buf_len); var headers = Headers{ .entries = .{}, .buf = .{}, .allocator = allocator, }; + const buf_len_before_content_type = buf_len; + const needs_content_type = brk: { + if (options.body) |body| { + if (body.hasContentTypeFromUser() and (fetch_headers_ref == null or !fetch_headers_ref.?.fastHas(.ContentType))) { + header_count += 1; + buf_len += @truncate(u32, body.contentType().len + "Content-Type".len); + break :brk true; + } + } + break :brk false; + }; headers.entries.ensureTotalCapacity(allocator, header_count) catch unreachable; headers.entries.len = header_count; headers.buf.ensureTotalCapacityPrecise(allocator, buf_len) catch unreachable; @@ -1392,7 +1456,24 @@ pub const Headers = struct { var sliced = headers.entries.slice(); var names = sliced.items(.name); var values = sliced.items(.value); - headers_ref.copyTo(names.ptr, values.ptr, headers.buf.items.ptr); + if (fetch_headers_ref) |headers_ref| + headers_ref.copyTo(names.ptr, values.ptr, headers.buf.items.ptr); + + // TODO: maybe we should send Content-Type header first instead of last? + if (needs_content_type) { + bun.copy(u8, headers.buf.items[buf_len_before_content_type..], "Content-Type"); + names[header_count - 1] = .{ + .offset = buf_len_before_content_type, + .length = "Content-Type".len, + }; + + bun.copy(u8, headers.buf.items[buf_len_before_content_type + "Content-Type".len ..], options.body.?.contentType()); + values[header_count - 1] = .{ + .offset = buf_len_before_content_type + @as(u32, "Content-Type".len), + .length = @truncate(u32, options.body.?.contentType().len), + }; + } + return headers; } }; @@ -1567,7 +1648,7 @@ pub const FetchEvent = struct { var content_length: ?usize = null; if (response.body.init.headers) |headers_ref| { - var headers = Headers.from(headers_ref, request_context.allocator) catch unreachable; + var headers = Headers.from(headers_ref, request_context.allocator, .{}) catch unreachable; var i: usize = 0; while (i < headers.entries.len) : (i += 1) { diff --git a/src/url.zig b/src/url.zig index 1e1b284b5..d0fcdab38 100644 --- a/src/url.zig +++ b/src/url.zig @@ -997,6 +997,9 @@ pub const FormData = struct { defer blob.detach(); var filename = bun.JSC.ZigString.initUTF8(filename_str); const content_type: []const u8 = brk: { + if (!field.content_type.isEmpty()) { + break :brk field.content_type.slice(buf); + } if (filename_str.len > 0) { if (bun.HTTP.MimeType.byExtensionNoDefault(std.fs.path.extension(filename_str))) |mime| { break :brk mime.value; @@ -1011,8 +1014,15 @@ pub const FormData = struct { }; if (content_type.len > 0) { - blob.content_type = content_type; - blob.content_type_allocated = false; + if (!field.content_type.isEmpty()) { + blob.content_type_allocated = true; + blob.content_type = bun.default_allocator.dupe(u8, content_type) catch @panic("failed to allocate memory for blob content type"); + blob.content_type_was_set = true; + } else { + blob.content_type = content_type; + blob.content_type_was_set = false; + blob.content_type_allocated = false; + } } wrap.form.appendBlob(wrap.globalThis, &key, &blob, &filename); diff --git a/test/js/bun/http/fetch-file-upload.test.ts b/test/js/bun/http/fetch-file-upload.test.ts new file mode 100644 index 000000000..33f0d0581 --- /dev/null +++ b/test/js/bun/http/fetch-file-upload.test.ts @@ -0,0 +1,51 @@ +import { expect, test, describe } from "bun:test"; +import { withoutAggressiveGC } from "harness"; + +test("uploads roundtrip", async () => { + const body = Bun.file(import.meta.dir + "/fetch.js.txt"); + const bodyText = await body.text(); + + const server = Bun.serve({ + port: 0, + development: false, + async fetch(req) { + const text = await req.text(); + expect(text).toBe(bodyText); + + return new Response(Bun.file(import.meta.dir + "/fetch.js.txt")); + }, + }); + + // @ts-ignore + const reqBody = new Request(`http://${server.hostname}:${server.port}`, { + body, + method: "POST", + }); + const res = await fetch(reqBody); + expect(res.status).toBe(200); + + // but it does for Response + expect(res.headers.get("Content-Type")).toBe("text/plain;charset=utf-8"); + const resText = await res.text(); + expect(resText).toBe(bodyText); + + server.stop(true); +}); + +test("missing file throws the expected error", async () => { + Bun.gc(true); + // Run this 1000 times to check for GC bugs + withoutAggressiveGC(() => { + const body = Bun.file(import.meta.dir + "/fetch123123231123.js.txt"); + for (let i = 0; i < 1000; i++) { + const resp = fetch(`http://example.com`, { + body, + method: "POST", + proxy: "http://localhost:3000", + }); + expect(Bun.peek.status(resp)).toBe("rejected"); + expect(async () => await resp).toThrow("No such file or directory"); + } + }); + Bun.gc(true); +}); diff --git a/test/js/bun/http/serve.test.ts b/test/js/bun/http/serve.test.ts index 73de6a381..a152e7e09 100644 --- a/test/js/bun/http/serve.test.ts +++ b/test/js/bun/http/serve.test.ts @@ -1006,3 +1006,55 @@ it("request body and signal life cycle", async () => { server.stop(true); } }); + +it("propagates content-type from a Bun.file()'s file path in fetch()", async () => { + const body = Bun.file(import.meta.dir + "/fetch.js.txt"); + const bodyText = await body.text(); + + const server = Bun.serve({ + port: 0, + development: false, + async fetch(req) { + expect(req.headers.get("Content-Type")).toBe("text/plain;charset=utf-8"); + const text = await req.text(); + expect(text).toBe(bodyText); + + return new Response(Bun.file(import.meta.dir + "/fetch.js.txt")); + }, + }); + + // @ts-ignore + const reqBody = new Request(`http://${server.hostname}:${server.port}`, { + body, + method: "POST", + }); + const res = await fetch(reqBody); + expect(res.status).toBe(200); + + // but it does for Response + expect(res.headers.get("Content-Type")).toBe("text/plain;charset=utf-8"); + + server.stop(true); +}); + +it("does propagate type for Blob", async () => { + const server = Bun.serve({ + port: 0, + development: false, + async fetch(req) { + expect(req.headers.get("Content-Type")).toBeNull(); + return new Response(new Blob(["hey"], { type: "text/plain;charset=utf-8" })); + }, + }); + + const body = new Blob(["hey"], { type: "text/plain;charset=utf-8" }); + // @ts-ignore + const res = await fetch(`http://${server.hostname}:${server.port}`, { + body, + method: "POST", + }); + expect(res.status).toBe(200); + expect(res.headers.get("Content-Type")).toBe("text/plain;charset=utf-8"); + + server.stop(true); +}); diff --git a/test/js/web/html/FormData.test.ts b/test/js/web/html/FormData.test.ts index abb298c1a..cbaf5aaa7 100644 --- a/test/js/web/html/FormData.test.ts +++ b/test/js/web/html/FormData.test.ts @@ -302,6 +302,125 @@ describe("FormData", () => { server.stop(true); }); + for (let useRequestConstructor of [true, false]) { + describe(useRequestConstructor ? "Request constructor" : "fetch()", () => { + function send(args: Parameters<typeof fetch>) { + if (useRequestConstructor) { + return fetch(new Request(...args)); + } else { + return fetch(...args); + } + } + for (let headers of [{}, undefined, { headers: { X: "Y" } }]) { + describe("headers: " + Bun.inspect(headers).replaceAll(/([\n ])/gim, ""), () => { + it("send on HTTP server with FormData & Blob (roundtrip)", async () => { + let contentType = ""; + const server = Bun.serve({ + port: 0, + development: false, + async fetch(req) { + const formData = await req.formData(); + contentType = req.headers.get("Content-Type")!; + return new Response(formData); + }, + }); + + const form = new FormData(); + form.append("foo", new Blob(["baz"], { type: "text/plain" }), "bar"); + form.append("bar", "baz"); + + // @ts-ignore + const reqBody = [ + `http://${server.hostname}:${server.port}`, + { + body: form, + + headers, + method: "POST", + }, + ]; + const res = await send(reqBody); + const body = await res.formData(); + expect(await (body.get("foo") as Blob).text()).toBe("baz"); + expect(body.get("bar")).toBe("baz"); + server.stop(true); + }); + + it("send on HTTP server with FormData & Bun.file (roundtrip)", async () => { + let contentType = ""; + const server = Bun.serve({ + port: 0, + development: false, + async fetch(req) { + const formData = await req.formData(); + contentType = req.headers.get("Content-Type")!; + return new Response(formData); + }, + }); + + const form = new FormData(); + const file = Bun.file(import.meta.dir + "/form-data-fixture.txt"); + const text = await file.text(); + form.append("foo", file); + form.append("bar", "baz"); + + // @ts-ignore + const reqBody = [ + `http://${server.hostname}:${server.port}`, + { + body: form, + + headers, + method: "POST", + }, + ]; + const res = await send(reqBody); + const body = await res.formData(); + expect(await (body.get("foo") as Blob).text()).toBe(text); + expect(contentType).toContain("multipart/form-data"); + expect(body.get("bar")).toBe("baz"); + expect(contentType).toContain("multipart/form-data"); + + server.stop(true); + }); + + it("send on HTTP server with FormData (roundtrip)", async () => { + let contentType = ""; + const server = Bun.serve({ + port: 0, + development: false, + async fetch(req) { + const formData = await req.formData(); + contentType = req.headers.get("Content-Type")!; + return new Response(formData); + }, + }); + + const form = new FormData(); + form.append("foo", "boop"); + form.append("bar", "baz"); + + // @ts-ignore + const reqBody = [ + `http://${server.hostname}:${server.port}`, + { + body: form, + + headers, + method: "POST", + }, + ]; + const res = await send(reqBody); + const body = await res.formData(); + expect(contentType).toContain("multipart/form-data"); + expect(body.get("foo")).toBe("boop"); + expect(body.get("bar")).toBe("baz"); + server.stop(true); + }); + }); + } + }); + } describe("Bun.file support", () => { describe("roundtrip", () => { const path = import.meta.dir + "/form-data-fixture.txt"; |