fix(node/fetch): Make data URL fetch consistent with node (#5126)

author: David Hewitt <davidmhewitt@users.noreply.github.com> 2023-09-13 13:35:39 +0100
committer: GitHub <noreply@github.com> 2023-09-13 05:35:39 -0700
commit: c3455c0ceee6bbe399781819a42fff6cf24792e2 (patch)
tree: 203c044cedd5508a0ad2b3f42d50686ed6d0f049
parent: 9101774593288e32c7d9f0c77ab6f133628ebd10 (diff)
download: bun-c3455c0ceee6bbe399781819a42fff6cf24792e2.tar.gz
bun-c3455c0ceee6bbe399781819a42fff6cf24792e2.tar.zst
bun-c3455c0ceee6bbe399781819a42fff6cf24792e2.zip
2 files changed, 37 insertions, 7 deletions
diff --git a/src/resolver/data_url.zig b/src/resolver/data_url.zig
index f2a042c4f..771f56ffe 100644
--- a/src/resolver/data_url.zig
+++ b/src/resolver/data_url.zig
@@ -39,18 +39,24 @@ pub const PercentEncoding = struct {
         };
     }
 
-    /// decode path if it is percent encoded
+    /// Decodes `path` if it is percent-encoded. Returns `error.InvalidCharacter` if URL-unsafe characters are present and not percent-encoded.
     pub fn decode(allocator: Allocator, path: []const u8) EncodeError!?[]u8 {
+        return _decode(allocator, path, true);
+    }
+
+    /// Replaces percent-encoded sequences within `path` without returning an error if other URL-unsafe characters are present.
+    pub fn decodeUnstrict(allocator: Allocator, path: []const u8) EncodeError!?[]u8 {
+        return _decode(allocator, path, false);
+    }
+
+    fn _decode(allocator: Allocator, path: []const u8, strict: bool) EncodeError!?[]u8 {
         var ret: ?[]u8 = null;
         errdefer if (ret) |some| allocator.free(some);
         var ret_index: usize = 0;
         var i: usize = 0;
 
         while (i < path.len) : (i += 1) {
-            if (path[i] == '%') {
-                if (!isPchar(path[i..])) {
-                    return error.InvalidCharacter;
-                }
+            if (path[i] == '%' and path[i..].len >= 3 and isHex(path[i + 1]) and isHex(path[i + 2])) {
                 if (ret == null) {
                     ret = try allocator.alloc(u8, path.len);
                     bun.copy(u8, ret.?, path[0..i]);
@@ -63,7 +69,7 @@ pub const PercentEncoding = struct {
                 ret.?[ret_index] = new;
                 ret_index += 1;
                 i += 2;
-            } else if (path[i] != '/' and !isPchar(path[i..])) {
+            } else if (path[i] != '/' and !isPchar(path[i..]) and strict) {
                 return error.InvalidCharacter;
             } else if (ret != null) {
                 ret.?[ret_index] = path[i];
@@ -112,7 +118,7 @@ pub const DataURL = struct {
     }
 
     pub fn decodeData(url: DataURL, allocator: std.mem.Allocator) ![]u8 {
-        const percent_decoded = PercentEncoding.decode(allocator, url.data) catch url.data orelse url.data;
+        const percent_decoded = PercentEncoding.decodeUnstrict(allocator, url.data) catch url.data orelse url.data;
         if (url.is_base64) {
             const len = bun.base64.decodeLen(percent_decoded);
             var buf = try allocator.alloc(u8, len);
diff --git a/test/js/web/fetch/fetch.test.ts b/test/js/web/fetch/fetch.test.ts
index aa44ee76a..4ef5d7bba 100644
--- a/test/js/web/fetch/fetch.test.ts
+++ b/test/js/web/fetch/fetch.test.ts
@@ -93,6 +93,30 @@ describe("fetch data urls", () => {
     expect(blob.type).toBe("text/plain;charset=utf-8");
     expect(blob.text()).resolves.toBe("helloworld!");
   });
+  it("unstrict parsing of invalid URL characters", async () => {
+    var url = "data:application/json,{%7B%7D}";
+    var res = await fetch(url);
+    expect(res.status).toBe(200);
+    expect(res.statusText).toBe("OK");
+    expect(res.ok).toBe(true);
+
+    var blob = await res.blob();
+    expect(blob.size).toBe(4);
+    expect(blob.type).toBe("application/json;charset=utf-8");
+    expect(blob.text()).resolves.toBe("{{}}");
+  });
+  it("unstrict parsing of double percent characters", async () => {
+    var url = "data:application/json,{%%7B%7D%%}%%";
+    var res = await fetch(url);
+    expect(res.status).toBe(200);
+    expect(res.statusText).toBe("OK");
+    expect(res.ok).toBe(true);
+
+    var blob = await res.blob();
+    expect(blob.size).toBe(9);
+    expect(blob.type).toBe("application/json;charset=utf-8");
+    expect(blob.text()).resolves.toBe("{%{}%%}%%");
+  });
   it("data url (invalid)", async () => {
     var url = "data:Hello%2C%20World!";
     expect(async () => {
author	David Hewitt <davidmhewitt@users.noreply.github.com>	2023-09-13 13:35:39 +0100
committer	GitHub <noreply@github.com>	2023-09-13 05:35:39 -0700
commit	c3455c0ceee6bbe399781819a42fff6cf24792e2 (patch)
tree	203c044cedd5508a0ad2b3f42d50686ed6d0f049
parent	9101774593288e32c7d9f0c77ab6f133628ebd10 (diff)
download	bun-c3455c0ceee6bbe399781819a42fff6cf24792e2.tar.gz bun-c3455c0ceee6bbe399781819a42fff6cf24792e2.tar.zst bun-c3455c0ceee6bbe399781819a42fff6cf24792e2.zip