diff options
field | value | |
---|---|---|
author | 2021-10-10 23:41:18 -0700 | |
committer | 2021-10-10 23:41:18 -0700 | |
commit | 0db7af6166dba28126ab71814fc84c75bbd9acef (patch) | |
tree | 2c387e4bfb7ef9794164fb8341b5f507b2d03a58 | |
parent | 5e3d1dd4b5618a64dac60a1c3345fba084be34b2 (diff) | |
download | bun-0db7af6166dba28126ab71814fc84c75bbd9acef.tar.gz bun-0db7af6166dba28126ab71814fc84c75bbd9acef.tar.zst bun-0db7af6166dba28126ab71814fc84c75bbd9acef.zip |
[fetch] Add support for gzip & deflate to the http client
Powered by Cloudflare's zlib fork
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | .gitignore | 3 |
| -rw-r--r-- | .gitmodules | 3 |
| -rw-r--r-- | Makefile | 6 |
| -rw-r--r-- | README.md | 1 |
| -rw-r--r-- | fixtures_example.com.html.gz | bin (0 -> 648 bytes) |
| -rw-r--r-- | src/Wyhash.zig | 332 |
| m--------- | src/deps/zlib | 0 |
| -rw-r--r-- | src/global.zig | 5 |
| -rw-r--r-- | src/hash_map.zig | 2 |
| -rw-r--r-- | src/http.zig | 6 |
| -rw-r--r-- | src/http_client.zig | 302 |
| -rw-r--r-- | src/linker.zig | 1 |
| -rw-r--r-- | src/resolver/resolver.zig | 2 |
| -rw-r--r-- | src/string_types.zig | 1 |
| -rw-r--r-- | src/zee_alloc.zig | 667 |
| -rw-r--r-- | src/zlib.test.gz | bin (0 -> 4654 bytes) |
| -rw-r--r-- | src/zlib.test.txt | 651 |
| -rw-r--r-- | src/zlib.zig | 573 |
18 files changed, 1460 insertions, 1095 deletions
diff --git a/.gitignore b/.gitignore index b11d68d38..d4eca5e8c 100644 --- a/.gitignore +++ b/.gitignore @@ -66,4 +66,5 @@ packages/debug-* packages/bun-cli/postinstall.js packages/bun-*/bin/* -packages/bun-cli/bin/*
\ No newline at end of file +packages/bun-cli/bin/* +bun-test-scratch
\ No newline at end of file diff --git a/.gitmodules b/.gitmodules index 23d7b0e9b..88b3489cb 100644 --- a/.gitmodules +++ b/.gitmodules @@ -13,3 +13,6 @@ path = src/deps/mimalloc url = https://github.com/microsoft/mimalloc.git ignore = dirty +[submodule "src/deps/zlib"] + path = src/deps/zlib + url = https://github.com/cloudflare/zlib @@ -68,10 +68,13 @@ endif bun: vendor build-obj bun-link-lld-release -vendor-without-check: api analytics node-fallbacks runtime_js fallback_decoder bun_error mimalloc picohttp +vendor-without-check: api analytics node-fallbacks runtime_js fallback_decoder bun_error mimalloc picohttp zlib vendor: require init-submodules vendor-without-check +zlib: + cd src/deps/zlib; cmake .; make; + require: @echo "Checking if the required utilities are available..." @realpath --version >/dev/null 2>&1 || (echo "ERROR: realpath is required."; exit 1) @@ -317,6 +320,7 @@ BUN_LLD_FLAGS := $(OBJ_FILES) \ ${JSC_FILES} \ src/deps/picohttpparser.o \ src/deps/mimalloc/libmimalloc.a \ + src/deps/zlib/libz.a \ $(CLANG_FLAGS) \ @@ -557,6 +557,7 @@ Bun also statically links these libraries: - `libicu`, which can be found here: https://github.com/unicode-org/icu/blob/main/icu4c/LICENSE - [`picohttp`](https://github.com/h2o/picohttpparser), which is dual-licensed under the Perl License or the MIT License - [`mimalloc`](https://github.com/microsoft/mimalloc), which is MIT licensed +- [`zlib-cloudflare`](https://github.com/cloudflare/zlib), which is zlib licensed For compatibiltiy reasons, these NPM packages are embedded into Bun's binary and injected if imported. 
diff --git a/fixtures_example.com.html.gz b/fixtures_example.com.html.gz Binary files differnew file mode 100644 index 000000000..247043b6d --- /dev/null +++ b/fixtures_example.com.html.gz diff --git a/src/Wyhash.zig b/src/Wyhash.zig deleted file mode 100644 index b13e267b4..000000000 --- a/src/Wyhash.zig +++ /dev/null @@ -1,332 +0,0 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2015-2021 Zig Contributors -// This file is part of [zig](https://ziglang.org/), which is MIT licensed. -// The MIT license requires this copyright notice to be included in all copies -// and substantial portions of the software. -const std = @import("std"); -const mem = std.mem; - -const primes = [_]u64{ - 0xa0761d6478bd642f, - 0xe7037ed1a0b428db, - 0x8ebc6af09c88c6e3, - 0x589965cc75374cc3, - 0x1d8e4e27c47d124f, -}; - -fn read_bytes(comptime bytes: u8, data: []const u8) u64 { - const T = std.meta.Int(.unsigned, 8 * bytes); - return mem.readIntLittle(T, data[0..bytes]); -} - -fn read_8bytes_swapped(data: []const u8) u64 { - return (read_bytes(4, data) << 32 | read_bytes(4, data[4..])); -} - -fn mum(a: u64, b: u64) u64 { - var r = std.math.mulWide(u64, a, b); - r = (r >> 64) ^ r; - return @truncate(u64, r); -} - -fn mix0(a: u64, b: u64, seed: u64) u64 { - return mum(a ^ seed ^ primes[0], b ^ seed ^ primes[1]); -} - -fn mix1(a: u64, b: u64, seed: u64) u64 { - return mum(a ^ seed ^ primes[2], b ^ seed ^ primes[3]); -} - -// Wyhash version which does not store internal state for handling partial buffers. -// This is needed so that we can maximize the speed for the short key case, which will -// use the non-iterative api which the public Wyhash exposes. 
-pub fn WyhashGenerator(comptime ValueType: type) type { - return struct { - seed: u64, - msg_len: usize, - - pub fn init(seed: u64) WyhashStateless { - return WyhashStateless{ - .seed = seed, - .msg_len = 0, - }; - } - - fn round(self: *WyhashStateless, b: []const u8) void { - std.debug.assert(b.len == 32); - - self.seed = mix0( - read_bytes(8, b[0..]), - read_bytes(8, b[8..]), - self.seed, - ) ^ mix1( - read_bytes(8, b[16..]), - read_bytes(8, b[24..]), - self.seed, - ); - } - - pub fn update(self: *WyhashStateless, b: []const u8) void { - std.debug.assert(b.len % 32 == 0); - - var off: usize = 0; - while (off < b.len) : (off += 32) { - @call(.{ .modifier = .always_inline }, self.round, .{b[off .. off + 32]}); - } - - self.msg_len += b.len; - } - - pub fn final(self: *WyhashStateless, b: []const u8) u64 { - std.debug.assert(b.len < 32); - - const seed = self.seed; - const rem_len = @intCast(u5, b.len); - const rem_key = b[0..rem_len]; - - self.seed = switch (rem_len) { - 0 => seed, - 1 => mix0(read_bytes(1, rem_key), primes[4], seed), - 2 => mix0(read_bytes(2, rem_key), primes[4], seed), - 3 => mix0((read_bytes(2, rem_key) << 8) | read_bytes(1, rem_key[2..]), primes[4], seed), - 4 => mix0(read_bytes(4, rem_key), primes[4], seed), - 5 => mix0((read_bytes(4, rem_key) << 8) | read_bytes(1, rem_key[4..]), primes[4], seed), - 6 => mix0((read_bytes(4, rem_key) << 16) | read_bytes(2, rem_key[4..]), primes[4], seed), - 7 => mix0((read_bytes(4, rem_key) << 24) | (read_bytes(2, rem_key[4..]) << 8) | read_bytes(1, rem_key[6..]), primes[4], seed), - 8 => mix0(read_8bytes_swapped(rem_key), primes[4], seed), - 9 => mix0(read_8bytes_swapped(rem_key), read_bytes(1, rem_key[8..]), seed), - 10 => mix0(read_8bytes_swapped(rem_key), read_bytes(2, rem_key[8..]), seed), - 11 => mix0(read_8bytes_swapped(rem_key), (read_bytes(2, rem_key[8..]) << 8) | read_bytes(1, rem_key[10..]), seed), - 12 => mix0(read_8bytes_swapped(rem_key), read_bytes(4, rem_key[8..]), seed), - 13 => 
mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 8) | read_bytes(1, rem_key[12..]), seed), - 14 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 16) | read_bytes(2, rem_key[12..]), seed), - 15 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 24) | (read_bytes(2, rem_key[12..]) << 8) | read_bytes(1, rem_key[14..]), seed), - 16 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed), - 17 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(1, rem_key[16..]), primes[4], seed), - 18 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(2, rem_key[16..]), primes[4], seed), - 19 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(2, rem_key[16..]) << 8) | read_bytes(1, rem_key[18..]), primes[4], seed), - 20 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(4, rem_key[16..]), primes[4], seed), - 21 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 8) | read_bytes(1, rem_key[20..]), primes[4], seed), - 22 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 16) | read_bytes(2, rem_key[20..]), primes[4], seed), - 23 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 24) | (read_bytes(2, rem_key[20..]) << 8) | read_bytes(1, rem_key[22..]), primes[4], seed), - 24 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), primes[4], seed), - 25 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(1, rem_key[24..]), seed), - 26 => mix0(read_8bytes_swapped(rem_key), 
read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(2, rem_key[24..]), seed), - 27 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(2, rem_key[24..]) << 8) | read_bytes(1, rem_key[26..]), seed), - 28 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(4, rem_key[24..]), seed), - 29 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 8) | read_bytes(1, rem_key[28..]), seed), - 30 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 16) | read_bytes(2, rem_key[28..]), seed), - 31 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 24) | (read_bytes(2, rem_key[28..]) << 8) | read_bytes(1, rem_key[30..]), seed), - }; - - self.msg_len += b.len; - return mum(self.seed ^ self.msg_len, primes[4]); - } - - pub fn hash(seed: u64, value: ValueType) u64 { - const input = std.mem.asBytes(&value); - const aligned_len = @sizeOf(value) - (@sizeOf(value) % 32); - - var c = WyhashStateless.init(seed); - @call(.{ .modifier = .always_inline }, c.update, .{input[0..aligned_len]}); - return @call(.{ .modifier = .always_inline }, c.final, .{input[aligned_len..]}); - } - }; -} -// Wyhash version which does not store internal state for handling partial buffers. -// This is needed so that we can maximize the speed for the short key case, which will -// use the non-iterative api which the public Wyhash exposes. 
-const WyhashStateless = struct { - seed: u64, - msg_len: usize, - - pub fn init(seed: u64) WyhashStateless { - return WyhashStateless{ - .seed = seed, - .msg_len = 0, - }; - } - - fn round(self: *WyhashStateless, b: []const u8) void { - std.debug.assert(b.len == 32); - - self.seed = mix0( - read_bytes(8, b[0..]), - read_bytes(8, b[8..]), - self.seed, - ) ^ mix1( - read_bytes(8, b[16..]), - read_bytes(8, b[24..]), - self.seed, - ); - } - - pub fn update(self: *WyhashStateless, b: []const u8) void { - std.debug.assert(b.len % 32 == 0); - - var off: usize = 0; - while (off < b.len) : (off += 32) { - @call(.{ .modifier = .always_inline }, self.round, .{b[off .. off + 32]}); - } - - self.msg_len += b.len; - } - - pub fn final(self: *WyhashStateless, b: []const u8) u64 { - std.debug.assert(b.len < 32); - - const seed = self.seed; - const rem_len = @intCast(u5, b.len); - const rem_key = b[0..rem_len]; - - self.seed = switch (rem_len) { - 0 => seed, - 1 => mix0(read_bytes(1, rem_key), primes[4], seed), - 2 => mix0(read_bytes(2, rem_key), primes[4], seed), - 3 => mix0((read_bytes(2, rem_key) << 8) | read_bytes(1, rem_key[2..]), primes[4], seed), - 4 => mix0(read_bytes(4, rem_key), primes[4], seed), - 5 => mix0((read_bytes(4, rem_key) << 8) | read_bytes(1, rem_key[4..]), primes[4], seed), - 6 => mix0((read_bytes(4, rem_key) << 16) | read_bytes(2, rem_key[4..]), primes[4], seed), - 7 => mix0((read_bytes(4, rem_key) << 24) | (read_bytes(2, rem_key[4..]) << 8) | read_bytes(1, rem_key[6..]), primes[4], seed), - 8 => mix0(read_8bytes_swapped(rem_key), primes[4], seed), - 9 => mix0(read_8bytes_swapped(rem_key), read_bytes(1, rem_key[8..]), seed), - 10 => mix0(read_8bytes_swapped(rem_key), read_bytes(2, rem_key[8..]), seed), - 11 => mix0(read_8bytes_swapped(rem_key), (read_bytes(2, rem_key[8..]) << 8) | read_bytes(1, rem_key[10..]), seed), - 12 => mix0(read_8bytes_swapped(rem_key), read_bytes(4, rem_key[8..]), seed), - 13 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, 
rem_key[8..]) << 8) | read_bytes(1, rem_key[12..]), seed), - 14 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 16) | read_bytes(2, rem_key[12..]), seed), - 15 => mix0(read_8bytes_swapped(rem_key), (read_bytes(4, rem_key[8..]) << 24) | (read_bytes(2, rem_key[12..]) << 8) | read_bytes(1, rem_key[14..]), seed), - 16 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed), - 17 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(1, rem_key[16..]), primes[4], seed), - 18 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(2, rem_key[16..]), primes[4], seed), - 19 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(2, rem_key[16..]) << 8) | read_bytes(1, rem_key[18..]), primes[4], seed), - 20 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_bytes(4, rem_key[16..]), primes[4], seed), - 21 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 8) | read_bytes(1, rem_key[20..]), primes[4], seed), - 22 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 16) | read_bytes(2, rem_key[20..]), primes[4], seed), - 23 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1((read_bytes(4, rem_key[16..]) << 24) | (read_bytes(2, rem_key[20..]) << 8) | read_bytes(1, rem_key[22..]), primes[4], seed), - 24 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), primes[4], seed), - 25 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(1, rem_key[24..]), seed), - 26 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ 
mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(2, rem_key[24..]), seed), - 27 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(2, rem_key[24..]) << 8) | read_bytes(1, rem_key[26..]), seed), - 28 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), read_bytes(4, rem_key[24..]), seed), - 29 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 8) | read_bytes(1, rem_key[28..]), seed), - 30 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 16) | read_bytes(2, rem_key[28..]), seed), - 31 => mix0(read_8bytes_swapped(rem_key), read_8bytes_swapped(rem_key[8..]), seed) ^ mix1(read_8bytes_swapped(rem_key[16..]), (read_bytes(4, rem_key[24..]) << 24) | (read_bytes(2, rem_key[28..]) << 8) | read_bytes(1, rem_key[30..]), seed), - }; - - self.msg_len += b.len; - return mum(self.seed ^ self.msg_len, primes[4]); - } - - pub fn hash(seed: u64, input: []const u8) u64 { - const aligned_len = input.len - (input.len % 32); - - var c = WyhashStateless.init(seed); - @call(.{ .modifier = .always_inline }, c.update, .{input[0..aligned_len]}); - return @call(.{ .modifier = .always_inline }, c.final, .{input[aligned_len..]}); - } -}; - -/// Fast non-cryptographic 64bit hash function. 
-/// See https://github.com/wangyi-fudan/wyhash -pub const Wyhash = struct { - state: WyhashStateless, - - buf: [32]u8, - buf_len: usize, - - pub fn init(seed: u64) Wyhash { - return Wyhash{ - .state = WyhashStateless.init(seed), - .buf = undefined, - .buf_len = 0, - }; - } - - pub fn update(self: *Wyhash, b: []const u8) void { - var off: usize = 0; - - if (self.buf_len != 0 and self.buf_len + b.len >= 32) { - off += 32 - self.buf_len; - mem.copy(u8, self.buf[self.buf_len..], b[0..off]); - self.state.update(self.buf[0..]); - self.buf_len = 0; - } - - const remain_len = b.len - off; - const aligned_len = remain_len - (remain_len % 32); - self.state.update(b[off .. off + aligned_len]); - - mem.copy(u8, self.buf[self.buf_len..], b[off + aligned_len ..]); - self.buf_len += @intCast(u8, b[off + aligned_len ..].len); - } - - pub fn final(self: *Wyhash) u64 { - const seed = self.state.seed; - const rem_len = @intCast(u5, self.buf_len); - const rem_key = self.buf[0..self.buf_len]; - - return self.state.final(rem_key); - } - - pub fn hash(seed: u64, input: []const u8) u64 { - return WyhashStateless.hash(seed, input); - } -}; - -const expectEqual = std.testing.expectEqual; - -test "test vectors" { - const hash = Wyhash.hash; - - try expectEqual(hash(0, ""), 0x0); - try expectEqual(hash(1, "a"), 0xbed235177f41d328); - try expectEqual(hash(2, "abc"), 0xbe348debe59b27c3); - try expectEqual(hash(3, "message digest"), 0x37320f657213a290); - try expectEqual(hash(4, "abcdefghijklmnopqrstuvwxyz"), 0xd0b270e1d8a7019c); - try expectEqual(hash(5, "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"), 0x602a1894d3bbfe7f); - try expectEqual(hash(6, "12345678901234567890123456789012345678901234567890123456789012345678901234567890"), 0x829e9c148b75970e); -} - -test "test vectors streaming" { - var wh = Wyhash.init(5); - for ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") |e| { - wh.update(mem.asBytes(&e)); - } - try expectEqual(wh.final(), 
0x602a1894d3bbfe7f); - - const pattern = "1234567890"; - const count = 8; - const result = 0x829e9c148b75970e; - try expectEqual(Wyhash.hash(6, pattern ** 8), result); - - wh = Wyhash.init(6); - var i: u32 = 0; - while (i < count) : (i += 1) { - wh.update(pattern); - } - try expectEqual(wh.final(), result); -} - -test "iterative non-divisible update" { - var buf: [8192]u8 = undefined; - for (buf) |*e, i| { - e.* = @truncate(u8, i); - } - - const seed = 0x128dad08f; - - var end: usize = 32; - while (end < buf.len) : (end += 32) { - const non_iterative_hash = Wyhash.hash(seed, buf[0..end]); - - var wy = Wyhash.init(seed); - var i: usize = 0; - while (i < end) : (i += 33) { - wy.update(buf[i..std.math.min(i + 33, end)]); - } - const iterative_hash = wy.final(); - - try std.testing.expectEqual(iterative_hash, non_iterative_hash); - } -} diff --git a/src/deps/zlib b/src/deps/zlib new file mode 160000 +Subproject 959b4ea305821e753385e873ec4edfaa9a5d49b diff --git a/src/global.zig b/src/global.zig index 651d59590..738b294b5 100644 --- a/src/global.zig +++ b/src/global.zig @@ -87,6 +87,11 @@ pub const Output = struct { return enable_ansi_colors and !isWindows; } + pub fn initTest() void { + var in = std.io.getStdErr(); + var src = Output.Source.init(in, in); + Output.Source.set(&src); + } pub fn enableBuffering() void { enable_buffering = true; } diff --git a/src/hash_map.zig b/src/hash_map.zig index 1c5d86900..fa553f939 100644 --- a/src/hash_map.zig +++ b/src/hash_map.zig @@ -12,7 +12,7 @@ const mem = std.mem; const meta = std.meta; const trait = meta.trait; const Allocator = mem.Allocator; -const Wyhash = @import("./Wyhash.zig").Wyhash; +const Wyhash = std.hash.Wyhash; pub fn getAutoHashFn(comptime K: type) (fn (K) u64) { comptime { diff --git a/src/http.zig b/src/http.zig index 33c544436..5cc7376cb 100644 --- a/src/http.zig +++ b/src/http.zig @@ -36,7 +36,7 @@ const IPv6 = std.x.os.IPv6; const Socket = std.x.os.Socket; const os = std.os; -const picohttp = 
@import("picohttp"); +const picohttp = @import("./deps/picohttp.zig"); const Header = picohttp.Header; const Request = picohttp.Request; const Response = picohttp.Response; @@ -592,8 +592,8 @@ pub const RequestContext = struct { } pub fn appendHeader(ctx: *RequestContext, comptime key: string, value: string) void { - if (isDebug or isTest) std.debug.assert(!ctx.has_written_last_header); - if (isDebug or isTest) std.debug.assert(ctx.res_headers_count < res_headers_buf.len); + if (comptime isDebug or isTest) std.debug.assert(!ctx.has_written_last_header); + if (comptime isDebug or isTest) std.debug.assert(ctx.res_headers_count < res_headers_buf.len); res_headers_buf[ctx.res_headers_count] = Header{ .name = key, .value = value }; ctx.res_headers_count += 1; } diff --git a/src/http_client.zig b/src/http_client.zig index 111656492..c14e5d1c2 100644 --- a/src/http_client.zig +++ b/src/http_client.zig @@ -1,3 +1,6 @@ +// @link "/Users/jarred/Code/bun/src/deps/zlib/libz.a" +// @link "/Users/jarred/Code/bun/src/deps/picohttpparser.o" + const picohttp = @import("picohttp"); usingnamespace @import("./global.zig"); const std = @import("std"); @@ -6,7 +9,7 @@ const URL = @import("./query_string_map.zig").URL; const Method = @import("./http.zig").Method; const iguanaTLS = @import("iguanaTLS"); const Api = @import("./api/schema.zig").Api; - +const Lock = @import("./lock.zig").Lock; const HTTPClient = @This(); const SOCKET_FLAGS = os.SOCK_CLOEXEC; @@ -35,7 +38,8 @@ header_entries: Headers.Entries, header_buf: string, url: URL, allocator: *std.mem.Allocator, -verbose: bool = false, +verbose: bool = isTest, +tcp_client: tcp.Client = undefined, pub fn init(allocator: *std.mem.Allocator, method: Method, url: URL, header_entries: Headers.Entries, header_buf: string) HTTPClient { return HTTPClient{ @@ -79,13 +83,23 @@ fn hashHeaderName(name: string) u64 { const host_header_hash = hashHeaderName("Host"); const connection_header_hash = hashHeaderName("Connection"); +pub const Encoding = 
enum { + identity, + gzip, + deflate, + brotli, +}; + const content_encoding_hash = hashHeaderName("Content-Encoding"); + const host_header_name = "Host"; const content_length_header_name = "Content-Length"; const content_length_header_hash = hashHeaderName("Content-Length"); const connection_header = picohttp.Header{ .name = "Connection", .value = "close" }; const accept_header = picohttp.Header{ .name = "Accept", .value = "*/*" }; const accept_header_hash = hashHeaderName("Accept"); +const accept_encoding_header = picohttp.Header{ .name = "Accept-Encoding", .value = "deflate, gzip" }; +const accept_encoding_header_hash = hashHeaderName("Accept-Encoding"); const user_agent_header = picohttp.Header{ .name = "User-Agent", .value = "Bun.js " ++ Global.package_json_version }; const user_agent_header_hash = hashHeaderName("User-Agent"); @@ -99,11 +113,13 @@ pub fn buildRequest(this: *const HTTPClient, body_len: usize) picohttp.Request { var header_names = header_entries.items(.name); var header_values = header_entries.items(.value); + var override_accept_encoding = false; + var override_user_agent = false; for (header_names) |head, i| { const name = this.headerStr(head); // Hash it as lowercase - const hash = hashHeaderName(request_headers_buf[header_count].name); + const hash = hashHeaderName(name); // Skip host and connection header // we manage those @@ -111,13 +127,14 @@ pub fn buildRequest(this: *const HTTPClient, body_len: usize) picohttp.Request { host_header_hash, connection_header_hash, content_length_header_hash, - accept_header_hash, => continue, else => {}, } override_user_agent = override_user_agent or hash == user_agent_header_hash; + override_accept_encoding = override_accept_encoding or hash == accept_encoding_header_hash; + request_headers_buf[header_count] = picohttp.Header{ .name = name, .value = this.headerStr(header_values[i]), @@ -156,6 +173,11 @@ pub fn buildRequest(this: *const HTTPClient, body_len: usize) picohttp.Request { }; header_count += 
1; + if (!override_accept_encoding) { + request_headers_buf[header_count] = accept_encoding_header; + header_count += 1; + } + if (body_len > 0) { request_headers_buf[header_count] = picohttp.Header{ .name = content_length_header_name, @@ -209,15 +231,15 @@ pub inline fn send(this: *HTTPClient, body: []const u8, body_out_str: *MutableSt } pub fn sendHTTP(this: *HTTPClient, body: []const u8, body_out_str: *MutableString) !picohttp.Response { - var client = try this.connect(); + this.tcp_client = try this.connect(); defer { - std.os.closeSocket(client.socket.fd); + std.os.closeSocket(this.tcp_client.socket.fd); } var request = buildRequest(this, body.len); if (this.verbose) { Output.prettyErrorln("{s}", .{request}); } - var client_writer = client.writer(SOCKET_FLAGS); + var client_writer = this.tcp_client.writer(SOCKET_FLAGS); { var client_writer_buffered = std.io.bufferedWriter(client_writer); var client_writer_buffered_writer = client_writer_buffered.writer(); @@ -231,8 +253,56 @@ pub fn sendHTTP(this: *HTTPClient, body: []const u8, body_out_str: *MutableStrin try client_writer.writeAll(body); } - var client_reader = client.reader(SOCKET_FLAGS); + var client_reader = this.tcp_client.reader(SOCKET_FLAGS); + return this.processResponse( + false, + @TypeOf(client_reader), + client_reader, + body_out_str, + ); +} + +const ZlibPool = struct { + lock: Lock = Lock.init(), + items: std.ArrayList(*MutableString), + allocator: *std.mem.Allocator, + pub var instance: ZlibPool = undefined; + pub var loaded: bool = false; + + pub fn init(allocator: *std.mem.Allocator) ZlibPool { + return ZlibPool{ + .allocator = allocator, + .items = std.ArrayList(*MutableString).init(allocator), + }; + } + + pub fn get(this: *ZlibPool) !*MutableString { + this.lock.lock(); + defer this.lock.unlock(); + switch (this.items.items.len) { + 0 => { + var mutable = try this.allocator.create(MutableString); + mutable.* = try MutableString.init(this.allocator, 0); + return mutable; + }, + else => { + 
return this.items.pop(); + }, + } + + return item; + } + + pub fn put(this: *ZlibPool, mutable: *MutableString) !void { + this.lock.lock(); + defer this.lock.unlock(); + mutable.reset(); + try this.items.append(mutable); + } +}; + +pub fn processResponse(this: *HTTPClient, comptime is_https: bool, comptime Client: type, client: Client, body_out_str: *MutableString) !picohttp.Response { var response: picohttp.Response = undefined; { @@ -241,7 +311,10 @@ pub fn sendHTTP(this: *HTTPClient, body: []const u8, body_out_str: *MutableStrin var response_length: usize = 0; restart: while (req_buf_read != 0) { - req_buf_read = try client_reader.read(&http_req_buf); + req_buf_read = try client.read(if (comptime is_https) + &http_req_buf + else + http_req_buf[req_buf_len..]); req_buf_len += req_buf_read; var request_buffer = http_req_buf[0..req_buf_len]; @@ -262,17 +335,27 @@ pub fn sendHTTP(this: *HTTPClient, body: []const u8, body_out_str: *MutableStrin body_out_str.reset(); var content_length: u32 = 0; + var encoding = Encoding.identity; + for (response.headers) |header| { + if (this.verbose) { + Output.prettyErrorln("Response: {s}", .{response}); + } + switch (hashHeaderName(header.name)) { content_length_header_hash => { content_length = std.fmt.parseInt(u32, header.value, 10) catch 0; - // Always write a sentinel - try body_out_str.inflate(content_length + 1); + try body_out_str.inflate(content_length); body_out_str.list.expandToCapacity(); - body_out_str.list.items[content_length] = 0; }, content_encoding_hash => { - return error.UnsupportedEncoding; + if (strings.eqlComptime(header.value, "gzip")) { + encoding = Encoding.gzip; + } else if (strings.eqlComptime(header.value, "deflate")) { + encoding = Encoding.deflate; + } else if (!strings.eqlComptime(header.value, "identity")) { + return error.UnsupportedEncoding; + } }, else => {}, } @@ -283,22 +366,62 @@ pub fn sendHTTP(this: *HTTPClient, body: []const u8, body_out_str: *MutableStrin var remainder = 
http_req_buf[@intCast(u32, response.bytes_read)..]; remainder = remainder[0..std.math.min(remainder.len, content_length)]; + const Zlib = @import("./zlib.zig"); + + var buffer: *MutableString = body_out_str; + + switch (encoding) { + Encoding.gzip, Encoding.deflate => { + if (!ZlibPool.loaded) { + ZlibPool.instance = ZlibPool.init(default_allocator); + ZlibPool.loaded = true; + } + + buffer = try ZlibPool.instance.get(); + if (buffer.list.capacity < remaining_content_length) { + try buffer.list.ensureUnusedCapacity(buffer.allocator, remaining_content_length); + } + buffer.list.items = buffer.list.items.ptr[0..remaining_content_length]; + }, + else => {}, + } + var body_size: usize = 0; - if (remainder.len > 0) { - std.mem.copy(u8, body_out_str.list.items, remainder); - body_size = @intCast(u32, remainder.len); - remaining_content_length -= @intCast(u32, remainder.len); + if (comptime !is_https) { + if (remainder.len > 0) { + std.mem.copy(u8, buffer.list.items, remainder); + body_size = @intCast(u32, remainder.len); + remaining_content_length -= @intCast(u32, remainder.len); + } } while (remaining_content_length > 0) { - const size = @intCast(u32, try client.read(body_out_str.list.items[body_size..], SOCKET_FLAGS)); + const size = @intCast(u32, try client.read( + buffer.list.items[body_size..], + )); if (size == 0) break; body_size += size; remaining_content_length -= size; } - body_out_str.list.items.len = body_size; + buffer.list.shrinkRetainingCapacity(body_size); + + switch (encoding) { + Encoding.gzip, Encoding.deflate => { + body_out_str.list.expandToCapacity(); + defer ZlibPool.instance.put(buffer) catch unreachable; + var reader = try Zlib.ZlibReaderArrayList.init(buffer.list.items, &body_out_str.list, default_allocator); + reader.readAll() catch |err| { + if (reader.errorMessage()) |msg| { + Output.prettyErrorln("<r><red>Zlib error<r>: <b>{s}<r>", .{msg}); + Output.flush(); + } + return err; + }; + }, + else => {}, + } } return response; @@ -352,78 +475,75 
@@ pub fn sendHTTPS(this: *HTTPClient, body_str: []const u8, body_out_str: *Mutable try client_writer.writeAll(body); } - var client_reader = client.reader(); - var req_buf_len = try client_reader.readAll(&http_req_buf); - var request_buffer = http_req_buf[0..req_buf_len]; - var response: picohttp.Response = undefined; + var reader = client.reader(); - { - var response_length: usize = 0; - restart: while (true) { - response = picohttp.Response.parseParts(request_buffer, &response_headers_buf, &response_length) catch |err| { - switch (err) { - error.ShortRead => { - continue :restart; - }, - else => { - return err; - }, - } - }; - break :restart; - } - } + return try this.processResponse(true, @TypeOf(&reader), &reader, body_out_str); +} - body_out_str.reset(); - var content_length: u32 = 0; - for (response.headers) |header| { - if (this.verbose) { - Output.prettyErrorln("Response: {s}", .{response}); - } +// zig test src/http_client.zig --test-filter "sendHTTP - only" -lc -lc++ /Users/jarred/Code/bun/src/deps/zlib/libz.a /Users/jarred/Code/bun/src/deps/picohttpparser.o --cache-dir /Users/jarred/Code/bun/zig-cache --global-cache-dir /Users/jarred/.cache/zig --name bun --pkg-begin clap /Users/jarred/Code/bun/src/deps/zig-clap/clap.zig --pkg-end --pkg-begin picohttp /Users/jarred/Code/bun/src/deps/picohttp.zig --pkg-end --pkg-begin iguanaTLS /Users/jarred/Code/bun/src/deps/iguanaTLS/src/main.zig --pkg-end -I /Users/jarred/Code/bun/src/deps -I /Users/jarred/Code/bun/src/deps/mimalloc -I /usr/local/opt/icu4c/include -L src/deps/mimalloc -L /usr/local/opt/icu4c/lib --main-pkg-path /Users/jarred/Code/bun --enable-cache -femit-bin=zig-out/bin/test --test-no-exec +test "sendHTTP - only" { + Output.initTest(); + defer Output.flush(); - switch (hashHeaderName(header.name)) { - content_length_header_hash => { - content_length = std.fmt.parseInt(u32, header.value, 10) catch 0; - try body_out_str.inflate(content_length); - body_out_str.list.expandToCapacity(); - }, - 
content_encoding_hash => { - return error.UnsupportedEncoding; - }, - else => {}, - } - } + var headers = try std.heap.c_allocator.create(Headers); + headers.* = Headers{ + .entries = @TypeOf(headers.entries){}, + .buf = @TypeOf(headers.buf){}, + .used = 0, + .allocator = std.heap.c_allocator, + }; - if (content_length > 0) { - var remaining_content_length = content_length; - var remainder = http_req_buf[@intCast(u32, response.bytes_read)..]; - remainder = remainder[0..std.math.min(remainder.len, content_length)]; + // headers.appendHeader("X-What", "ok", true, true, false); + headers.appendHeader("Accept-Encoding", "identity", true, true, false); - var body_size: usize = 0; - if (remainder.len > 0) { - std.mem.copy(u8, body_out_str.list.items, remainder); - body_size = @intCast(u32, remainder.len); - remaining_content_length -= @intCast(u32, remainder.len); - } + var client = HTTPClient.init( + std.heap.c_allocator, + .GET, + URL.parse("http://example.com/"), + headers.entries, + headers.buf.items, + ); + var body_out_str = try MutableString.init(std.heap.c_allocator, 0); + var response = try client.sendHTTP("", &body_out_str); + try std.testing.expectEqual(response.status_code, 200); + try std.testing.expectEqual(body_out_str.list.items.len, 1256); + try std.testing.expectEqualStrings(body_out_str.list.items, @embedFile("fixtures_example.com.html")); +} - while (remaining_content_length > 0) { - const size = @intCast(u32, try client.read( - body_out_str.list.items[body_size..], - )); - if (size == 0) break; +// zig test src/http_client.zig --test-filter "sendHTTP - gzip" -lc -lc++ /Users/jarred/Code/bun/src/deps/zlib/libz.a /Users/jarred/Code/bun/src/deps/picohttpparser.o --cache-dir /Users/jarred/Code/bun/zig-cache --global-cache-dir /Users/jarred/.cache/zig --name bun --pkg-begin clap /Users/jarred/Code/bun/src/deps/zig-clap/clap.zig --pkg-end --pkg-begin picohttp /Users/jarred/Code/bun/src/deps/picohttp.zig --pkg-end --pkg-begin iguanaTLS 
/Users/jarred/Code/bun/src/deps/iguanaTLS/src/main.zig --pkg-end -I /Users/jarred/Code/bun/src/deps -I /Users/jarred/Code/bun/src/deps/mimalloc -I /usr/local/opt/icu4c/include -L src/deps/mimalloc -L /usr/local/opt/icu4c/lib --main-pkg-path /Users/jarred/Code/bun --enable-cache -femit-bin=zig-out/bin/test --test-no-exec +test "sendHTTP - gzip" { + Output.initTest(); + defer Output.flush(); - body_size += size; - remaining_content_length -= size; - } + var headers = try std.heap.c_allocator.create(Headers); + headers.* = Headers{ + .entries = @TypeOf(headers.entries){}, + .buf = @TypeOf(headers.buf){}, + .used = 0, + .allocator = std.heap.c_allocator, + }; - body_out_str.list.shrinkRetainingCapacity(body_size); - } + // headers.appendHeader("X-What", "ok", true, true, false); + headers.appendHeader("Accept-Encoding", "gzip", true, true, false); - return response; + var client = HTTPClient.init( + std.heap.c_allocator, + .GET, + URL.parse("http://example.com/"), + headers.entries, + headers.buf.items, + ); + var body_out_str = try MutableString.init(std.heap.c_allocator, 0); + var response = try client.sendHTTP("", &body_out_str); + try std.testing.expectEqual(response.status_code, 200); + try std.testing.expectEqualStrings(body_out_str.list.items, @embedFile("fixtures_example.com.html")); } -// zig test src/http_client.zig --test-filter "sendHTTP" -lc -lc++ /Users/jarred/Code/bun/src/deps/picohttpparser.o --cache-dir /Users/jarred/Code/bun/zig-cache --global-cache-dir /Users/jarred/.cache/zig --name bun --pkg-begin clap /Users/jarred/Code/bun/src/deps/zig-clap/clap.zig --pkg-end --pkg-begin picohttp /Users/jarred/Code/bun/src/deps/picohttp.zig --pkg-end --pkg-begin iguanaTLS /Users/jarred/Code/bun/src/deps/iguanaTLS/src/main.zig --pkg-end -I /Users/jarred/Code/bun/src/deps -I /Users/jarred/Code/bun/src/deps/mimalloc -I /usr/local/opt/icu4c/include -L src/deps/mimalloc -L /usr/local/opt/icu4c/lib --main-pkg-path /Users/jarred/Code/bun --enable-cache -test "sendHTTP" 
{ +// zig test src/http_client.zig --test-filter "sendHTTPS - identity" -lc -lc++ /Users/jarred/Code/bun/src/deps/zlib/libz.a /Users/jarred/Code/bun/src/deps/picohttpparser.o --cache-dir /Users/jarred/Code/bun/zig-cache --global-cache-dir /Users/jarred/.cache/zig --name bun --pkg-begin clap /Users/jarred/Code/bun/src/deps/zig-clap/clap.zig --pkg-end --pkg-begin picohttp /Users/jarred/Code/bun/src/deps/picohttp.zig --pkg-end --pkg-begin iguanaTLS /Users/jarred/Code/bun/src/deps/iguanaTLS/src/main.zig --pkg-end -I /Users/jarred/Code/bun/src/deps -I /Users/jarred/Code/bun/src/deps/mimalloc -I /usr/local/opt/icu4c/include -L src/deps/mimalloc -L /usr/local/opt/icu4c/lib --main-pkg-path /Users/jarred/Code/bun --enable-cache -femit-bin=zig-out/bin/test --test-no-exec +test "sendHTTPS - identity" { + Output.initTest(); + defer Output.flush(); + var headers = try std.heap.c_allocator.create(Headers); headers.* = Headers{ .entries = @TypeOf(headers.entries){}, @@ -433,22 +553,26 @@ test "sendHTTP" { }; headers.appendHeader("X-What", "ok", true, true, false); + headers.appendHeader("Accept-Encoding", "identity", true, true, false); var client = HTTPClient.init( std.heap.c_allocator, .GET, - URL.parse("http://example.com/"), + URL.parse("https://example.com/"), headers.entries, headers.buf.items, ); var body_out_str = try MutableString.init(std.heap.c_allocator, 0); - var response = try client.sendHTTP("", &body_out_str); + var response = try client.sendHTTPS("", &body_out_str); try std.testing.expectEqual(response.status_code, 200); - try std.testing.expectEqual(body_out_str.list.items.len, 1256); + try std.testing.expectEqualStrings(body_out_str.list.items, @embedFile("fixtures_example.com.html")); } -// zig test src/http_client.zig --test-filter "sendHTTPS" -lc -lc++ /Users/jarred/Code/bun/src/deps/picohttpparser.o --cache-dir /Users/jarred/Code/bun/zig-cache --global-cache-dir /Users/jarred/.cache/zig --name bun --pkg-begin clap 
/Users/jarred/Code/bun/src/deps/zig-clap/clap.zig --pkg-end --pkg-begin picohttp /Users/jarred/Code/bun/src/deps/picohttp.zig --pkg-end --pkg-begin iguanaTLS /Users/jarred/Code/bun/src/deps/iguanaTLS/src/main.zig --pkg-end -I /Users/jarred/Code/bun/src/deps -I /Users/jarred/Code/bun/src/deps/mimalloc -I /usr/local/opt/icu4c/include -L src/deps/mimalloc -L /usr/local/opt/icu4c/lib --main-pkg-path /Users/jarred/Code/bun --enable-cache -test "sendHTTPS" { +// zig test src/http_client.zig --test-filter "sendHTTPS - gzip" -lc -lc++ /Users/jarred/Code/bun/src/deps/zlib/libz.a /Users/jarred/Code/bun/src/deps/picohttpparser.o --cache-dir /Users/jarred/Code/bun/zig-cache --global-cache-dir /Users/jarred/.cache/zig --name bun --pkg-begin clap /Users/jarred/Code/bun/src/deps/zig-clap/clap.zig --pkg-end --pkg-begin picohttp /Users/jarred/Code/bun/src/deps/picohttp.zig --pkg-end --pkg-begin iguanaTLS /Users/jarred/Code/bun/src/deps/iguanaTLS/src/main.zig --pkg-end -I /Users/jarred/Code/bun/src/deps -I /Users/jarred/Code/bun/src/deps/mimalloc -I /usr/local/opt/icu4c/include -L src/deps/mimalloc -L /usr/local/opt/icu4c/lib --main-pkg-path /Users/jarred/Code/bun --enable-cache -femit-bin=zig-out/bin/test --test-no-exec +test "sendHTTPS - gzip" { + Output.initTest(); + defer Output.flush(); + var headers = try std.heap.c_allocator.create(Headers); headers.* = Headers{ .entries = @TypeOf(headers.entries){}, @@ -457,17 +581,19 @@ test "sendHTTPS" { .allocator = std.heap.c_allocator, }; - headers.appendHeader("X-What", "ok", true, true, false); + headers.appendHeader("Accept-Encoding", "gzip", false, false, false); var client = HTTPClient.init( std.heap.c_allocator, .GET, - URL.parse("https://hookb.in/aBnOOWN677UXQ9kkQ2g3"), + URL.parse("https://example.com/"), headers.entries, headers.buf.items, ); var body_out_str = try MutableString.init(std.heap.c_allocator, 0); var response = try client.sendHTTPS("", &body_out_str); try std.testing.expectEqual(response.status_code, 200); - try 
std.testing.expectEqual(body_out_str.list.items.len, 1256); + try std.testing.expectEqualStrings(body_out_str.list.items, @embedFile("fixtures_example.com.html")); } + +// zig test src/http_client.zig --test-filter "sendHTTP" -lc -lc++ /Users/jarred/Code/bun/src/deps/zlib/libz.a /Users/jarred/Code/bun/src/deps/picohttpparser.o --cache-dir /Users/jarred/Code/bun/zig-cache --global-cache-dir /Users/jarred/.cache/zig --name bun --pkg-begin clap /Users/jarred/Code/bun/src/deps/zig-clap/clap.zig --pkg-end --pkg-begin picohttp /Users/jarred/Code/bun/src/deps/picohttp.zig --pkg-end --pkg-begin iguanaTLS /Users/jarred/Code/bun/src/deps/iguanaTLS/src/main.zig --pkg-end -I /Users/jarred/Code/bun/src/deps -I /Users/jarred/Code/bun/src/deps/mimalloc -I /usr/local/opt/icu4c/include -L src/deps/mimalloc -L /usr/local/opt/icu4c/lib --main-pkg-path /Users/jarred/Code/bun --enable-cache -femit-bin=zig-out/bin/test diff --git a/src/linker.zig b/src/linker.zig index ecc6951d8..10f611a81 100644 --- a/src/linker.zig +++ b/src/linker.zig @@ -93,7 +93,6 @@ pub const Linker = struct { return hashed_result.value_ptr.*; } } - var file: std.fs.File = if (fd) |_fd| std.fs.File{ .handle = _fd } else try std.fs.openFileAbsolute(file_path.text, .{ .read = true }); Fs.FileSystem.setMaxFd(file.handle); var modkey = try Fs.FileSystem.RealFS.ModKey.generate(&this.fs.fs, file_path.text, file); diff --git a/src/resolver/resolver.zig b/src/resolver/resolver.zig index 4b766b81e..1434dc273 100644 --- a/src/resolver/resolver.zig +++ b/src/resolver/resolver.zig @@ -14,7 +14,7 @@ const BrowserMap = @import("./package_json.zig").BrowserMap; const CacheSet = cache.Set; usingnamespace @import("./data_url.zig"); pub const DirInfo = @import("./dir_info.zig"); -const HTTPWatcher = @import("../http.zig").Watcher; +const HTTPWatcher = if (isTest) void else @import("../http.zig").Watcher; const Wyhash = std.hash.Wyhash; const ResolvePath = @import("./resolve_path.zig"); const NodeFallbackModules = 
@import("../node_fallbacks.zig"); diff --git a/src/string_types.zig b/src/string_types.zig index 02f692629..83e47953e 100644 --- a/src/string_types.zig +++ b/src/string_types.zig @@ -1,5 +1,6 @@ const std = @import("std"); pub const string = []const u8; +pub const stringZ = [:0]const u8; pub const stringMutable = []u8; pub const CodePoint = i32; diff --git a/src/zee_alloc.zig b/src/zee_alloc.zig deleted file mode 100644 index b1f56e726..000000000 --- a/src/zee_alloc.zig +++ /dev/null @@ -1,667 +0,0 @@ -const std = @import("std"); - -const Allocator = std.mem.Allocator; - -pub const Config = struct { - /// ZeeAlloc will request a multiple of `slab_size` from the backing allocator. - /// **Must** be a power of two. - slab_size: usize = std.math.max(std.mem.page_size, 65536), // 64K ought to be enough for everybody - - /// **Must** be a power of two. - min_element_size: usize = 4, - - fn maxElementSize(conf: Config) usize { - // Scientifically derived value - return conf.slab_size / 4; - } -}; - -pub const ZeeAllocDefaults = ZeeAlloc(Config{}); - -pub fn ZeeAlloc(comptime conf: Config) type { - return struct { - const Self = @This(); - - const min_shift_size = unsafeLog2(usize, conf.min_element_size); - const max_shift_size = unsafeLog2(usize, conf.maxElementSize()); - const total_slabs = max_shift_size - min_shift_size + 1; - - /// The definitive™ way of using `ZeeAlloc` - pub const wasm_allocator = &_wasm.allocator; - pub var _wasm = init(&wasm_page_allocator); - - jumbo: ?*Slab = null, - slabs: [total_slabs]?*Slab = [_]?*Slab{null} ** total_slabs, - backing_allocator: *std.mem.Allocator, - - allocator: Allocator = Allocator{ - .allocFn = alloc, - .resizeFn = resize, - }, - - const Slab = extern struct { - const header_size = 2 * @sizeOf(usize); - const payload_alignment = header_size; - - next: ?*Slab align(conf.slab_size), - element_size: usize, - pad: [conf.slab_size - header_size]u8 align(payload_alignment), - - fn init(element_size: usize) Slab { - var result: 
Slab = undefined; - result.reset(element_size); - return result; - } - - fn reset(self: *Slab, element_size: usize) void { - self.next = null; - self.element_size = element_size; - - const blocks = self.freeBlocks(); - for (blocks) |*block| { - block.* = std.math.maxInt(u64); - } - - const remaining_bits = @truncate(u6, (self.elementCount() - self.dataOffset()) % 64); - // TODO: detect overflow - blocks[blocks.len - 1] = (@as(u64, 1) << remaining_bits) - 1; - } - - fn fromMemPtr(ptr: [*]u8) *Slab { - const addr = std.mem.alignBackward(@ptrToInt(ptr), conf.slab_size); - return @intToPtr(*Slab, addr); - } - - const detached_signal = @intToPtr(*align(1) Slab, 0xaaaa); - fn markDetached(self: *Slab) void { - // Salt the earth - const raw_next = @ptrCast(*usize, &self.next); - raw_next.* = @ptrToInt(detached_signal); - } - - fn isDetached(self: Slab) bool { - return self.next == detached_signal; - } - - fn freeBlocks(self: *Slab) []u64 { - const count = divCeil(usize, self.elementCount(), 64); - const ptr = @ptrCast([*]u64, &self.pad); - return ptr[0..count]; - } - - fn totalFree(self: *Slab) usize { - var i: usize = 0; - for (self.freeBlocks()) |block| { - i += @popCount(u64, block); - } - return i; - } - - const UsizeShift = std.meta.Int(.unsigned, @bitSizeOf(std.math.Log2Int(usize)) - 1); - fn elementSizeShift(self: Slab) UsizeShift { - return @truncate(UsizeShift, @ctz(usize, self.element_size)); - } - - fn elementCount(self: Slab) usize { - return conf.slab_size >> self.elementSizeShift(); - } - - fn dataOffset(self: Slab) usize { - const BITS_PER_BYTE = 8; - return 1 + ((conf.slab_size / BITS_PER_BYTE) >> self.elementSizeShift() >> self.elementSizeShift()); - } - - fn elementAt(self: *Slab, idx: usize) []u8 { - std.debug.assert(idx >= self.dataOffset()); - std.debug.assert(idx < self.elementCount()); - - const bytes = std.mem.asBytes(self); - return bytes[idx << self.elementSizeShift() ..][0..self.element_size]; - } - - fn elementIdx(self: *Slab, element: []u8) 
usize { - std.debug.assert(element.len <= self.element_size); - const diff = @ptrToInt(element.ptr) - @ptrToInt(self); - std.debug.assert(diff % self.element_size == 0); - - return diff >> self.elementSizeShift(); - } - - fn alloc(self: *Slab) ![]u8 { - for (self.freeBlocks()) |*block, i| { - const bit = @ctz(u64, block.*); - if (bit != 64) { - const index = 64 * i + bit; - - const mask = @as(u64, 1) << @intCast(u6, bit); - block.* &= ~mask; - - return self.elementAt(index + self.dataOffset()); - } - } - - return error.OutOfMemory; - } - - fn free(self: *Slab, element: []u8) void { - const index = self.elementIdx(element) - self.dataOffset(); - - const block = &self.freeBlocks()[index / 64]; - const mask = @as(u64, 1) << @truncate(u6, index); - std.debug.assert(mask & block.* == 0); - block.* |= mask; - } - }; - - pub fn init(allocator: *std.mem.Allocator) Self { - return .{ .backing_allocator = allocator }; - } - - pub fn freeAll(self: *Self) void { - { - var iter = self.jumbo; - while (iter) |node| { - iter = node.next; - const bytes = @ptrCast([*]u8, node); - self.backing_allocator.free(bytes[0..node.element_size]); - } - } - - for (self.slabs) |root| { - var iter = root; - while (iter) |node| { - iter = node.next; - self.backing_allocator.destroy(node); - } - } - } - - pub fn deinit(self: *Self) void { - self.freeAll(); - self.* = undefined; - } - - fn isJumbo(value: usize) bool { - return value > conf.slab_size / 4; - } - - fn padToSize(memsize: usize) usize { - if (isJumbo(memsize)) { - return std.mem.alignForward(memsize + Slab.header_size, conf.slab_size); - } else { - return std.math.max(conf.min_element_size, ceilPowerOfTwo(usize, memsize)); - } - } - - fn unsafeLog2(comptime T: type, val: T) T { - std.debug.assert(ceilPowerOfTwo(T, val) == val); - return @ctz(T, val); - } - - fn findSlabIndex(padded_size: usize) usize { - return unsafeLog2(usize, padded_size) - min_shift_size; - } - - fn allocJumbo(self: *Self, padded_size: usize, ptr_align: usize) 
![*]u8 { - if (ptr_align > Slab.payload_alignment) { - return error.OutOfMemory; - } - - const slab: *Slab = blk: { - var prev = @ptrCast(*align(@alignOf(Self)) Slab, self); - while (prev.next) |curr| : (prev = curr) { - if (curr.element_size == padded_size) { - prev.next = curr.next; - break :blk curr; - } - } - - const new_frame = try self.backing_allocator.allocAdvanced(u8, conf.slab_size, padded_size, .exact); - const synth_slab = @ptrCast(*Slab, new_frame.ptr); - synth_slab.element_size = padded_size; - break :blk synth_slab; - }; - slab.markDetached(); - return @ptrCast([*]u8, &slab.pad); - } - - fn allocSlab(self: *Self, element_size: usize, ptr_align: usize) ![*]u8 { - if (ptr_align > element_size) { - return error.OutOfMemory; - } - - const idx = findSlabIndex(element_size); - const slab = self.slabs[idx] orelse blk: { - const new_slab = try self.backing_allocator.create(Slab); - new_slab.reset(element_size); - self.slabs[idx] = new_slab; - break :blk new_slab; - }; - - const result = slab.alloc() catch unreachable; - if (slab.totalFree() == 0) { - self.slabs[idx] = slab.next; - slab.markDetached(); - } - - return result.ptr; - } - - fn alloc(allocator: *Allocator, n: usize, ptr_align: u29, len_align: u29, ret_addr: usize) Allocator.Error![]u8 { - const self = @fieldParentPtr(Self, "allocator", allocator); - - const padded_size = padToSize(n); - const ptr: [*]u8 = if (isJumbo(n)) - try self.allocJumbo(padded_size, ptr_align) - else - try self.allocSlab(padded_size, ptr_align); - - return ptr[0..std.mem.alignAllocLen(padded_size, n, len_align)]; - } - - fn resize(allocator: *Allocator, buf: []u8, buf_align: u29, new_size: usize, len_align: u29, ret_addr: usize) Allocator.Error!usize { - const self = @fieldParentPtr(Self, "allocator", allocator); - - const slab = Slab.fromMemPtr(buf.ptr); - if (new_size == 0) { - if (isJumbo(slab.element_size)) { - std.debug.assert(slab.isDetached()); - slab.next = self.jumbo; - self.jumbo = slab; - } else { - 
slab.free(buf); - if (slab.isDetached()) { - const idx = findSlabIndex(slab.element_size); - slab.next = self.slabs[idx]; - self.slabs[idx] = slab; - } - } - return 0; - } - - const padded_new_size = padToSize(new_size); - if (padded_new_size > slab.element_size) { - return error.OutOfMemory; - } - - return std.mem.alignAllocLen(padded_new_size, new_size, len_align); - } - }; -} - -pub var wasm_page_allocator = init: { - if (!std.builtin.target.isWasm()) { - @compileError("wasm allocator is only available for wasm32 arch"); - } - - // std.heap.WasmPageAllocator is designed for reusing pages - // We never free, so this lets us stay super small - const WasmPageAllocator = struct { - fn alloc(allocator: *Allocator, n: usize, alignment: u29, len_align: u29, ret_addr: usize) Allocator.Error![]u8 { - const is_debug = std.builtin.mode == .Debug; - @setRuntimeSafety(is_debug); - std.debug.assert(n % std.mem.page_size == 0); // Should only be allocating page size chunks - std.debug.assert(alignment % std.mem.page_size == 0); // Should only align to page_size increments - - const requested_page_count = @intCast(u32, n / std.mem.page_size); - const prev_page_count = @wasmMemoryGrow(0, requested_page_count); - if (prev_page_count < 0) { - return error.OutOfMemory; - } - - const start_ptr = @intToPtr([*]u8, @intCast(usize, prev_page_count) * std.mem.page_size); - return start_ptr[0..n]; - } - }; - - break :init Allocator{ - .allocFn = WasmPageAllocator.alloc, - .resizeFn = undefined, // Shouldn't be shrinking / freeing - }; -}; - -pub const ExportC = struct { - allocator: *std.mem.Allocator, - malloc: bool = true, - free: bool = true, - calloc: bool = false, - realloc: bool = false, - - pub fn run(comptime conf: ExportC) void { - const Funcs = struct { - fn malloc(size: usize) callconv(.C) ?*c_void { - if (size == 0) { - return null; - } - //const result = conf.allocator.alloc(u8, size) catch return null; - const result = conf.allocator.allocFn(conf.allocator, size, 1, 1, 0) 
catch return null; - return result.ptr; - } - fn calloc(num_elements: usize, element_size: usize) callconv(.C) ?*c_void { - const size = num_elements *% element_size; - const c_ptr = @call(.{ .modifier = .never_inline }, malloc, .{size}); - if (c_ptr) |ptr| { - const p = @ptrCast([*]u8, ptr); - @memset(p, 0, size); - } - return c_ptr; - } - fn realloc(c_ptr: ?*c_void, new_size: usize) callconv(.C) ?*c_void { - if (new_size == 0) { - @call(.{ .modifier = .never_inline }, free, .{c_ptr}); - return null; - } else if (c_ptr) |ptr| { - // Use a synthetic slice - const p = @ptrCast([*]u8, ptr); - const result = conf.allocator.realloc(p[0..1], new_size) catch return null; - return @ptrCast(*c_void, result.ptr); - } else { - return @call(.{ .modifier = .never_inline }, malloc, .{new_size}); - } - } - fn free(c_ptr: ?*c_void) callconv(.C) void { - if (c_ptr) |ptr| { - // Use a synthetic slice. zee_alloc will free via corresponding metadata. - const p = @ptrCast([*]u8, ptr); - //conf.allocator.free(p[0..1]); - _ = conf.allocator.resizeFn(conf.allocator, p[0..1], 0, 0, 0, 0) catch unreachable; - } - } - }; - - if (conf.malloc) { - @export(Funcs.malloc, .{ .name = "malloc" }); - } - if (conf.calloc) { - @export(Funcs.calloc, .{ .name = "calloc" }); - } - if (conf.realloc) { - @export(Funcs.realloc, .{ .name = "realloc" }); - } - if (conf.free) { - @export(Funcs.free, .{ .name = "free" }); - } - } -}; - -fn divCeil(comptime T: type, numerator: T, denominator: T) T { - return (numerator + denominator - 1) / denominator; -} - -// https://github.com/ziglang/zig/issues/2426 -fn ceilPowerOfTwo(comptime T: type, value: T) T { - std.debug.assert(value != 0); - const Shift = comptime std.math.Log2Int(T); - return @as(T, 1) << @intCast(Shift, @bitSizeOf(T) - @clz(T, value - 1)); -} - -test "divCeil" { - std.testing.expectEqual(@as(u32, 0), divCeil(u32, 0, 64)); - std.testing.expectEqual(@as(u32, 1), divCeil(u32, 1, 64)); - std.testing.expectEqual(@as(u32, 1), divCeil(u32, 64, 64)); - 
std.testing.expectEqual(@as(u32, 2), divCeil(u32, 65, 64)); -} - -test "Slab.init" { - { - const slab = ZeeAllocDefaults.Slab.init(16384); - std.testing.expectEqual(@as(usize, 16384), slab.element_size); - std.testing.expectEqual(@as(?*ZeeAllocDefaults.Slab, null), slab.next); - - const raw_ptr = @ptrCast(*const u64, &slab.pad); - std.testing.expectEqual((@as(u64, 1) << 3) - 1, raw_ptr.*); - } - - { - const slab = ZeeAllocDefaults.Slab.init(2048); - std.testing.expectEqual(@as(usize, 2048), slab.element_size); - std.testing.expectEqual(@as(?*ZeeAllocDefaults.Slab, null), slab.next); - - const raw_ptr = @ptrCast(*const u64, &slab.pad); - std.testing.expectEqual((@as(u64, 1) << 31) - 1, raw_ptr.*); - } - - const u64_max: u64 = std.math.maxInt(u64); - - { - const slab = ZeeAllocDefaults.Slab.init(256); - std.testing.expectEqual(@as(usize, 256), slab.element_size); - std.testing.expectEqual(@as(?*ZeeAllocDefaults.Slab, null), slab.next); - - const raw_ptr = @ptrCast([*]const u64, &slab.pad); - std.testing.expectEqual(u64_max, raw_ptr[0]); - std.testing.expectEqual(u64_max, raw_ptr[1]); - std.testing.expectEqual(u64_max, raw_ptr[2]); - std.testing.expectEqual((@as(u64, 1) << 63) - 1, raw_ptr[3]); - } -} - -test "Slab.elementAt" { - { - var slab = ZeeAllocDefaults.Slab.init(16384); - - var element = slab.elementAt(1); - std.testing.expectEqual(slab.element_size, element.len); - std.testing.expectEqual(1 * slab.element_size, @ptrToInt(element.ptr) - @ptrToInt(&slab)); - - element = slab.elementAt(2); - std.testing.expectEqual(slab.element_size, element.len); - std.testing.expectEqual(2 * slab.element_size, @ptrToInt(element.ptr) - @ptrToInt(&slab)); - - element = slab.elementAt(3); - std.testing.expectEqual(slab.element_size, element.len); - std.testing.expectEqual(3 * slab.element_size, @ptrToInt(element.ptr) - @ptrToInt(&slab)); - } - { - var slab = ZeeAllocDefaults.Slab.init(128); - - var element = slab.elementAt(1); - std.testing.expectEqual(slab.element_size, 
element.len); - std.testing.expectEqual(1 * slab.element_size, @ptrToInt(element.ptr) - @ptrToInt(&slab)); - - element = slab.elementAt(2); - std.testing.expectEqual(slab.element_size, element.len); - std.testing.expectEqual(2 * slab.element_size, @ptrToInt(element.ptr) - @ptrToInt(&slab)); - - element = slab.elementAt(3); - std.testing.expectEqual(slab.element_size, element.len); - std.testing.expectEqual(3 * slab.element_size, @ptrToInt(element.ptr) - @ptrToInt(&slab)); - } - { - var slab = ZeeAllocDefaults.Slab.init(64); - std.testing.expectEqual(@as(usize, 3), slab.dataOffset()); - - var element = slab.elementAt(3); - std.testing.expectEqual(slab.element_size, element.len); - std.testing.expectEqual(3 * slab.element_size, @ptrToInt(element.ptr) - @ptrToInt(&slab)); - - element = slab.elementAt(5); - std.testing.expectEqual(slab.element_size, element.len); - std.testing.expectEqual(5 * slab.element_size, @ptrToInt(element.ptr) - @ptrToInt(&slab)); - } - { - var slab = ZeeAllocDefaults.Slab.init(4); - std.testing.expectEqual(@as(usize, 513), slab.dataOffset()); - - var element = slab.elementAt(513); - std.testing.expectEqual(slab.element_size, element.len); - std.testing.expectEqual(513 * slab.element_size, @ptrToInt(element.ptr) - @ptrToInt(&slab)); - - element = slab.elementAt(1023); - std.testing.expectEqual(slab.element_size, element.len); - std.testing.expectEqual(1023 * slab.element_size, @ptrToInt(element.ptr) - @ptrToInt(&slab)); - } -} - -test "Slab.elementIdx" { - var slab = ZeeAllocDefaults.Slab.init(128); - - var element = slab.elementAt(1); - std.testing.expectEqual(@as(usize, 1), slab.elementIdx(element)); -} - -test "Slab.freeBlocks" { - { - var slab = ZeeAllocDefaults.Slab.init(16384); - - const blocks = slab.freeBlocks(); - std.testing.expectEqual(@as(usize, 1), blocks.len); - std.testing.expectEqual(@ptrToInt(&slab.pad), @ptrToInt(blocks.ptr)); - } - { - var slab = ZeeAllocDefaults.Slab.init(128); - - const blocks = slab.freeBlocks(); - 
std.testing.expectEqual(@as(usize, 8), blocks.len); - std.testing.expectEqual(@ptrToInt(&slab.pad), @ptrToInt(blocks.ptr)); - } -} - -test "Slab.alloc + free" { - var slab = ZeeAllocDefaults.Slab.init(16384); - - std.testing.expectEqual(@as(usize, 3), slab.totalFree()); - - const data0 = try slab.alloc(); - std.testing.expectEqual(@as(usize, 2), slab.totalFree()); - std.testing.expectEqual(@as(usize, 16384), data0.len); - - const data1 = try slab.alloc(); - std.testing.expectEqual(@as(usize, 1), slab.totalFree()); - std.testing.expectEqual(@as(usize, 16384), data1.len); - std.testing.expectEqual(@as(usize, 16384), @ptrToInt(data1.ptr) - @ptrToInt(data0.ptr)); - - const data2 = try slab.alloc(); - std.testing.expectEqual(@as(usize, 0), slab.totalFree()); - std.testing.expectEqual(@as(usize, 16384), data2.len); - std.testing.expectEqual(@as(usize, 16384), @ptrToInt(data2.ptr) - @ptrToInt(data1.ptr)); - - std.testing.expectError(error.OutOfMemory, slab.alloc()); - - { - slab.free(data2); - std.testing.expectEqual(@as(usize, 1), slab.totalFree()); - slab.free(data1); - std.testing.expectEqual(@as(usize, 2), slab.totalFree()); - slab.free(data0); - std.testing.expectEqual(@as(usize, 3), slab.totalFree()); - } -} - -test "padToSize" { - const page_size = 65536; - const header_size = 2 * @sizeOf(usize); - - std.testing.expectEqual(@as(usize, 4), ZeeAllocDefaults.padToSize(1)); - std.testing.expectEqual(@as(usize, 4), ZeeAllocDefaults.padToSize(4)); - std.testing.expectEqual(@as(usize, 8), ZeeAllocDefaults.padToSize(8)); - std.testing.expectEqual(@as(usize, 16), ZeeAllocDefaults.padToSize(9)); - std.testing.expectEqual(@as(usize, 16384), ZeeAllocDefaults.padToSize(16384)); -} - -test "alloc slabs" { - var zee_alloc = ZeeAllocDefaults.init(std.testing.allocator); - defer zee_alloc.deinit(); - - for (zee_alloc.slabs) |root| { - std.testing.expect(root == null); - } - - std.testing.expect(zee_alloc.slabs[0] == null); - const small = try zee_alloc.allocator.alloc(u8, 4); - 
std.testing.expect(zee_alloc.slabs[0] != null); - const smalls_before_free = zee_alloc.slabs[0].?.totalFree(); - zee_alloc.allocator.free(small); - std.testing.expectEqual(smalls_before_free + 1, zee_alloc.slabs[0].?.totalFree()); - - std.testing.expect(zee_alloc.slabs[12] == null); - const large = try zee_alloc.allocator.alloc(u8, 16384); - std.testing.expect(zee_alloc.slabs[12] != null); - const larges_before_free = zee_alloc.slabs[12].?.totalFree(); - zee_alloc.allocator.free(large); - std.testing.expectEqual(larges_before_free + 1, zee_alloc.slabs[12].?.totalFree()); -} - -test "alloc jumbo" { - var zee_alloc = ZeeAllocDefaults.init(std.testing.allocator); - defer zee_alloc.deinit(); - - std.testing.expect(zee_alloc.jumbo == null); - const first = try zee_alloc.allocator.alloc(u8, 32000); - std.testing.expect(zee_alloc.jumbo == null); - std.testing.expectEqual(@as(usize, ZeeAllocDefaults.Slab.header_size), @ptrToInt(first.ptr) % 65536); - zee_alloc.allocator.free(first); - std.testing.expect(zee_alloc.jumbo != null); - - const reuse = try zee_alloc.allocator.alloc(u8, 32000); - std.testing.expect(zee_alloc.jumbo == null); - std.testing.expectEqual(first.ptr, reuse.ptr); - zee_alloc.allocator.free(first); - std.testing.expect(zee_alloc.jumbo != null); -} - -test "functional tests" { - var zee_alloc = ZeeAllocDefaults.init(std.testing.allocator); - defer zee_alloc.deinit(); - - try std.heap.testAllocator(&zee_alloc.allocator); - try std.heap.testAllocatorAligned(&zee_alloc.allocator, 16); -} - -fn expectIllegalBehavior(context: anytype, comptime func: anytype) !void { - if (!@hasDecl(std.os.system, "fork") or !std.debug.runtime_safety) return; - - const child_pid = try std.os.fork(); - if (child_pid == 0) { - const null_fd = std.os.openZ("/dev/null", std.os.O_RDWR, 0) catch { - std.debug.print("Cannot open /dev/null\n", .{}); - std.os.exit(0); - }; - std.os.dup2(null_fd, std.io.getStdErr().handle) catch { - std.debug.print("Cannot close child process stderr\n", 
.{}); - std.os.exit(0); - }; - - func(context); // this should crash - std.os.exit(0); - } else { - const status = std.os.waitpid(child_pid, 0); - // Maybe we should use a fixed error code instead of checking status != 0 - if (status == 0) @panic("Expected illegal behavior but succeeded instead"); - } -} - -const AllocContext = struct { - allocator: *Allocator, - mem: []u8, - - fn init(allocator: *Allocator, mem: []u8) AllocContext { - return .{ .allocator = allocator, .mem = mem }; - } - - fn free(self: AllocContext) void { - self.allocator.free(self.mem); - } -}; - -test "double free" { - var zee_alloc = ZeeAllocDefaults.init(std.testing.allocator); - defer zee_alloc.deinit(); - - const mem = try zee_alloc.allocator.alloc(u8, 16); - zee_alloc.allocator.free(mem); - - const context = AllocContext.init(&zee_alloc.allocator, mem); - try expectIllegalBehavior(context, AllocContext.free); -} - -test "freeing non-owned memory" { - var zee_alloc = ZeeAllocDefaults.init(std.testing.allocator); - defer zee_alloc.deinit(); - - const mem = try std.testing.allocator.alloc(u8, 16); - defer std.testing.allocator.free(mem); - - const context = AllocContext.init(&zee_alloc.allocator, mem); - try expectIllegalBehavior(context, AllocContext.free); -} diff --git a/src/zlib.test.gz b/src/zlib.test.gz Binary files differnew file mode 100644 index 000000000..b62d2403b --- /dev/null +++ b/src/zlib.test.gz diff --git a/src/zlib.test.txt b/src/zlib.test.txt new file mode 100644 index 000000000..3085bd41f --- /dev/null +++ b/src/zlib.test.txt @@ -0,0 +1,651 @@ +/* minigzip.c -- simulate gzip using the zlib compression library + * Copyright (C) 1995-2006, 2010, 2011, 2016 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * minigzip is a minimal implementation of the gzip utility. This is + * only an example of using zlib and isn't meant to replace the + * full-featured gzip. 
No attempt is made to deal with file systems + * limiting names to 14 or 8+3 characters, etc... Error checking is + * very limited. So use minigzip only for testing; use gzip for the + * real thing. On MSDOS, use only on file names without extension + * or in pipe mode. + */ + +/* @(#) $Id$ */ + +#include "zlib.h" +#include <stdio.h> + +#ifdef STDC +# include <string.h> +# include <stdlib.h> +#endif + +#ifdef USE_MMAP +# include <sys/types.h> +# include <sys/mman.h> +# include <sys/stat.h> +#endif + +#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) +# include <fcntl.h> +# include <io.h> +# ifdef UNDER_CE +# include <stdlib.h> +# endif +# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) +#else +# define SET_BINARY_MODE(file) +#endif + +#if defined(_MSC_VER) && _MSC_VER < 1900 +# define snprintf _snprintf +#endif + +#ifdef VMS +# define unlink delete +# define GZ_SUFFIX "-gz" +#endif +#ifdef RISCOS +# define unlink remove +# define GZ_SUFFIX "-gz" +# define fileno(file) file->__file +#endif +#if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os +# include <unix.h> /* for fileno */ +#endif + +#if !defined(Z_HAVE_UNISTD_H) && !defined(_LARGEFILE64_SOURCE) +#ifndef WIN32 /* unlink already in stdio.h for WIN32 */ + extern int unlink OF((const char *)); +#endif +#endif + +#if defined(UNDER_CE) +# include <windows.h> +# define perror(s) pwinerror(s) + +/* Map the Windows error number in ERROR to a locale-dependent error + message string and return a pointer to it. Typically, the values + for ERROR come from GetLastError. + + The string pointed to shall not be modified by the application, + but may be overwritten by a subsequent call to strwinerror + + The strwinerror function does not change the current setting + of GetLastError. 
*/ + +static char *strwinerror (error) + DWORD error; +{ + static char buf[1024]; + + wchar_t *msgbuf; + DWORD lasterr = GetLastError(); + DWORD chars = FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM + | FORMAT_MESSAGE_ALLOCATE_BUFFER, + NULL, + error, + 0, /* Default language */ + (LPVOID)&msgbuf, + 0, + NULL); + if (chars != 0) { + /* If there is an \r\n appended, zap it. */ + if (chars >= 2 + && msgbuf[chars - 2] == '\r' && msgbuf[chars - 1] == '\n') { + chars -= 2; + msgbuf[chars] = 0; + } + + if (chars > sizeof (buf) - 1) { + chars = sizeof (buf) - 1; + msgbuf[chars] = 0; + } + + wcstombs(buf, msgbuf, chars + 1); + LocalFree(msgbuf); + } + else { + sprintf(buf, "unknown win32 error (%ld)", error); + } + + SetLastError(lasterr); + return buf; +} + +static void pwinerror (s) + const char *s; +{ + if (s && *s) + fprintf(stderr, "%s: %s\n", s, strwinerror(GetLastError ())); + else + fprintf(stderr, "%s\n", strwinerror(GetLastError ())); +} + +#endif /* UNDER_CE */ + +#ifndef GZ_SUFFIX +# define GZ_SUFFIX ".gz" +#endif +#define SUFFIX_LEN (sizeof(GZ_SUFFIX)-1) + +#define BUFLEN 16384 +#define MAX_NAME_LEN 1024 + +#ifdef MAXSEG_64K +# define local static + /* Needed for systems with limitation on stack size. 
*/ +#else +# define local +#endif + +#ifdef Z_SOLO +/* for Z_SOLO, create simplified gz* functions using deflate and inflate */ + +#if defined(Z_HAVE_UNISTD_H) || defined(Z_LARGE) +# include <unistd.h> /* for unlink() */ +#endif + +void *myalloc OF((void *, unsigned, unsigned)); +void myfree OF((void *, void *)); + +void *myalloc(q, n, m) + void *q; + unsigned n, m; +{ + (void)q; + return calloc(n, m); +} + +void myfree(q, p) + void *q, *p; +{ + (void)q; + free(p); +} + +typedef struct gzFile_s { + FILE *file; + int write; + int err; + char *msg; + z_stream strm; +} *gzFile; + +gzFile gzopen OF((const char *, const char *)); +gzFile gzdopen OF((int, const char *)); +gzFile gz_open OF((const char *, int, const char *)); + +gzFile gzopen(path, mode) +const char *path; +const char *mode; +{ + return gz_open(path, -1, mode); +} + +gzFile gzdopen(fd, mode) +int fd; +const char *mode; +{ + return gz_open(NULL, fd, mode); +} + +gzFile gz_open(path, fd, mode) + const char *path; + int fd; + const char *mode; +{ + gzFile gz; + int ret; + + gz = malloc(sizeof(struct gzFile_s)); + if (gz == NULL) + return NULL; + gz->write = strchr(mode, 'w') != NULL; + gz->strm.zalloc = myalloc; + gz->strm.zfree = myfree; + gz->strm.opaque = Z_NULL; + if (gz->write) + ret = deflateInit2(&(gz->strm), -1, 8, 15 + 16, 8, 0); + else { + gz->strm.next_in = 0; + gz->strm.avail_in = Z_NULL; + ret = inflateInit2(&(gz->strm), 15 + 16); + } + if (ret != Z_OK) { + free(gz); + return NULL; + } + gz->file = path == NULL ? fdopen(fd, gz->write ? "wb" : "rb") : + fopen(path, gz->write ? "wb" : "rb"); + if (gz->file == NULL) { + gz->write ? 
deflateEnd(&(gz->strm)) : inflateEnd(&(gz->strm)); + free(gz); + return NULL; + } + gz->err = 0; + gz->msg = ""; + return gz; +} + +int gzwrite OF((gzFile, const void *, unsigned)); + +int gzwrite(gz, buf, len) + gzFile gz; + const void *buf; + unsigned len; +{ + z_stream *strm; + unsigned char out[BUFLEN]; + + if (gz == NULL || !gz->write) + return 0; + strm = &(gz->strm); + strm->next_in = (void *)buf; + strm->avail_in = len; + do { + strm->next_out = out; + strm->avail_out = BUFLEN; + (void)deflate(strm, Z_NO_FLUSH); + fwrite(out, 1, BUFLEN - strm->avail_out, gz->file); + } while (strm->avail_out == 0); + return len; +} + +int gzread OF((gzFile, void *, unsigned)); + +int gzread(gz, buf, len) + gzFile gz; + void *buf; + unsigned len; +{ + int ret; + unsigned got; + unsigned char in[1]; + z_stream *strm; + + if (gz == NULL || gz->write) + return 0; + if (gz->err) + return 0; + strm = &(gz->strm); + strm->next_out = (void *)buf; + strm->avail_out = len; + do { + got = fread(in, 1, 1, gz->file); + if (got == 0) + break; + strm->next_in = in; + strm->avail_in = 1; + ret = inflate(strm, Z_NO_FLUSH); + if (ret == Z_DATA_ERROR) { + gz->err = Z_DATA_ERROR; + gz->msg = strm->msg; + return 0; + } + if (ret == Z_STREAM_END) + inflateReset(strm); + } while (strm->avail_out); + return len - strm->avail_out; +} + +int gzclose OF((gzFile)); + +int gzclose(gz) + gzFile gz; +{ + z_stream *strm; + unsigned char out[BUFLEN]; + + if (gz == NULL) + return Z_STREAM_ERROR; + strm = &(gz->strm); + if (gz->write) { + strm->next_in = Z_NULL; + strm->avail_in = 0; + do { + strm->next_out = out; + strm->avail_out = BUFLEN; + (void)deflate(strm, Z_FINISH); + fwrite(out, 1, BUFLEN - strm->avail_out, gz->file); + } while (strm->avail_out == 0); + deflateEnd(strm); + } + else + inflateEnd(strm); + fclose(gz->file); + free(gz); + return Z_OK; +} + +const char *gzerror OF((gzFile, int *)); + +const char *gzerror(gz, err) + gzFile gz; + int *err; +{ + *err = gz->err; + return gz->msg; +} + 
+#endif + +static char *prog; + +void error OF((const char *msg)); +void gz_compress OF((FILE *in, gzFile out)); +#ifdef USE_MMAP +int gz_compress_mmap OF((FILE *in, gzFile out)); +#endif +void gz_uncompress OF((gzFile in, FILE *out)); +void file_compress OF((char *file, char *mode)); +void file_uncompress OF((char *file)); +int main OF((int argc, char *argv[])); + +/* =========================================================================== + * Display error message and exit + */ +void error(msg) + const char *msg; +{ + fprintf(stderr, "%s: %s\n", prog, msg); + exit(1); +} + +/* =========================================================================== + * Compress input to output then close both files. + */ + +void gz_compress(in, out) + FILE *in; + gzFile out; +{ + local char buf[BUFLEN]; + int len; + int err; + +#ifdef USE_MMAP + /* Try first compressing with mmap. If mmap fails (minigzip used in a + * pipe), use the normal fread loop. + */ + if (gz_compress_mmap(in, out) == Z_OK) return; +#endif + for (;;) { + len = (int)fread(buf, 1, sizeof(buf), in); + if (ferror(in)) { + perror("fread"); + exit(1); + } + if (len == 0) break; + + if (gzwrite(out, buf, (unsigned)len) != len) error(gzerror(out, &err)); + } + fclose(in); + if (gzclose(out) != Z_OK) error("failed gzclose"); +} + +#ifdef USE_MMAP /* MMAP version, Miguel Albrecht <malbrech@eso.org> */ + +/* Try compressing the input file at once using mmap. Return Z_OK if + * if success, Z_ERRNO otherwise. 
+ */ +int gz_compress_mmap(in, out) + FILE *in; + gzFile out; +{ + int len; + int err; + int ifd = fileno(in); + caddr_t buf; /* mmap'ed buffer for the entire input file */ + off_t buf_len; /* length of the input file */ + struct stat sb; + + /* Determine the size of the file, needed for mmap: */ + if (fstat(ifd, &sb) < 0) return Z_ERRNO; + buf_len = sb.st_size; + if (buf_len <= 0) return Z_ERRNO; + + /* Now do the actual mmap: */ + buf = mmap((caddr_t) 0, buf_len, PROT_READ, MAP_SHARED, ifd, (off_t)0); + if (buf == (caddr_t)(-1)) return Z_ERRNO; + + /* Compress the whole file at once: */ + len = gzwrite(out, (char *)buf, (unsigned)buf_len); + + if (len != (int)buf_len) error(gzerror(out, &err)); + + munmap(buf, buf_len); + fclose(in); + if (gzclose(out) != Z_OK) error("failed gzclose"); + return Z_OK; +} +#endif /* USE_MMAP */ + +/* =========================================================================== + * Uncompress input to output then close both files. + */ +void gz_uncompress(in, out) + gzFile in; + FILE *out; +{ + local char buf[BUFLEN]; + int len; + int err; + + for (;;) { + len = gzread(in, buf, sizeof(buf)); + if (len < 0) error (gzerror(in, &err)); + if (len == 0) break; + + if ((int)fwrite(buf, 1, (unsigned)len, out) != len) { + error("failed fwrite"); + } + } + if (fclose(out)) error("failed fclose"); + + if (gzclose(in) != Z_OK) error("failed gzclose"); +} + + +/* =========================================================================== + * Compress the given file: create a corresponding .gz file and remove the + * original. 
+ */ +void file_compress(file, mode) + char *file; + char *mode; +{ + local char outfile[MAX_NAME_LEN]; + FILE *in; + gzFile out; + + if (strlen(file) + strlen(GZ_SUFFIX) >= sizeof(outfile)) { + fprintf(stderr, "%s: filename too long\n", prog); + exit(1); + } + +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + snprintf(outfile, sizeof(outfile), "%s%s", file, GZ_SUFFIX); +#else + strcpy(outfile, file); + strcat(outfile, GZ_SUFFIX); +#endif + + in = fopen(file, "rb"); + if (in == NULL) { + perror(file); + exit(1); + } + out = gzopen(outfile, mode); + if (out == NULL) { + fprintf(stderr, "%s: can't gzopen %s\n", prog, outfile); + exit(1); + } + gz_compress(in, out); + + unlink(file); +} + + +/* =========================================================================== + * Uncompress the given file and remove the original. + */ +void file_uncompress(file) + char *file; +{ + local char buf[MAX_NAME_LEN]; + char *infile, *outfile; + FILE *out; + gzFile in; + unsigned len = strlen(file); + + if (len + strlen(GZ_SUFFIX) >= sizeof(buf)) { + fprintf(stderr, "%s: filename too long\n", prog); + exit(1); + } + +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + snprintf(buf, sizeof(buf), "%s", file); +#else + strcpy(buf, file); +#endif + + if (len > SUFFIX_LEN && strcmp(file+len-SUFFIX_LEN, GZ_SUFFIX) == 0) { + infile = file; + outfile = buf; + outfile[len-3] = '\0'; + } else { + outfile = file; + infile = buf; +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + snprintf(buf + len, sizeof(buf) - len, "%s", GZ_SUFFIX); +#else + strcat(infile, GZ_SUFFIX); +#endif + } + in = gzopen(infile, "rb"); + if (in == NULL) { + fprintf(stderr, "%s: can't gzopen %s\n", prog, infile); + exit(1); + } + out = fopen(outfile, "wb"); + if (out == NULL) { + perror(file); + exit(1); + } + + gz_uncompress(in, out); + + unlink(infile); +} + + +/* =========================================================================== + * Usage: minigzip [-c] [-d] [-f] [-h] [-r] [-1 to -9] [files...] 
+ * -c : write to standard output + * -d : decompress + * -f : compress with Z_FILTERED + * -h : compress with Z_HUFFMAN_ONLY + * -r : compress with Z_RLE + * -1 to -9 : compression level + */ + +int main(argc, argv) + int argc; + char *argv[]; +{ + int copyout = 0; + int uncompr = 0; + gzFile file; + char *bname, outmode[20]; + +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + snprintf(outmode, sizeof(outmode), "%s", "wb6 "); +#else + strcpy(outmode, "wb6 "); +#endif + + prog = argv[0]; + bname = strrchr(argv[0], '/'); + if (bname) + bname++; + else + bname = argv[0]; + argc--, argv++; + + if (!strcmp(bname, "gunzip")) + uncompr = 1; + else if (!strcmp(bname, "zcat")) + copyout = uncompr = 1; + + while (argc > 0) { + if (strcmp(*argv, "-c") == 0) + copyout = 1; + else if (strcmp(*argv, "-d") == 0) + uncompr = 1; + else if (strcmp(*argv, "-f") == 0) + outmode[3] = 'f'; + else if (strcmp(*argv, "-h") == 0) + outmode[3] = 'h'; + else if (strcmp(*argv, "-r") == 0) + outmode[3] = 'R'; + else if ((*argv)[0] == '-' && (*argv)[1] >= '1' && (*argv)[1] <= '9' && + (*argv)[2] == 0) + outmode[2] = (*argv)[1]; + else + break; + argc--, argv++; + } + if (outmode[3] == ' ') + outmode[3] = 0; + if (argc == 0) { + SET_BINARY_MODE(stdin); + SET_BINARY_MODE(stdout); + if (uncompr) { + file = gzdopen(fileno(stdin), "rb"); + if (file == NULL) error("can't gzdopen stdin"); + gz_uncompress(file, stdout); + } else { + file = gzdopen(fileno(stdout), outmode); + if (file == NULL) error("can't gzdopen stdout"); + gz_compress(stdin, file); + } + } else { + if (copyout) { + SET_BINARY_MODE(stdout); + } + do { + if (uncompr) { + if (copyout) { + file = gzopen(*argv, "rb"); + if (file == NULL) + fprintf(stderr, "%s: can't gzopen %s\n", prog, *argv); + else + gz_uncompress(file, stdout); + } else { + file_uncompress(*argv); + } + } else { + if (copyout) { + FILE * in = fopen(*argv, "rb"); + + if (in == NULL) { + perror(*argv); + } else { + file = gzdopen(fileno(stdout), outmode); + if (file 
== NULL) error("can't gzdopen stdout"); + + gz_compress(in, file); + } + + } else { + file_compress(*argv, outmode); + } + } + } while (argv++, --argc); + } + return 0; +}
\ No newline at end of file diff --git a/src/zlib.zig b/src/zlib.zig new file mode 100644 index 000000000..a8b219b50 --- /dev/null +++ b/src/zlib.zig @@ -0,0 +1,573 @@ +// @link "deps/zlib/libz.a" + +const std = @import("std"); + +test "Zlib Read" { + const expected_text = @embedFile("./zlib.test.txt"); + const input = std.mem.span(@embedFile("./zlib.test.gz")); + std.debug.print("zStream Size: {d}", .{@sizeOf(zStream_struct)}); + var output = std.ArrayList(u8).init(std.heap.c_allocator); + var writer = output.writer(); + const ZlibReader = NewZlibReader(@TypeOf(&writer), 4096); + + var reader = try ZlibReader.init(&writer, input, std.heap.c_allocator); + defer reader.deinit(); + try reader.readAll(); + + try std.testing.expectEqualStrings(expected_text, output.items); +} + +test "ZlibArrayList Read" { + const expected_text = @embedFile("./zlib.test.txt"); + const input = std.mem.span(@embedFile("./zlib.test.gz")); + std.debug.print("zStream Size: {d}", .{@sizeOf(zStream_struct)}); + var list = std.ArrayListUnmanaged(u8){}; + try list.ensureUnusedCapacity(std.heap.c_allocator, 4096); + var reader = try ZlibReaderArrayList.init(input, &list, std.heap.c_allocator); + defer reader.deinit(); + try reader.readAll(); + + try std.testing.expectEqualStrings(expected_text, list.items); +} + +pub extern fn zlibVersion() [*c]const u8; + +pub extern fn compress(dest: [*c]Bytef, destLen: [*c]uLongf, source: [*c]const Bytef, sourceLen: uLong) c_int; +pub extern fn compress2(dest: [*c]Bytef, destLen: [*c]uLongf, source: [*c]const Bytef, sourceLen: uLong, level: c_int) c_int; +pub extern fn compressBound(sourceLen: uLong) uLong; +pub extern fn uncompress(dest: [*c]Bytef, destLen: [*c]uLongf, source: [*c]const Bytef, sourceLen: uLong) c_int; +pub const struct_gzFile_s = extern struct { + have: c_uint, + next: [*c]u8, + pos: c_long, +}; +pub const gzFile = [*c]struct_gzFile_s; + +// https://zlib.net/manual.html#Stream +const Byte = u8; +const uInt = u32; +const uLong = u64; +const 
Bytef = Byte; +const charf = u8; +const intf = c_int; +const uIntf = uInt; +const uLongf = uLong; +const voidpc = ?*const c_void; +const voidpf = ?*c_void; +const voidp = ?*c_void; +const z_crc_t = c_uint; + +// typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size)); +// typedef void (*free_func) OF((voidpf opaque, voidpf address)); + +pub const z_alloc_fn = ?fn (*c_void, uInt, uInt) callconv(.C) voidpf; +pub const z_free_fn = ?fn (*c_void, *c_void) callconv(.C) void; + +pub const struct_internal_state = extern struct { + dummy: c_int, +}; +// typedef struct z_stream_s { +// z_const Bytef *next_in; /* next input byte */ +// uInt avail_in; /* number of bytes available at next_in */ +// uLong total_in; /* total number of input bytes read so far */ + +// Bytef *next_out; /* next output byte will go here */ +// uInt avail_out; /* remaining free space at next_out */ +// uLong total_out; /* total number of bytes output so far */ + +// z_const char *msg; /* last error message, NULL if no error */ +// struct internal_state FAR *state; /* not visible by applications */ + +// alloc_func zalloc; /* used to allocate the internal state */ +// free_func zfree; /* used to free the internal state */ +// voidpf opaque; /* private data object passed to zalloc and zfree */ + +// int data_type; /* best guess about the data type: binary or text +// for deflate, or the decoding state for inflate */ +// uLong adler; /* Adler-32 or CRC-32 value of the uncompressed data */ +// uLong reserved; /* reserved for future use */ +// } z_stream; + +pub const zStream_struct = extern struct { + /// next input byte + next_in: [*c]const u8, + /// number of bytes available at next_in + avail_in: uInt, + /// total number of input bytes read so far + total_in: uLong, + + /// next output byte will go here + next_out: [*c]u8, + /// remaining free space at next_out + avail_out: uInt, + /// total number of bytes output so far + total_out: uLong, + + /// last error message, NULL if no error + 
err_msg: [*c]const u8, + /// not visible by applications + internal_state: ?*struct_internal_state, + + /// used to allocate the internal state + alloc_func: z_alloc_fn, + /// used to free the internal state + free_func: z_free_fn, + /// private data object passed to zalloc and zfree + user_data: *c_void, + + /// best guess about the data type: binary or text for deflate, or the decoding state for inflate + data_type: DataType, + + ///Adler-32 or CRC-32 value of the uncompressed data + adler: uLong, + /// reserved for future use + reserved: uLong, +}; + +pub const z_stream = zStream_struct; +pub const z_streamp = [*c]z_stream; + +// #define Z_BINARY 0 +// #define Z_TEXT 1 +// #define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */ +// #define Z_UNKNOWN 2 +pub const DataType = enum(c_int) { + Binary = 0, + Text = 1, + Unknown = 2, +}; + +// #define Z_OK 0 +// #define Z_STREAM_END 1 +// #define Z_NEED_DICT 2 +// #define Z_ERRNO (-1) +// #define Z_STREAM_ERROR (-2) +// #define Z_DATA_ERROR (-3) +// #define Z_MEM_ERROR (-4) +// #define Z_BUF_ERROR (-5) +// #define Z_VERSION_ERROR (-6) +pub const ReturnCode = enum(c_int) { + Ok = 0, + StreamEnd = 1, + NeedDict = 2, + ErrNo = -1, + StreamError = -2, + DataError = -3, + MemError = -4, + BufError = -5, + VersionError = -6, +}; + +// #define Z_NO_FLUSH 0 +// #define Z_PARTIAL_FLUSH 1 +// #define Z_SYNC_FLUSH 2 +// #define Z_FULL_FLUSH 3 +// #define Z_FINISH 4 +// #define Z_BLOCK 5 +// #define Z_TREES 6 +pub const FlushValue = enum(c_int) { + NoFlush = 0, + PartialFlush = 1, + /// Z_SYNC_FLUSH requests that inflate() flush as much output as possible to the output buffer + SyncFlush = 2, + FullFlush = 3, + Finish = 4, + + /// Z_BLOCK requests that inflate() stop if and when it gets to the next deflate block boundary. When decoding the zlib or gzip format, this will cause inflate() to return immediately after the header and before the first block. 
When doing a raw inflate, inflate() will go ahead and process the first block, and will return when it gets to the end of that block, or when it runs out of data. The Z_BLOCK option assists in appending to or combining deflate streams. To assist in this, on return inflate() always sets strm->data_type to the number of unused bits in the last byte taken from strm->next_in, plus 64 if inflate() is currently decoding the last block in the deflate stream, plus 128 if inflate() returned immediately after decoding an end-of-block code or decoding the complete header up to just before the first byte of the deflate stream. The end-of-block will not be indicated until all of the uncompressed data from that block has been written to strm->next_out. The number of unused bits may in general be greater than seven, except when bit 7 of data_type is set, in which case the number of unused bits will be less than eight. data_type is set as noted here every time inflate() returns for all flush options, and so can be used to determine the amount of currently consumed input in bits. + Block = 5, + + /// The Z_TREES option behaves as Z_BLOCK does, but it also returns when the end of each deflate block header is reached, before any actual data in that block is decoded. This allows the caller to determine the length of the deflate block header for later use in random access within a deflate block. 256 is added to the value of strm->data_type when inflate() returns immediately after reaching the end of the deflate block header. + Trees = 6, +}; + +// ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm)); + +/// Initializes the internal stream state for decompression. The fields next_in, avail_in, zalloc, zfree and opaque must be initialized before by the caller. In the current version of inflate, the provided input is not read or consumed. 
The allocation of a sliding window will be deferred to the first call of inflate (if the decompression does not complete on the first call). If zalloc and zfree are set to Z_NULL, inflateInit updates them to use default allocation functions. +/// +/// inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_VERSION_ERROR if the zlib library version is incompatible with the version assumed by the caller, or Z_STREAM_ERROR if the parameters are invalid, such as a null pointer to the structure. msg is set to null if there is no error message. inflateInit does not perform any decompression. Actual decompression will be done by inflate(). So next_in, and avail_in, next_out, and avail_out are unused and unchanged. The current implementation of inflateInit() does not process any header information—that is deferred until inflate() is called. +pub extern fn inflateInit_(strm: z_streamp, version: [*c]const u8, stream_size: c_int) ReturnCode; +pub extern fn inflateInit2_(strm: z_streamp, window_size: c_int, version: [*c]const u8, stream_size: c_int) ReturnCode; + +/// inflate decompresses as much data as possible, and stops when the input buffer becomes empty or the output buffer becomes full. It may introduce some output latency (reading input without producing any output) except when forced to flush. +/// The detailed semantics are as follows. inflate performs one or both of the following actions: +/// +/// - Decompress more input starting at next_in and update next_in and avail_in accordingly. If not all input can be processed (because there is not enough room in the output buffer), then next_in and avail_in are updated accordingly, and processing will resume at this point for the next call of inflate(). +/// - Generate more output starting at next_out and update next_out and avail_out accordingly. 
inflate() provides as much output as possible, until there is no more input data or no more space in the output buffer (see below about the flush parameter). +/// +/// Before the call of inflate(), the application should ensure that at least one of the actions is possible, by providing more input and/or consuming more output, and updating the next_* and avail_* values accordingly. If the caller of inflate() does not provide both available input and available output space, it is possible that there will be no progress made. The application can consume the uncompressed output when it wants, for example when the output buffer is full (avail_out == 0), or after each call of inflate(). If inflate returns Z_OK and with zero avail_out, it must be called again after making room in the output buffer because there might be more output pending. +/// +/// The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH, Z_BLOCK, or Z_TREES. Z_SYNC_FLUSH requests that inflate() flush as much output as possible to the output buffer. Z_BLOCK requests that inflate() stop if and when it gets to the next deflate block boundary. When decoding the zlib or gzip format, this will cause inflate() to return immediately after the header and before the first block. When doing a raw inflate, inflate() will go ahead and process the first block, and will return when it gets to the end of that block, or when it runs out of data. +/// +/// The Z_BLOCK option assists in appending to or combining deflate streams. To assist in this, on return inflate() always sets strm->data_type to the number of unused bits in the last byte taken from strm->next_in, plus 64 if inflate() is currently decoding the last block in the deflate stream, plus 128 if inflate() returned immediately after decoding an end-of-block code or decoding the complete header up to just before the first byte of the deflate stream. 
The end-of-block will not be indicated until all of the uncompressed data from that block has been written to strm->next_out. The number of unused bits may in general be greater than seven, except when bit 7 of data_type is set, in which case the number of unused bits will be less than eight. data_type is set as noted here every time inflate() returns for all flush options, and so can be used to determine the amount of currently consumed input in bits. +/// +/// The Z_TREES option behaves as Z_BLOCK does, but it also returns when the end of each deflate block header is reached, before any actual data in that block is decoded. This allows the caller to determine the length of the deflate block header for later use in random access within a deflate block. 256 is added to the value of strm->data_type when inflate() returns immediately after reaching the end of the deflate block header. +/// +/// inflate() should normally be called until it returns Z_STREAM_END or an error. However if all decompression is to be performed in a single step (a single call of inflate), the parameter flush should be set to Z_FINISH. In this case all pending input is processed and all pending output is flushed; avail_out must be large enough to hold all of the uncompressed data for the operation to complete. (The size of the uncompressed data may have been saved by the compressor for this purpose.) The use of Z_FINISH is not required to perform an inflation in one step. However it may be used to inform inflate that a faster approach can be used for the single inflate() call. Z_FINISH also informs inflate to not maintain a sliding window if the stream completes, which reduces inflate's memory footprint. If the stream does not complete, either because not all of the stream is provided or not enough output space is provided, then a sliding window will be allocated and inflate() can be called again to continue the operation as if Z_NO_FLUSH had been used. 
+/// +/// In this implementation, inflate() always flushes as much output as possible to the output buffer, and always uses the faster approach on the first call. So the effects of the flush parameter in this implementation are on the return value of inflate() as noted below, when inflate() returns early when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of memory for a sliding window when Z_FINISH is used. +/// +/// If a preset dictionary is needed after this call (see inflateSetDictionary below), inflate sets strm->adler to the Adler-32 checksum of the dictionary chosen by the compressor and returns Z_NEED_DICT; otherwise it sets strm->adler to the Adler-32 checksum of all output produced so far (that is, total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described below. At the end of the stream, inflate() checks that its computed Adler-32 checksum is equal to that saved by the compressor and returns Z_STREAM_END only if the checksum is correct. +/// +/// inflate() will decompress and check either zlib-wrapped or gzip-wrapped deflate data. The header type is detected automatically, if requested when initializing with inflateInit2(). Any information contained in the gzip header is not retained unless inflateGetHeader() is used. When processing gzip-wrapped deflate data, strm->adler32 is set to the CRC-32 of the output produced so far. The CRC-32 is checked against the gzip trailer, as is the uncompressed length, modulo 2^32. 
+///
+/// inflate() returns Z_OK if some progress has been made (more input processed or more output produced), Z_STREAM_END if the end of the compressed data has been reached and all uncompressed output has been produced, Z_NEED_DICT if a preset dictionary is needed at this point, Z_DATA_ERROR if the input data was corrupted (input stream not conforming to the zlib format or incorrect check value, in which case strm->msg points to a string with a more specific error), Z_STREAM_ERROR if the stream structure was inconsistent (for example next_in or next_out was Z_NULL, or the state was inadvertently written over by the application), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR if no progress was possible or if there was not enough room in the output buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and inflate() can be called again with more input and more output space to continue decompressing. If Z_DATA_ERROR is returned, the application may then call inflateSync() to look for a good compression block if a partial recovery of the data is to be attempted.
+extern fn inflate(stream: [*c]zStream_struct, flush: FlushValue) ReturnCode;
+
+/// Return code of `inflateEnd`, typed as an enum over the raw C `int`.
+///
+/// inflateEnd returns Z_OK if success, or Z_STREAM_ERROR if the stream state was inconsistent.
+///
+/// `StreamError` carries the Z_STREAM_ERROR value (-2). It has to be a declared tag:
+/// this is an exhaustive enum used as the return type of an extern function, so a
+/// failure code without a matching tag would not be a valid value of the type.
+/// `StreamEnd` is not a documented result of inflateEnd; it is retained only so that
+/// any existing reference to it keeps compiling.
+const InflateEndResult = enum(c_int) {
+    Ok = 0,
+    StreamEnd = 1,
+    StreamError = -2,
+};
+
+/// All dynamically allocated data structures for this stream are freed. This function discards any unprocessed input and does not flush any pending output.
+extern fn inflateEnd(stream: [*c]zStream_struct) InflateEndResult; + +pub fn NewZlibReader(comptime Writer: type, comptime buffer_size: usize) type { + return struct { + const ZlibReader = @This(); + pub const State = enum { + Uninitialized, + Inflating, + End, + Error, + }; + + context: Writer, + input: []const u8, + buf: [buffer_size]u8, + zlib: zStream_struct, + allocator: *std.mem.Allocator, + arena: std.heap.ArenaAllocator, + state: State = State.Uninitialized, + + pub fn alloc(ctx: *c_void, items: uInt, len: uInt) callconv(.C) *c_void { + var this = @ptrCast(*ZlibReader, @alignCast(@alignOf(*ZlibReader), ctx)); + const buf = this.arena.allocator.alloc(u8, items * len) catch unreachable; + return buf.ptr; + } + + // we free manually all at once + pub fn free(ctx: *c_void, ptr: *c_void) callconv(.C) void {} + + pub fn deinit(this: *ZlibReader) void { + var allocator = this.allocator; + this.end(); + this.arena.deinit(); + allocator.destroy(this); + } + + pub fn end(this: *ZlibReader) void { + if (this.state == State.Inflating) { + _ = inflateEnd(&this.zlib); + this.state = State.End; + } + } + + pub fn init(writer: Writer, input: []const u8, allocator: *std.mem.Allocator) !*ZlibReader { + var zlib_reader = try allocator.create(ZlibReader); + zlib_reader.* = ZlibReader{ + .context = writer, + .input = input, + .buf = std.mem.zeroes([buffer_size]u8), + .allocator = allocator, + .zlib = undefined, + .arena = std.heap.ArenaAllocator.init(allocator), + }; + + zlib_reader.zlib = zStream_struct{ + .next_in = input.ptr, + .avail_in = @intCast(uInt, input.len), + .total_in = @intCast(uInt, input.len), + + .next_out = &zlib_reader.buf, + .avail_out = buffer_size, + .total_out = buffer_size, + + .err_msg = null, + .alloc_func = ZlibReader.alloc, + .free_func = ZlibReader.free, + + .internal_state = null, + .user_data = zlib_reader, + + .data_type = DataType.Unknown, + .adler = 0, + .reserved = 0, + }; + + switch (inflateInit2_(&zlib_reader.zlib, 15 + 32, zlibVersion(), 
@sizeOf(zStream_struct))) { + ReturnCode.Ok => return zlib_reader, + ReturnCode.MemError => { + zlib_reader.deinit(); + return error.OutOfMemory; + }, + ReturnCode.StreamError => { + zlib_reader.deinit(); + return error.InvalidArgument; + }, + ReturnCode.VersionError => { + zlib_reader.deinit(); + return error.InvalidArgument; + }, + else => unreachable, + } + } + + pub fn errorMessage(this: *ZlibReader) ?[]const u8 { + if (this.zlib.err_msg) |msg_ptr| { + return std.mem.sliceTo(msg_ptr, 0); + } + + return null; + } + + pub fn readAll(this: *ZlibReader) !void { + while (this.state == State.Uninitialized or this.state == State.Inflating) { + + // Before the call of inflate(), the application should ensure + // that at least one of the actions is possible, by providing + // more input and/or consuming more output, and updating the + // next_* and avail_* values accordingly. If the caller of + // inflate() does not provide both available input and available + // output space, it is possible that there will be no progress + // made. The application can consume the uncompressed output + // when it wants, for example when the output buffer is full + // (avail_out == 0), or after each call of inflate(). If inflate + // returns Z_OK and with zero avail_out, it must be called again + // after making room in the output buffer because there might be + // more output pending. + + // - Decompress more input starting at next_in and update + // next_in and avail_in accordingly. If not all input can be + // processed (because there is not enough room in the output + // buffer), then next_in and avail_in are updated accordingly, + // and processing will resume at this point for the next call + // of inflate(). + + // - Generate more output starting at next_out and update + // next_out and avail_out accordingly. inflate() provides as + // much output as possible, until there is no more input data + // or no more space in the output buffer (see below about the + // flush parameter). 
+ + if (this.zlib.avail_out == 0) { + var written = try this.context.write(&this.buf); + while (written < this.zlib.avail_out) { + written += try this.context.write(this.buf[written..]); + } + this.zlib.avail_out = buffer_size; + this.zlib.next_out = &this.buf; + } + + if (this.zlib.avail_in == 0) { + return error.ShortRead; + } + + const rc = inflate(&this.zlib, FlushValue.PartialFlush); + this.state = State.Inflating; + + switch (rc) { + ReturnCode.StreamEnd => { + this.state = State.End; + var remainder = this.buf[0 .. buffer_size - this.zlib.avail_out]; + remainder = remainder[try this.context.write(remainder)..]; + while (remainder.len > 0) { + remainder = remainder[try this.context.write(remainder)..]; + } + this.end(); + return; + }, + ReturnCode.MemError => { + this.state = State.Error; + return error.OutOfMemory; + }, + ReturnCode.StreamError, + ReturnCode.DataError, + ReturnCode.BufError, + ReturnCode.NeedDict, + ReturnCode.VersionError, + ReturnCode.ErrNo, + => { + this.state = State.Error; + return error.ZlibError; + }, + ReturnCode.Ok => {}, + } + } + } + }; +} + +pub const ZlibReaderArrayList = struct { + const ZlibReader = ZlibReaderArrayList; + pub const State = enum { + Uninitialized, + Inflating, + End, + Error, + }; + + input: []const u8, + list: std.ArrayListUnmanaged(u8), + list_ptr: *std.ArrayListUnmanaged(u8), + zlib: zStream_struct, + allocator: *std.mem.Allocator, + arena: std.heap.ArenaAllocator, + state: State = State.Uninitialized, + + pub fn alloc(ctx: *c_void, items: uInt, len: uInt) callconv(.C) *c_void { + var this = @ptrCast(*ZlibReader, @alignCast(@alignOf(*ZlibReader), ctx)); + const buf = this.arena.allocator.alloc(u8, items * len) catch unreachable; + return buf.ptr; + } + + // we free manually all at once + pub fn free(ctx: *c_void, ptr: *c_void) callconv(.C) void {} + + pub fn deinit(this: *ZlibReader) void { + var allocator = this.allocator; + this.end(); + this.arena.deinit(); + allocator.destroy(this); + } + + pub fn 
end(this: *ZlibReader) void { + if (this.state == State.Inflating) { + _ = inflateEnd(&this.zlib); + this.state = State.End; + } + } + + pub fn init(input: []const u8, list: *std.ArrayListUnmanaged(u8), allocator: *std.mem.Allocator) !*ZlibReader { + var zlib_reader = try allocator.create(ZlibReader); + zlib_reader.* = ZlibReader{ + .input = input, + .list = list.*, + .list_ptr = list, + .allocator = allocator, + .zlib = undefined, + .arena = std.heap.ArenaAllocator.init(allocator), + }; + + zlib_reader.zlib = zStream_struct{ + .next_in = input.ptr, + .avail_in = @intCast(uInt, input.len), + .total_in = @intCast(uInt, input.len), + + .next_out = zlib_reader.list.items.ptr, + .avail_out = @intCast(u32, zlib_reader.list.items.len), + .total_out = zlib_reader.list.items.len, + + .err_msg = null, + .alloc_func = ZlibReader.alloc, + .free_func = ZlibReader.free, + + .internal_state = null, + .user_data = zlib_reader, + + .data_type = DataType.Unknown, + .adler = 0, + .reserved = 0, + }; + + switch (inflateInit2_(&zlib_reader.zlib, 15 + 32, zlibVersion(), @sizeOf(zStream_struct))) { + ReturnCode.Ok => return zlib_reader, + ReturnCode.MemError => { + zlib_reader.deinit(); + return error.OutOfMemory; + }, + ReturnCode.StreamError => { + zlib_reader.deinit(); + return error.InvalidArgument; + }, + ReturnCode.VersionError => { + zlib_reader.deinit(); + return error.InvalidArgument; + }, + else => unreachable, + } + } + + pub fn errorMessage(this: *ZlibReader) ?[]const u8 { + if (this.zlib.err_msg) |msg_ptr| { + return std.mem.sliceTo(msg_ptr, 0); + } + + return null; + } + + pub fn readAll(this: *ZlibReader) !void { + defer { + this.list.shrinkRetainingCapacity(this.zlib.total_out); + this.list_ptr.* = this.list; + } + + while (this.state == State.Uninitialized or this.state == State.Inflating) { + + // Before the call of inflate(), the application should ensure + // that at least one of the actions is possible, by providing + // more input and/or consuming more output, and 
updating the + // next_* and avail_* values accordingly. If the caller of + // inflate() does not provide both available input and available + // output space, it is possible that there will be no progress + // made. The application can consume the uncompressed output + // when it wants, for example when the output buffer is full + // (avail_out == 0), or after each call of inflate(). If inflate + // returns Z_OK and with zero avail_out, it must be called again + // after making room in the output buffer because there might be + // more output pending. + + // - Decompress more input starting at next_in and update + // next_in and avail_in accordingly. If not all input can be + // processed (because there is not enough room in the output + // buffer), then next_in and avail_in are updated accordingly, + // and processing will resume at this point for the next call + // of inflate(). + + // - Generate more output starting at next_out and update + // next_out and avail_out accordingly. inflate() provides as + // much output as possible, until there is no more input data + // or no more space in the output buffer (see below about the + // flush parameter). 
+ + if (this.zlib.avail_out == 0) { + const initial = this.list.items.len; + try this.list.ensureUnusedCapacity(this.allocator, 4096); + this.list.expandToCapacity(); + this.zlib.next_out = &this.list.items[initial]; + this.zlib.avail_out = @intCast(u32, this.list.items.len - initial); + } + + if (this.zlib.avail_in == 0) { + return error.ShortRead; + } + + const rc = inflate(&this.zlib, FlushValue.PartialFlush); + this.state = State.Inflating; + + switch (rc) { + ReturnCode.StreamEnd => { + this.state = State.End; + + this.end(); + return; + }, + ReturnCode.MemError => { + this.state = State.Error; + return error.OutOfMemory; + }, + ReturnCode.StreamError, + ReturnCode.DataError, + ReturnCode.BufError, + ReturnCode.NeedDict, + ReturnCode.VersionError, + ReturnCode.ErrNo, + => { + this.state = State.Error; + return error.ZlibError; + }, + ReturnCode.Ok => {}, + } + } + } +}; |