aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorGravatar Jarred Sumner <jarred@jarredsumner.com> 2021-12-17 23:06:56 -0800
committerGravatar Jarred Sumner <jarred@jarredsumner.com> 2021-12-17 23:06:56 -0800
commit d4c55557c3e6de29b9676d9ea0e8eda0970cebfc (patch)
tree 3699e79183e5f64c7c7d129b667520e65c3d7ef6 /src
parent 0e39174c481adeeb000fe021fa49616906fd9cd8 (diff)
download bun-d4c55557c3e6de29b9676d9ea0e8eda0970cebfc.tar.gz
bun-d4c55557c3e6de29b9676d9ea0e8eda0970cebfc.tar.zst
bun-d4c55557c3e6de29b9676d9ea0e8eda0970cebfc.zip
Implement hop archive format
Diffstat (limited to 'src')
-rw-r--r-- src/hop/hop.zig 112
-rw-r--r-- src/hop/schema.zig 45
-rw-r--r-- src/libarchive/libarchive.zig 156
3 files changed, 257 insertions, 56 deletions
diff --git a/src/hop/hop.zig b/src/hop/hop.zig
index 7d257c3d7..90def6dc6 100644
--- a/src/hop/hop.zig
+++ b/src/hop/hop.zig
@@ -2,6 +2,7 @@ const std = @import("std");
const C = @import("../c.zig");
const Schema = @import("./schema.zig");
const Hop = Schema.Hop;
+const Environment = @import("../env.zig");
const string = []const u8;
@@ -24,9 +25,10 @@ pub fn sortDesc(in: []string) void {
std.sort.sort([]const u8, in, {}, cmpStringsDesc);
}
-const Library = struct {
+pub const Library = struct {
pub const magic_bytes = "#!/usr/bin/env hop\n\n";
const Header = [magic_bytes.len + 5]u8;
+ pub usingnamespace Schema.Hop;
archive: Hop.Archive,
allocator: *std.mem.Allocator,
@@ -36,6 +38,7 @@ const Library = struct {
pub const Builder = struct {
allocator: *std.mem.Allocator,
files: std.ArrayListUnmanaged(Hop.File),
+ directories: std.ArrayListUnmanaged(Hop.Directory),
metadata_bytes: std.ArrayListUnmanaged(u8),
destination: std.fs.File = undefined,
@@ -43,6 +46,7 @@ const Library = struct {
return Builder{
.allocator = allocator,
.metadata_bytes = .{},
+ .directories = .{},
.files = std.ArrayListUnmanaged(Hop.File){},
};
}
@@ -62,14 +66,31 @@ const Library = struct {
}
};
+ const DirSorter = struct { // comparator context used to order Hop.Directory records by name
+ metadata: []const u8, // byte buffer that each directory's name.off / name.len range is sliced from
+ pub fn sortByName(this: DirSorter, lhs: Hop.Directory, rhs: Hop.Directory) bool { // "less than" callback passed to std.sort.sort in done()
+ return std.mem.order(u8, this.metadata[lhs.name.off..][0..lhs.name.len], this.metadata[rhs.name.off..][0..rhs.name.len]) == .lt; // true only when lhs's name bytes order strictly before rhs's
+ }
+ };
+
pub fn done(this: *Builder) !Hop.Archive {
const metadata_offset = @truncate(u32, try this.destination.getPos());
- var sorter = FileSorter{
- .metadata = this.metadata_bytes.items,
- };
+ {
+ var sorter = FileSorter{
+ .metadata = this.metadata_bytes.items,
+ };
- std.sort.sort(Hop.File, this.files.items, sorter, FileSorter.sortByName);
+ std.sort.sort(Hop.File, this.files.items, sorter, FileSorter.sortByName);
+ }
+
+ {
+ var sorter = DirSorter{
+ .metadata = this.metadata_bytes.items,
+ };
+
+ std.sort.sort(Hop.Directory, this.directories.items, sorter, DirSorter.sortByName);
+ }
var name_hashes = try this.allocator.alloc(u32, this.files.items.len);
@@ -80,6 +101,7 @@ const Library = struct {
var archive = Hop.Archive{
.version = 1,
.files = this.files.items,
+ .directories = this.directories.items,
.name_hashes = name_hashes,
.content_offset = metadata_offset,
.metadata = this.metadata_bytes.items,
@@ -132,11 +154,17 @@ const Library = struct {
const written = try std.os.copy_file_range(in.handle, 0, this.destination.handle, off_in, stat.size, 0);
try this.destination.seekTo(off_in + written);
const end = try this.destination.getPos();
- try this.appendFileMetadata(name, off_in, end, stat);
+ try this.appendFileMetadataFromDisk(name, off_in, end, stat);
try this.destination.writeAll(&[_]u8{0});
}
- pub fn appendFileMetadata(this: *Builder, name_buf: []const u8, start_pos: u64, end_pos: u64, stat: std.fs.File.Stat) !void {
+ pub fn appendDirectoryFromDisk(this: *Builder, name: []const u8, in: std.fs.Dir) !void { // stats `name` relative to `in` and records it as a directory entry
+ var stat = try std.os.fstatat(in.fd, name, 0); // flags argument is 0; a failed stat is returned to the caller
+ // NOTE(review): appendDirectoryRecursively calls this with three arguments (entry.path, entry.basename, entry.dir) but only `name` and `in` are declared here — confirm which is intended.
+ try this.appendDirMetadataFromDisk(name, stat); // NOTE(review): the callee declares `stat: std.fs.File.Stat` — verify that matches what std.os.fstatat returns
+ }
+
+ pub fn appendFileMetadataFromDisk(this: *Builder, name_buf: []const u8, start_pos: u64, end_pos: u64, stat: std.fs.File.Stat) !void {
const name = try this.appendMetadata(name_buf);
try this.files.append(
this.allocator,
@@ -151,15 +179,48 @@ const Library = struct {
);
}
+ pub fn appendDirMetadataFromDisk(this: *Builder, name_buf: []const u8, stat: std.fs.File.Stat) !void { // appends one Hop.Directory built from a name and a stat result
+ const name = try this.appendMetadata(name_buf); // the returned value becomes the record's `name`; appendMetadata itself is defined outside this view
+ try this.directories.append(
+ this.allocator,
+ Hop.Directory{
+ .name = name,
+ .name_hash = @truncate(u32, std.hash.Wyhash.hash(0, name_buf)), // Wyhash with seed 0, truncated to 32 bits
+ .chmod = @truncate(u32, stat.mode), // stat.mode truncated to 32 bits
+ },
+ );
+ }
+
+ pub fn appendFileMetadata(this: *Builder, name_buf: []const u8, meta: Hop.File) !void { // appends one Hop.File whose data/chmod/mtime/ctime are copied from `meta`
+ const name = try this.appendMetadata(name_buf); // name is derived from name_buf here; meta.name is not read
+ try this.files.append(
+ this.allocator,
+ Hop.File{
+ .name = name,
+ .name_hash = @truncate(u32, std.hash.Wyhash.hash(0, name_buf)), // recomputed from name_buf (Wyhash, seed 0, truncated to 32 bits); meta.name_hash is not read
+ .data = meta.data,
+ .chmod = meta.chmod,
+ .mtime = meta.mtime,
+ .ctime = meta.ctime,
+ },
+ );
+ }
+
pub fn appendDirectoryRecursively(this: *Builder, dir: std.fs.Dir) !void { // walks `dir` and appends every entry whose kind is .Directory or .File
var walker = try dir.walk(this.allocator);
defer walker.deinit(); // walker is released when this function returns, on success or error
while (try walker.next()) |entry_| {
const entry: std.fs.Dir.Walker.WalkerEntry = entry_;
- if (entry.kind != .File) continue;
-
- try this.appendContentFromDisk(entry.path, try entry.dir.openFile(entry.basename, .{ .read = true }));
+ switch (entry.kind) {
+ .Directory => {
+ try this.appendDirectoryFromDisk(entry.path, entry.basename, entry.dir); // NOTE(review): three arguments are passed, but appendDirectoryFromDisk is declared as (this, name, in) — confirm
+ },
+ .File => {
+ try this.appendContentFromDisk(entry.path, try entry.dir.openFile(entry.basename, .{ .read = true })); // the file is opened through entry.dir using entry.basename
+ },
+ else => {}, // every other entry kind is ignored
+ }
}
}
};
@@ -182,7 +243,36 @@ const Library = struct {
};
};
- const written = try std.os.copy_file_range(this.fd.?, file.data.off, out.handle, 0, file.data.len, 0);
+ if (file.data.len > std.mem.page_size) {
+ if (comptime Environment.isLinux) {
+ _ = std.os.system.fallocate(out.handle, 0, 0, @intCast(i64, file.data.len));
+ } else if (comptime Environment.isMac) {
+ try C.preallocate_file(
+ out.handle,
+ @intCast(std.os.off_t, 0),
+ @intCast(std.os.off_t, file.data.len),
+ );
+ }
+ }
+
+ var remain: usize = file.data.len;
+ var written: usize = 0;
+ var in_off: usize = file.data.off;
+
+ while (remain > 0) {
+ const wrote = try std.os.copy_file_range(
+ this.fd.?,
+ in_off,
+ out.handle,
+ written,
+ remain,
+ 0,
+ );
+ in_off += wrote;
+ remain -= wrote;
+ written += wrote;
+ }
+
if (verbose) {
std.log.info("Extracted file: {s} ({d} bytes)\n", .{ name_slice, written });
}
diff --git a/src/hop/schema.zig b/src/hop/schema.zig
index fd80e71f3..8e8a15eaf 100644
--- a/src/hop/schema.zig
+++ b/src/hop/schema.zig
@@ -397,6 +397,35 @@ pub const Hop = struct {
}
};
+ pub const Directory = packed struct {
+ /// Offset/length pair locating the directory's name; hop.zig slices the archive metadata bytes with it.
+ name: StringPointer,
+
+ /// 32-bit hash of the name (hop.zig stores a truncated Wyhash with seed 0). Defaults to 0.
+ name_hash: u32 = 0,
+
+ /// Mode/permission bits as a u32. Defaults to 0.
+ chmod: u32 = 0,
+
+ pub fn decode(reader: anytype) anyerror!Directory { // reads name, name_hash and chmod from `reader`, in that order
+ var this = Directory{
+ .name = StringPointer{},
+ };
+
+ this.name = try reader.readValue(StringPointer);
+ this.name_hash = try reader.readValue(u32);
+ this.chmod = try reader.readValue(u32);
+
+ return this;
+ }
+
+ pub fn encode(this: *const @This(), writer: anytype) anyerror!void { // writes the three fields in the same order decode reads them
+ try writer.writeValue(@TypeOf(this.name), this.name);
+ try writer.writeInt(this.name_hash);
+ try writer.writeInt(this.chmod);
+ }
+ };
+
pub const Archive = struct {
/// version
version: ?u32 = null,
@@ -407,6 +436,9 @@ pub const Hop = struct {
/// files
files: []align(1) const File,
+ /// directories
+ directories: []align(1) const Directory,
+
/// name_hashes
name_hashes: []align(1) const u32,
@@ -432,9 +464,12 @@ pub const Hop = struct {
this.files = try reader.readArray(File);
},
4 => {
- this.name_hashes = try reader.readArray(u32);
+ this.directories = try reader.readArray(Directory);
},
5 => {
+ this.name_hashes = try reader.readArray(u32);
+ },
+ 6 => {
this.metadata = try reader.readArray(u8);
},
else => {
@@ -458,12 +493,16 @@ pub const Hop = struct {
try writer.writeFieldID(3);
try writer.writeArray(File, this.files);
}
- if (this.name_hashes.len > 0) {
+ if (this.directories.len > 0) {
try writer.writeFieldID(4);
+ try writer.writeArray(Directory, this.directories);
+ }
+ if (this.name_hashes.len > 0) {
+ try writer.writeFieldID(5);
try writer.writeArray(u32, this.name_hashes);
}
if (this.metadata.len > 0) {
- try writer.writeFieldID(5);
+ try writer.writeFieldID(6);
try writer.writeArray(u8, this.metadata);
}
try writer.endMessage();
diff --git a/src/libarchive/libarchive.zig b/src/libarchive/libarchive.zig
index 1ab109068..47ac23645 100644
--- a/src/libarchive/libarchive.zig
+++ b/src/libarchive/libarchive.zig
@@ -523,6 +523,49 @@ pub const Archive = struct {
}
}
+ const SeekableBufferedWriter = struct { // page-sized staging buffer that is flushed to `fd` with pwrite at an explicit offset
+ pos: u64 = 0, // file offset at which the next flush starts writing
+ buf: [std.mem.page_size]u8 = undefined, // staging storage; only buf[0..len] holds valid bytes
+ len: usize = 0, // number of buffered bytes not yet flushed
+ fd: std.os.fd_t = 0, // descriptor handed to pwrite
+
+ pub fn flush(this: *SeekableBufferedWriter) !usize { // writes buf[0..len] starting at pos, empties the buffer, and returns the number of bytes written
+ const end = this.len + this.pos; // NOTE(review): `end` is not referenced again in this function
+ var off: usize = this.pos;
+ const initial = this.pos;
+ defer this.pos = off; // pos is advanced to `off` on every exit, including an error return from pwrite (in which case len is left unchanged)
+ var slice = this.buf[0..this.len];
+ while (slice.len > 0) { // keep calling pwrite until every buffered byte has been written
+ const written = try std.os.pwrite(this.fd, slice, off);
+ slice = slice[written..];
+ off += written;
+ }
+ this.len = 0;
+ return off - initial;
+ }
+
+ pub fn write(this: *SeekableBufferedWriter, buf: []const u8) !usize { // copies `buf` into the staging buffer, flushing when it fills; returns buf.len on success
+ if (this.buf.len - this.len < 32) { // flush up front when fewer than 32 bytes of space remain
+ _ = try this.flush();
+ }
+ var queue = buf; // portion of the input not yet copied
+ while (queue.len > 0) {
+ var to_write = @minimum(this.buf.len - this.len, queue.len);
+ if (to_write == 0 and this.len > 0) { // buffer is full: flush to make room, then recompute the chunk size
+ _ = try this.flush();
+ to_write = @minimum(this.buf.len - this.len, queue.len);
+ }
+
+ var remainder = queue[0..to_write]; // the chunk copied this iteration
+ queue = queue[remainder.len..];
+ @memcpy(this.buf[this.len..].ptr, remainder.ptr, remainder.len);
+ this.len += remainder.len;
+ }
+
+ return buf.len;
+ }
+ };
+
pub fn convertToHop(
hop: *Hop.Builder,
file_buffer: []const u8,
@@ -530,7 +573,6 @@ pub const Archive = struct {
comptime FilePathAppender: type,
appender: FilePathAppender,
comptime depth_to_skip: usize,
- comptime close_handles: bool,
comptime log: bool,
) !u32 {
var entry: *lib.archive_entry = undefined;
@@ -544,6 +586,18 @@ pub const Archive = struct {
var archive = stream.archive;
var count: u32 = 0;
+ var chunk_remain: usize = 0;
+ var chunk_offset: isize = 0;
+ var chunk_output_offset: isize = 0;
+ var chunk_size: usize = 0;
+ var chunk_buf: ?[*]u8 = null;
+ const handle = hop.destination.handle;
+
+ var writer = SeekableBufferedWriter{
+ .pos = try hop.destination.getPos(),
+ .fd = hop.destination.handle,
+ };
+
loop: while (true) {
const r = @intToEnum(Status, lib.archive_read_next_header(archive, &entry));
@@ -559,65 +613,83 @@ pub const Archive = struct {
if (tokenizer.next() == null) continue :loop;
}
+ const Kind = std.fs.Dir.Entry.Kind;
+ const entry_kind: Kind = switch (lib.archive_entry_filetype(entry)) {
+ std.os.S_IFDIR => Kind.Directory,
+ std.os.S_IFREG => Kind.File,
+ else => continue :loop,
+ };
+
var pathname_ = tokenizer.rest();
- pathname = @intToPtr([*]const u8, @ptrToInt(pathname_.ptr))[0..pathname_.len :0];
- const mask = lib.archive_entry_filetype(entry);
- const size = @intCast(usize, std.math.max(lib.archive_entry_size(entry), 0));
- if (size > 0) {
- const slice = std.mem.span(pathname);
+ count += 1;
+ const mode = @truncate(u32, lib.archive_entry_perm(entry));
+ const slice = std.mem.span(pathname);
+ const name = hop.appendMetadata(pathname_) catch unreachable;
+
+ if (entry_kind == .Directory) {
+ hop.directories.append(hop.allocator, .{
+ .name = name,
+ .name_hash = @truncate(u32, std.hash.Wyhash.hash(0, pathname_)),
+ .chmod = mode,
+ }) catch unreachable;
if (comptime log) {
- Output.prettyln(" {s}", .{pathname});
+ Output.prettyErrorln("Dir: {s}", .{
+ pathname_,
+ });
}
-
- const file = dir.createFileZ(pathname, .{ .truncate = true }) catch |err| brk: {
- switch (err) {
- error.FileNotFound => {
- try dir.makePath(std.fs.path.dirname(slice) orelse return err);
- break :brk try dir.createFileZ(pathname, .{ .truncate = true });
- },
- else => {
- return err;
- },
- }
+ } else {
+ var data = Hop.StringPointer{
+ .off = @truncate(u32, writer.pos),
};
- count += 1;
- _ = C.fchmod(file.handle, lib.archive_entry_perm(entry));
+ chunk_offset = 0;
+ chunk_output_offset = 0;
+ const size = lib.archive_entry_size(entry);
- if (ctx) |ctx_| {
- const hash: u64 = if (ctx_.pluckers.len > 0)
- std.hash.Wyhash.hash(0, slice)
- else
- @as(u64, 0);
+ var data_block_status: c_int = lib.archive_read_data_block(archive, @ptrCast([*c]*const c_void, &chunk_buf), &chunk_size, &chunk_offset);
- if (comptime FilePathAppender != void) {
- var result = ctx.?.all_files.getOrPutAdapted(hash, Context.U64Context{}) catch unreachable;
- if (!result.found_existing) {
- result.value_ptr.* = (try appender.appendMutable(@TypeOf(slice), slice)).ptr;
- }
+ while (data_block_status == lib.ARCHIVE_OK) : (data_block_status = lib.archive_read_data_block(archive, @ptrCast([*c]*const c_void, &chunk_buf), &chunk_size, &chunk_offset)) {
+ if (chunk_offset > chunk_output_offset) {
+ writer.pos = @intCast(usize, @intCast(isize, writer.pos) + chunk_offset - chunk_output_offset);
}
- for (ctx_.pluckers) |*plucker_| {
- if (plucker_.filename_hash == hash) {
- try plucker_.contents.inflate(size);
- plucker_.contents.list.expandToCapacity();
- var read = lib.archive_read_data(archive, plucker_.contents.list.items.ptr, size);
- try plucker_.contents.inflate(@intCast(usize, read));
- plucker_.found = read > 0;
- plucker_.fd = file.handle;
- continue :loop;
- }
+ while (chunk_size > 0) {
+ const remain = size - chunk_output_offset;
+ chunk_size = @minimum(@intCast(usize, remain), chunk_size);
+ // const written = try std.os.pwrite(handle, chunk_buf.?[0..chunk_size], writer.pos);
+ // writer.pos += written;
+ const written = try writer.write(chunk_buf.?[0..chunk_size]);
+ chunk_buf.? += written;
+ chunk_size -= written;
+ chunk_output_offset += @intCast(i64, written);
}
}
- _ = lib.archive_read_data_into_fd(archive, file.handle);
+ data.len = @intCast(u32, size);
+ _ = try writer.flush();
+
+ if (comptime log) {
+ Output.prettyErrorln("File: {s} - [{d}, {d}]", .{ pathname_, data.off, data.len });
+ }
+
+ hop.files.append(
+ hop.allocator,
+ Hop.File{
+ .name = name,
+ .name_hash = @truncate(u32, std.hash.Wyhash.hash(0, pathname_)),
+ .chmod = mode,
+ .data = data,
+ },
+ ) catch unreachable;
}
},
}
}
+ try hop.destination.seekTo(writer.pos);
+
return count;
}
@@ -676,7 +748,7 @@ pub const Archive = struct {
pathname = @intToPtr([*]const u8, @ptrToInt(pathname_.ptr))[0..pathname_.len :0];
const mask = lib.archive_entry_filetype(entry);
- const size = @intCast(usize, std.math.max(lib.archive_entry_size(entry), 0));
+ const size = @intCast(usize, @maximum(lib.archive_entry_size(entry), 0));
if (size > 0) {
const slice = std.mem.span(pathname);