diff options
author | 2021-12-17 23:06:56 -0800 | |
---|---|---|
committer | 2021-12-17 23:06:56 -0800 | |
commit | d4c55557c3e6de29b9676d9ea0e8eda0970cebfc (patch) | |
tree | 3699e79183e5f64c7c7d129b667520e65c3d7ef6 /src | |
parent | 0e39174c481adeeb000fe021fa49616906fd9cd8 (diff) | |
download | bun-d4c55557c3e6de29b9676d9ea0e8eda0970cebfc.tar.gz bun-d4c55557c3e6de29b9676d9ea0e8eda0970cebfc.tar.zst bun-d4c55557c3e6de29b9676d9ea0e8eda0970cebfc.zip |
Implement hop archive format
Diffstat (limited to 'src')
-rw-r--r-- | src/hop/hop.zig | 112 | ||||
-rw-r--r-- | src/hop/schema.zig | 45 | ||||
-rw-r--r-- | src/libarchive/libarchive.zig | 156 |
3 files changed, 257 insertions, 56 deletions
diff --git a/src/hop/hop.zig b/src/hop/hop.zig index 7d257c3d7..90def6dc6 100644 --- a/src/hop/hop.zig +++ b/src/hop/hop.zig @@ -2,6 +2,7 @@ const std = @import("std"); const C = @import("../c.zig"); const Schema = @import("./schema.zig"); const Hop = Schema.Hop; +const Environment = @import("../env.zig"); const string = []const u8; @@ -24,9 +25,10 @@ pub fn sortDesc(in: []string) void { std.sort.sort([]const u8, in, {}, cmpStringsDesc); } -const Library = struct { +pub const Library = struct { pub const magic_bytes = "#!/usr/bin/env hop\n\n"; const Header = [magic_bytes.len + 5]u8; + pub usingnamespace Schema.Hop; archive: Hop.Archive, allocator: *std.mem.Allocator, @@ -36,6 +38,7 @@ const Library = struct { pub const Builder = struct { allocator: *std.mem.Allocator, files: std.ArrayListUnmanaged(Hop.File), + directories: std.ArrayListUnmanaged(Hop.Directory), metadata_bytes: std.ArrayListUnmanaged(u8), destination: std.fs.File = undefined, @@ -43,6 +46,7 @@ const Library = struct { return Builder{ .allocator = allocator, .metadata_bytes = .{}, + .directories = .{}, .files = std.ArrayListUnmanaged(Hop.File){}, }; } @@ -62,14 +66,31 @@ const Library = struct { } }; + const DirSorter = struct { + metadata: []const u8, + pub fn sortByName(this: DirSorter, lhs: Hop.Directory, rhs: Hop.Directory) bool { + return std.mem.order(u8, this.metadata[lhs.name.off..][0..lhs.name.len], this.metadata[rhs.name.off..][0..rhs.name.len]) == .lt; + } + }; + pub fn done(this: *Builder) !Hop.Archive { const metadata_offset = @truncate(u32, try this.destination.getPos()); - var sorter = FileSorter{ - .metadata = this.metadata_bytes.items, - }; + { + var sorter = FileSorter{ + .metadata = this.metadata_bytes.items, + }; - std.sort.sort(Hop.File, this.files.items, sorter, FileSorter.sortByName); + std.sort.sort(Hop.File, this.files.items, sorter, FileSorter.sortByName); + } + + { + var sorter = DirSorter{ + .metadata = this.metadata_bytes.items, + }; + + std.sort.sort(Hop.Directory, this.directories.items, sorter, DirSorter.sortByName); + } var name_hashes = try this.allocator.alloc(u32, this.files.items.len); @@ -80,6 +101,7 @@ const Library = struct { var archive = Hop.Archive{ .version = 1, .files = this.files.items, + .directories = this.directories.items, .name_hashes = name_hashes, .content_offset = metadata_offset, .metadata = this.metadata_bytes.items, @@ -132,11 +154,17 @@ const Library = struct { const written = try std.os.copy_file_range(in.handle, 0, this.destination.handle, off_in, stat.size, 0); try this.destination.seekTo(off_in + written); const end = try this.destination.getPos(); - try this.appendFileMetadata(name, off_in, end, stat); + try this.appendFileMetadataFromDisk(name, off_in, end, stat); try this.destination.writeAll(&[_]u8{0}); } - pub fn appendFileMetadata(this: *Builder, name_buf: []const u8, start_pos: u64, end_pos: u64, stat: std.fs.File.Stat) !void { + pub fn appendDirectoryFromDisk(this: *Builder, name: []const u8, in: std.fs.Dir) !void { + var stat = try std.os.fstatat(in.fd, name, 0); + + try this.appendDirMetadataFromDisk(name, stat); + } + + pub fn appendFileMetadataFromDisk(this: *Builder, name_buf: []const u8, start_pos: u64, end_pos: u64, stat: std.fs.File.Stat) !void { const name = try this.appendMetadata(name_buf); try this.files.append( this.allocator, @@ -151,15 +179,48 @@ const Library = struct { ); } + pub fn appendDirMetadataFromDisk(this: *Builder, name_buf: []const u8, stat: std.fs.File.Stat) !void { + const name = try this.appendMetadata(name_buf); + try this.directories.append( + this.allocator, + Hop.Directory{ + .name = name, + .name_hash = @truncate(u32, std.hash.Wyhash.hash(0, name_buf)), + .chmod = @truncate(u32, stat.mode), + }, + ); + } + + pub fn appendFileMetadata(this: *Builder, name_buf: []const u8, meta: Hop.File) !void { + const name = try this.appendMetadata(name_buf); + try this.files.append( + this.allocator, + Hop.File{ + .name = name, + .name_hash = @truncate(u32, std.hash.Wyhash.hash(0, name_buf)), + .data = meta.data, + .chmod = meta.chmod, + .mtime = meta.mtime, + .ctime = meta.ctime, + }, + ); + } + pub fn appendDirectoryRecursively(this: *Builder, dir: std.fs.Dir) !void { var walker = try dir.walk(this.allocator); defer walker.deinit(); while (try walker.next()) |entry_| { const entry: std.fs.Dir.Walker.WalkerEntry = entry_; - if (entry.kind != .File) continue; - - try this.appendContentFromDisk(entry.path, try entry.dir.openFile(entry.basename, .{ .read = true })); + switch (entry.kind) { + .Directory => { + try this.appendDirectoryFromDisk(entry.path, entry.basename, entry.dir); + }, + .File => { + try this.appendContentFromDisk(entry.path, try entry.dir.openFile(entry.basename, .{ .read = true })); + }, + else => {}, + } } } }; @@ -182,7 +243,36 @@ const Library = struct { }; }; - const written = try std.os.copy_file_range(this.fd.?, file.data.off, out.handle, 0, file.data.len, 0); + if (file.data.len > std.mem.page_size) { + if (comptime Environment.isLinux) { + _ = std.os.system.fallocate(out.handle, 0, 0, @intCast(i64, file.data.len)); + } else if (comptime Environment.isMac) { + try C.preallocate_file( + out.handle, + @intCast(std.os.off_t, 0), + @intCast(std.os.off_t, file.data.len), + ); + } + } + + var remain: usize = file.data.len; + var written: usize = 0; + var in_off: usize = file.data.off; + + while (remain > 0) { + const wrote = try std.os.copy_file_range( + this.fd.?, + in_off, + out.handle, + written, + remain, + 0, + ); + in_off += wrote; + remain -= wrote; + written += wrote; + } + if (verbose) { std.log.info("Extracted file: {s} ({d} bytes)\n", .{ name_slice, written }); } diff --git a/src/hop/schema.zig b/src/hop/schema.zig index fd80e71f3..8e8a15eaf 100644 --- a/src/hop/schema.zig +++ b/src/hop/schema.zig @@ -397,6 +397,35 @@ pub const Hop = struct { } }; + pub const Directory = packed struct { + /// name + name: StringPointer, + + /// name_hash + name_hash: u32 = 0, + + /// chmod + chmod: u32 = 0, + + pub fn decode(reader: anytype) anyerror!Directory { + var this = Directory{ + .name = StringPointer{}, + }; + + this.name = try reader.readValue(StringPointer); + this.name_hash = try reader.readValue(u32); + this.chmod = try reader.readValue(u32); + + return this; + } + + pub fn encode(this: *const @This(), writer: anytype) anyerror!void { + try writer.writeValue(@TypeOf(this.name), this.name); + try writer.writeInt(this.name_hash); + try writer.writeInt(this.chmod); + } + }; + pub const Archive = struct { /// version version: ?u32 = null, @@ -407,6 +436,9 @@ pub const Hop = struct { /// files files: []align(1) const File, + /// files + directories: []align(1) const Directory, + /// name_hashes name_hashes: []align(1) const u32, @@ -432,9 +464,12 @@ pub const Hop = struct { this.files = try reader.readArray(File); }, 4 => { - this.name_hashes = try reader.readArray(u32); + this.directories = try reader.readArray(Directory); }, 5 => { + this.name_hashes = try reader.readArray(u32); + }, + 6 => { this.metadata = try reader.readArray(u8); }, else => { @@ -458,12 +493,16 @@ pub const Hop = struct { try writer.writeFieldID(3); try writer.writeArray(File, this.files); } - if (this.name_hashes.len > 0) { + if (this.directories.len > 0) { try writer.writeFieldID(4); + try writer.writeArray(Directory, this.directories); + } + if (this.name_hashes.len > 0) { + try writer.writeFieldID(5); try writer.writeArray(u32, this.name_hashes); } if (this.metadata.len > 0) { - try writer.writeFieldID(5); + try writer.writeFieldID(6); try writer.writeArray(u8, this.metadata); } try writer.endMessage(); diff --git a/src/libarchive/libarchive.zig b/src/libarchive/libarchive.zig index 1ab109068..47ac23645 100644 --- a/src/libarchive/libarchive.zig +++ b/src/libarchive/libarchive.zig @@ -523,6 +523,49 @@ pub const Archive = struct { } } + const SeekableBufferedWriter = struct { + pos: u64 = 0, + buf: [std.mem.page_size]u8 = undefined, + len: usize = 0, + fd: std.os.fd_t = 0, + + pub fn flush(this: *SeekableBufferedWriter) !usize { + const end = this.len + this.pos; + var off: usize = this.pos; + const initial = this.pos; + defer this.pos = off; + var slice = this.buf[0..this.len]; + while (slice.len > 0) { + const written = try std.os.pwrite(this.fd, slice, off); + slice = slice[written..]; + off += written; + } + this.len = 0; + return off - initial; + } + + pub fn write(this: *SeekableBufferedWriter, buf: []const u8) !usize { + if (this.buf.len - this.len < 32) { + _ = try this.flush(); + } + var queue = buf; + while (queue.len > 0) { + var to_write = @minimum(this.buf.len - this.len, queue.len); + if (to_write == 0 and this.len > 0) { + _ = try this.flush(); + to_write = @minimum(this.buf.len - this.len, queue.len); + } + + var remainder = queue[0..to_write]; + queue = queue[remainder.len..]; + @memcpy(this.buf[this.len..].ptr, remainder.ptr, remainder.len); + this.len += remainder.len; + } + + return buf.len; + } + }; + pub fn convertToHop( hop: *Hop.Builder, file_buffer: []const u8, @@ -530,7 +573,6 @@ pub const Archive = struct { comptime FilePathAppender: type, appender: FilePathAppender, comptime depth_to_skip: usize, - comptime close_handles: bool, comptime log: bool, ) !u32 { var entry: *lib.archive_entry = undefined; @@ -544,6 +586,18 @@ pub const Archive = struct { var archive = stream.archive; var count: u32 = 0; + var chunk_remain: usize = 0; + var chunk_offset: isize = 0; + var chunk_output_offset: isize = 0; + var chunk_size: usize = 0; + var chunk_buf: ?[*]u8 = null; + const handle = hop.destination.handle; + + var writer = SeekableBufferedWriter{ + .pos = try hop.destination.getPos(), + .fd = hop.destination.handle, + }; + loop: while (true) { const r = @intToEnum(Status, lib.archive_read_next_header(archive, &entry)); @@ -559,65 +613,83 @@ pub const Archive = struct { if (tokenizer.next() == null) continue :loop; } + const Kind = std.fs.Dir.Entry.Kind; + const entry_kind: Kind = switch (lib.archive_entry_filetype(entry)) { + std.os.S_IFDIR => Kind.Directory, + std.os.S_IFREG => Kind.File, + else => continue :loop, + }; + var pathname_ = tokenizer.rest(); - pathname = @intToPtr([*]const u8, @ptrToInt(pathname_.ptr))[0..pathname_.len :0]; - const mask = lib.archive_entry_filetype(entry); - const size = @intCast(usize, std.math.max(lib.archive_entry_size(entry), 0)); - if (size > 0) { - const slice = std.mem.span(pathname); + count += 1; + const mode = @truncate(u32, lib.archive_entry_perm(entry)); + const slice = std.mem.span(pathname); + const name = hop.appendMetadata(pathname_) catch unreachable; + + if (entry_kind == .Directory) { + hop.directories.append(hop.allocator, .{ + .name = name, + .name_hash = @truncate(u32, std.hash.Wyhash.hash(0, pathname_)), + .chmod = mode, + }) catch unreachable; if (comptime log) { - Output.prettyln(" {s}", .{pathname}); + Output.prettyErrorln("Dir: {s}", .{ + pathname_, + }); } - - const file = dir.createFileZ(pathname, .{ .truncate = true }) catch |err| brk: { - switch (err) { - error.FileNotFound => { - try dir.makePath(std.fs.path.dirname(slice) orelse return err); - break :brk try dir.createFileZ(pathname, .{ .truncate = true }); - }, - else => { - return err; - }, - } + } else { + var data = Hop.StringPointer{ + .off = @truncate(u32, writer.pos), }; - count += 1; - _ = C.fchmod(file.handle, lib.archive_entry_perm(entry)); + chunk_offset = 0; + chunk_output_offset = 0; + const size = lib.archive_entry_size(entry); - if (ctx) |ctx_| { - const hash: u64 = if (ctx_.pluckers.len > 0) - std.hash.Wyhash.hash(0, slice) - else - @as(u64, 0); + var data_block_status: c_int = lib.archive_read_data_block(archive, @ptrCast([*c]*const c_void, &chunk_buf), &chunk_size, &chunk_offset); - if (comptime FilePathAppender != void) { - var result = ctx.?.all_files.getOrPutAdapted(hash, Context.U64Context{}) catch unreachable; - if (!result.found_existing) { - result.value_ptr.* = (try appender.appendMutable(@TypeOf(slice), slice)).ptr; - } + while (data_block_status == lib.ARCHIVE_OK) : (data_block_status = lib.archive_read_data_block(archive, @ptrCast([*c]*const c_void, &chunk_buf), &chunk_size, &chunk_offset)) { + if (chunk_offset > chunk_output_offset) { + writer.pos = @intCast(usize, @intCast(isize, writer.pos) + chunk_offset - chunk_output_offset); } - for (ctx_.pluckers) |*plucker_| { - if (plucker_.filename_hash == hash) { - try plucker_.contents.inflate(size); - plucker_.contents.list.expandToCapacity(); - var read = lib.archive_read_data(archive, plucker_.contents.list.items.ptr, size); - try plucker_.contents.inflate(@intCast(usize, read)); - plucker_.found = read > 0; - plucker_.fd = file.handle; - continue :loop; - } + while (chunk_size > 0) { + const remain = size - chunk_output_offset; + chunk_size = @minimum(@intCast(usize, remain), chunk_size); + // const written = try std.os.pwrite(handle, chunk_buf.?[0..chunk_size], writer.pos); + // writer.pos += written; + const written = try writer.write(chunk_buf.?[0..chunk_size]); + chunk_buf.? += written; + chunk_size -= written; + chunk_output_offset += @intCast(i64, written); } } - _ = lib.archive_read_data_into_fd(archive, file.handle); + data.len = @intCast(u32, size); + _ = try writer.flush(); + + if (comptime log) { + Output.prettyErrorln("File: {s} - [{d}, {d}]", .{ pathname_, data.off, data.len }); + } + + hop.files.append( + hop.allocator, + Hop.File{ + .name = name, + .name_hash = @truncate(u32, std.hash.Wyhash.hash(0, pathname_)), + .chmod = mode, + .data = data, + }, + ) catch unreachable; } }, } } + try hop.destination.seekTo(writer.pos); + return count; } @@ -676,7 +748,7 @@ pub const Archive = struct { pathname = @intToPtr([*]const u8, @ptrToInt(pathname_.ptr))[0..pathname_.len :0]; const mask = lib.archive_entry_filetype(entry); - const size = @intCast(usize, std.math.max(lib.archive_entry_size(entry), 0)); + const size = @intCast(usize, @maximum(lib.archive_entry_size(entry), 0)); if (size > 0) { const slice = std.mem.span(pathname); |