author    Jarred Sumner <jarred@jarredsumner.com>  2022-03-08 04:46:25 -0800
committer Jarred Sumner <jarred@jarredsumner.com>  2022-03-08 04:46:25 -0800
commit    880f6a17b84ad4ca09d075d22fdccb2256411435 (patch)
tree      365d78f89378b00b31775d61e82ea52561a89ece /src/sourcemap
parent    04568452567995e3aa42535ca356d83e60ed030f (diff)
[bun.js] WIP sourcemap support
Diffstat (limited to 'src/sourcemap')
-rw-r--r--  src/sourcemap/sourcemap.zig | 403
-rw-r--r--  src/sourcemap/vlq_bench.zig | 239
2 files changed, 483 insertions(+), 159 deletions(-)
diff --git a/src/sourcemap/sourcemap.zig b/src/sourcemap/sourcemap.zig
index e043a493b..dc8de4b43 100644
--- a/src/sourcemap/sourcemap.zig
+++ b/src/sourcemap/sourcemap.zig
@@ -9,7 +9,6 @@ const BabyList = JSAst.BabyList;
const Logger = @import("../logger.zig");
const strings = @import("../string_immutable.zig");
const MutableString = @import("../string_mutable.zig").MutableString;
-const base64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
const Joiner = @import("../string_joiner.zig");
const JSPrinter = @import("../js_printer.zig");
const URL = @import("../query_string_map.zig").URL;
@@ -17,18 +16,6 @@ const FileSystem = @import("../fs.zig").FileSystem;
const SourceMap = @This();
-const vlq_max_in_bytes = 8;
-pub const VLQ = struct {
- // We only need to worry about i32
- // That means the maximum VLQ-encoded value is 8 bytes
- // because there are only 4 bits of number inside each VLQ value
- // and it expects i32
- // therefore, it can never be more than 32 bits long
- // I believe the actual number is 7 bytes long, however we can add an extra byte to be more cautious
- bytes: [vlq_max_in_bytes]u8,
- len: u4 = 0,
-};
-
/// Coordinates in source maps are stored using relative offsets for size
/// reasons. When joining together chunks of a source map that were emitted
/// in parallel for different parts of a file, we need to fix up the first
@@ -55,7 +42,7 @@ pub const Mapping = struct {
original: LineColumnOffset,
source_index: i32,
- pub const List = std.MultiArrayList(Mapping);
+ pub const List = std.ArrayList(Mapping);
pub inline fn generatedLine(mapping: Mapping) i32 {
return mapping.generated.lines;
@@ -190,6 +177,18 @@ const vlq_lookup_table: [256]VLQ = brk: {
break :brk entries;
};
+const vlq_max_in_bytes = 8;
+pub const VLQ = struct {
+ // We only need to worry about i32
+ // That means the maximum VLQ-encoded value is 8 bytes
+ // because there are only 4 bits of number inside each VLQ value
+ // and it expects i32
+ // therefore, it can never be more than 32 bits long
+ // I believe the actual number is 7 bytes long, however we can add an extra byte to be more cautious
+ bytes: [vlq_max_in_bytes]u8,
+ len: u4 = 0,
+};
+
pub fn encodeVLQWithLookupTable(
value: i32,
) VLQ {
@@ -224,7 +223,6 @@ test "decodeVLQ" {
.{ -1, "D" },
.{ 123, "2H" },
.{ 123456789, "qxmvrH" },
- .{ 8, "Q" },
};
inline for (fixtures) |fixture| {
const result = decodeVLQ(fixture[1], 0);
@@ -258,8 +256,7 @@ pub fn encodeVLQ(
else
@bitCast(u32, (-value << 1) | 1);
- // The max amount of digits a VLQ value for sourcemaps can contain is 9
- // therefore, we can unroll the loop
+ // source mappings are limited to i32
comptime var i: usize = 0;
inline while (i < vlq_max_in_bytes) : (i += 1) {
var digit = vlq & 31;
@@ -287,6 +284,9 @@ pub const VLQResult = struct {
start: usize = 0,
};
+const base64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+// base64 stores values up to 7 bits
const base64_lut: [std.math.maxInt(u7)]u7 = brk: {
@setEvalBranchQuota(9999);
var bytes = [_]u7{std.math.maxInt(u7)} ** std.math.maxInt(u7);
@@ -302,12 +302,11 @@ pub fn decodeVLQ(encoded: []const u8, start: usize) VLQResult {
var shift: u8 = 0;
var vlq: u32 = 0;
- // it will never exceed 9
- // by doing it this way, we can hint to the compiler that it will not exceed 9
+ // hint to the compiler what the maximum value is
const encoded_ = encoded[start..][0..@minimum(encoded.len - start, comptime (vlq_max_in_bytes + 1))];
+ // inlining helps for the 1 or 2 byte case, hurts a little for larger
comptime var i: usize = 0;
-
inline while (i < vlq_max_in_bytes + 1) : (i += 1) {
const index = @as(u32, base64_lut[@truncate(u7, encoded_[i])]);
@@ -473,6 +472,7 @@ pub const LineOffsetTable = struct {
.byte_offset_to_first_non_ascii = byte_offset_to_first_non_ascii,
.columns_for_non_ascii = BabyList(i32).fromList(columns_list),
}) catch unreachable;
+
column = 0;
byte_offset_to_first_non_ascii = 0;
column_byte_offset = 0;
@@ -577,6 +577,8 @@ pub fn appendMappingToBuffer(buffer_: MutableString, last_byte: u8, prev_state:
pub const Chunk = struct {
buffer: MutableString,
+ mappings_count: usize = 0,
+
/// This end state will be used to rewrite the start of the following source
/// map chunk so that the delta-encoded VLQ numbers are preserved.
end_state: SourceMapState = .{},
@@ -625,171 +627,260 @@ pub const Chunk = struct {
return output;
}
- pub const Builder = struct {
- input_source_map: ?*SourceMap = null,
- source_map: MutableString,
- line_offset_tables: LineOffsetTable.List = .{},
- prev_state: SourceMapState = SourceMapState{},
- last_generated_update: u32 = 0,
- generated_column: i32 = 0,
- prev_loc: Logger.Loc = Logger.Loc.Empty,
- has_prev_state: bool = false,
-
- // This is a workaround for a bug in the popular "source-map" library:
- // https://github.com/mozilla/source-map/issues/261. The library will
- // sometimes return null when querying a source map unless every line
- // starts with a mapping at column zero.
- //
- // The workaround is to replicate the previous mapping if a line ends
- // up not starting with a mapping. This is done lazily because we want
- // to avoid replicating the previous mapping if we don't need to.
- line_starts_with_mapping: bool = false,
- cover_lines_without_mappings: bool = false,
-
- pub fn generateChunk(b: *Builder, output: []const u8) Chunk {
- b.updateGeneratedLineAndColumn(output);
- return Chunk{
- .buffer = b.source_map,
- .end_state = b.prev_state,
- .final_generated_column = b.generated_column,
- .should_ignore = !strings.containsAnyBesidesChar(b.source_map.list.items, ';'),
- };
- }
+ pub fn SourceMapFormat(comptime Type: type) type {
+ return struct {
+ ctx: Type,
+ const Format = @This();
- // Scan over the printed text since the last source mapping and update the
- // generated line and column numbers
- pub fn updateGeneratedLineAndColumn(b: *Builder, output: []const u8) void {
- const slice = output[b.last_generated_update..];
- var needs_mapping = b.cover_lines_without_mappings and !b.line_starts_with_mapping and b.has_prev_state;
+ pub fn init(allocator: std.mem.Allocator) Format {
+ return Format{ .ctx = Type.init(allocator) };
+ }
- var i: usize = 0;
- const n = @intCast(usize, slice.len);
- var c: i32 = 0;
- while (i < n) {
- const len = strings.wtf8ByteSequenceLength(slice[i]);
- c = strings.decodeWTF8RuneT(slice[i..].ptr[0..4], len, i32, strings.unicode_replacement);
- i += @as(usize, len);
+ pub inline fn appendLineSeparator(this: *Format) anyerror!void {
+ try this.ctx.appendLineSeparator();
+ }
- switch (c) {
- 14...127 => {
- if (strings.indexOfNewlineOrNonASCII(slice, @intCast(u32, i))) |j| {
- b.generated_column += @intCast(i32, (@as(usize, j) - i) + 1);
- i = j;
- continue;
- } else {
- b.generated_column += @intCast(i32, slice[i..].len);
- i = n;
- break;
- }
- },
- '\r', '\n', 0x2028, 0x2029 => {
- // windows newline
- if (c == '\r') {
- const newline_check = b.last_generated_update + i;
- if (newline_check < output.len and output[newline_check] == '\n') {
- continue;
- }
- }
+ pub inline fn append(this: *Format, current_state: SourceMapState, prev_state: SourceMapState) anyerror!void {
+ try this.ctx.append(current_state, prev_state);
+ }
- // If we're about to move to the next line and the previous line didn't have
- // any mappings, add a mapping at the start of the previous line.
- if (needs_mapping) {
- b.appendMappingWithoutRemapping(.{
- .generated_line = b.prev_state.generated_line,
- .generated_column = 0,
- .source_index = b.prev_state.source_index,
- .original_line = b.prev_state.original_line,
- .original_column = b.prev_state.original_column,
- });
- }
+ pub inline fn shouldIgnore(this: Format) bool {
+ return this.ctx.shouldIgnore();
+ }
+
+ pub inline fn getBuffer(this: Format) MutableString {
+ return this.ctx.getBuffer();
+ }
- b.prev_state.generated_line += 1;
- b.prev_state.generated_column = 0;
- b.generated_column = 0;
- b.source_map.appendChar(';') catch unreachable;
+ pub inline fn getCount(this: Format) usize {
+ return this.ctx.getCount();
+ }
+ };
+ }
- // This new line doesn't have a mapping yet
- b.line_starts_with_mapping = false;
+ pub const VLQSourceMap = struct {
+ data: MutableString,
+ count: usize = 0,
+ offset: usize = 0,
- needs_mapping = b.cover_lines_without_mappings and !b.line_starts_with_mapping and b.has_prev_state;
- },
+ pub const Format = SourceMapFormat(VLQSourceMap);
- else => {
- // Mozilla's "source-map" library counts columns using UTF-16 code units
- b.generated_column += @as(i32, @boolToInt(c > 0xFFFF)) + 1;
- },
- }
+ pub fn init(allocator: std.mem.Allocator, prepend_count: bool) VLQSourceMap {
+ var map = VLQSourceMap{
+ .data = MutableString.initEmpty(allocator),
+ };
+
+ // For bun.js, we store the number of mappings and how many bytes the final list is at the beginning of the array
+ if (prepend_count) {
+ map.offset = 16;
+ map.data.append([16]u8{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }) catch unreachable;
}
- b.last_generated_update = @truncate(u32, output.len);
+ return map;
}
- pub fn appendMapping(b: *Builder, current_state_: SourceMapState) void {
- var current_state = current_state_;
- // If the input file had a source map, map all the way back to the original
- if (b.input_source_map) |input| {
- if (input.find(current_state.original_line, current_state.original_column)) |mapping| {
- current_state.source_index = mapping.sourceIndex();
- current_state.original_line = mapping.originalLine();
- current_state.original_column = mapping.originalColumn();
- }
- }
-
- b.appendMappingWithoutRemapping(current_state);
+ pub fn appendLineSeparator(this: *VLQSourceMap) anyerror!void {
+ try this.data.appendChar(';');
}
- pub fn appendMappingWithoutRemapping(b: *Builder, current_state: SourceMapState) void {
- const last_byte: u8 = if (b.source_map.list.items.len > 0)
- b.source_map.list.items[b.source_map.list.items.len - 1]
+ pub fn append(this: *VLQSourceMap, current_state: SourceMapState, prev_state: SourceMapState) anyerror!void {
+ const last_byte: u8 = if (this.data.list.items.len > this.offset)
+ this.data.list.items[this.data.list.items.len - 1]
else
0;
- b.source_map = appendMappingToBuffer(b.source_map, last_byte, b.prev_state, current_state);
- b.prev_state = current_state;
- b.has_prev_state = true;
+ this.data = appendMappingToBuffer(this.data, last_byte, prev_state, current_state);
+ this.count += 1;
+ }
+
+ pub fn shouldIgnore(this: VLQSourceMap) bool {
+ return this.count == 0;
}
- pub fn addSourceMapping(b: *Builder, loc: Logger.Loc, output: []const u8) void {
- // exclude generated code from source
- if (b.prev_loc.eql(loc) or loc.start == Logger.Loc.Empty.start) {
- return;
+ pub fn getBuffer(this: VLQSourceMap) MutableString {
+ return this.data;
+ }
+
+ pub fn getCount(this: VLQSourceMap) usize {
+ return this.count;
+ }
+ };
+
+ pub fn NewBuilder(comptime SourceMapFormatType: type) type {
+ return struct {
+ const ThisBuilder = @This();
+ input_source_map: ?*SourceMap = null,
+ source_map: SourceMapper,
+ line_offset_tables: LineOffsetTable.List = .{},
+ prev_state: SourceMapState = SourceMapState{},
+ last_generated_update: u32 = 0,
+ generated_column: i32 = 0,
+ prev_loc: Logger.Loc = Logger.Loc.Empty,
+ has_prev_state: bool = false,
+
+ // This is a workaround for a bug in the popular "source-map" library:
+ // https://github.com/mozilla/source-map/issues/261. The library will
+ // sometimes return null when querying a source map unless every line
+ // starts with a mapping at column zero.
+ //
+ // The workaround is to replicate the previous mapping if a line ends
+ // up not starting with a mapping. This is done lazily because we want
+ // to avoid replicating the previous mapping if we don't need to.
+ line_starts_with_mapping: bool = false,
+ cover_lines_without_mappings: bool = false,
+
+ /// When generating sourcemappings for bun, we store a count of how many mappings there were
+ prepend_count: bool = false,
+
+ pub const SourceMapper = SourceMapFormat(SourceMapFormatType);
+
+ pub fn generateChunk(b: *ThisBuilder, output: []const u8) Chunk {
+ b.updateGeneratedLineAndColumn(output);
+ if (b.prepend_count) {
+ b.source_map.getBuffer().list.items[0..8].* = @bitCast([8]u8, b.source_map.getBuffer().list.items.len);
+ b.source_map.getBuffer().list.items[8..16].* = @bitCast([8]u8, b.source_map.getCount());
+ }
+ return Chunk{
+ .buffer = b.source_map.getBuffer(),
+ .mappings_count = b.source_map.getCount(),
+ .end_state = b.prev_state,
+ .final_generated_column = b.generated_column,
+ .should_ignore = !b.source_map.shouldIgnore(),
+ };
}
- b.prev_loc = loc;
- const list = b.line_offset_tables;
- const original_line = LineOffsetTable.findLine(list, loc);
- const line = list.get(@intCast(usize, @maximum(original_line, 0)));
+ // Scan over the printed text since the last source mapping and update the
+ // generated line and column numbers
+ pub fn updateGeneratedLineAndColumn(b: *ThisBuilder, output: []const u8) void {
+ const slice = output[b.last_generated_update..];
+ var needs_mapping = b.cover_lines_without_mappings and !b.line_starts_with_mapping and b.has_prev_state;
+
+ var i: usize = 0;
+ const n = @intCast(usize, slice.len);
+ var c: i32 = 0;
+ while (i < n) {
+ const len = strings.wtf8ByteSequenceLength(slice[i]);
+ c = strings.decodeWTF8RuneT(slice[i..].ptr[0..4], len, i32, strings.unicode_replacement);
+ i += @as(usize, len);
+
+ switch (c) {
+ 14...127 => {
+ if (strings.indexOfNewlineOrNonASCII(slice, @intCast(u32, i))) |j| {
+ b.generated_column += @intCast(i32, (@as(usize, j) - i) + 1);
+ i = j;
+ continue;
+ } else {
+ b.generated_column += @intCast(i32, slice[i..].len);
+ i = n;
+ break;
+ }
+ },
+ '\r', '\n', 0x2028, 0x2029 => {
+ // windows newline
+ if (c == '\r') {
+ const newline_check = b.last_generated_update + i;
+ if (newline_check < output.len and output[newline_check] == '\n') {
+ continue;
+ }
+ }
+
+ // If we're about to move to the next line and the previous line didn't have
+ // any mappings, add a mapping at the start of the previous line.
+ if (needs_mapping) {
+ b.appendMappingWithoutRemapping(.{
+ .generated_line = b.prev_state.generated_line,
+ .generated_column = 0,
+ .source_index = b.prev_state.source_index,
+ .original_line = b.prev_state.original_line,
+ .original_column = b.prev_state.original_column,
+ });
+ }
+
+ b.prev_state.generated_line += 1;
+ b.prev_state.generated_column = 0;
+ b.generated_column = 0;
+ b.source_map.appendLineSeparator();
- // Use the line to compute the column
- var original_column = loc.start - @intCast(i32, line.byte_offset_to_start_of_line);
- if (line.columns_for_non_ascii.len > 0 and original_column >= @intCast(i32, line.byte_offset_to_first_non_ascii)) {
- original_column = line.columns_for_non_ascii.ptr[@intCast(u32, original_column) - line.byte_offset_to_first_non_ascii];
+ // This new line doesn't have a mapping yet
+ b.line_starts_with_mapping = false;
+
+ needs_mapping = b.cover_lines_without_mappings and !b.line_starts_with_mapping and b.has_prev_state;
+ },
+
+ else => {
+ // Mozilla's "source-map" library counts columns using UTF-16 code units
+ b.generated_column += @as(i32, @boolToInt(c > 0xFFFF)) + 1;
+ },
+ }
+ }
+
+ b.last_generated_update = @truncate(u32, output.len);
+ }
+
+ pub fn appendMapping(b: *ThisBuilder, current_state_: SourceMapState) void {
+ var current_state = current_state_;
+ // If the input file had a source map, map all the way back to the original
+ if (b.input_source_map) |input| {
+ if (input.find(current_state.original_line, current_state.original_column)) |mapping| {
+ current_state.source_index = mapping.sourceIndex();
+ current_state.original_line = mapping.originalLine();
+ current_state.original_column = mapping.originalColumn();
+ }
+ }
+
+ b.appendMappingWithoutRemapping(current_state);
}
- b.updateGeneratedLineAndColumn(output);
+ pub fn appendMappingWithoutRemapping(b: *ThisBuilder, current_state: SourceMapState) void {
+ try b.source_map.append(current_state, b.prev_state);
+ b.prev_state = current_state;
+ b.has_prev_state = true;
+ }
- // If this line doesn't start with a mapping and we're about to add a mapping
- // that's not at the start, insert a mapping first so the line starts with one.
- if (b.cover_lines_without_mappings and !b.line_starts_with_mapping and b.generated_column > 0 and b.has_prev_state) {
- b.appendMappingWithoutRemapping(.{
+ pub fn addSourceMapping(b: *ThisBuilder, loc: Logger.Loc, output: []const u8) void {
+ // exclude generated code from source
+ if (b.prev_loc.eql(loc) or loc.start == Logger.Loc.Empty.start) {
+ return;
+ }
+
+ b.prev_loc = loc;
+ const list = b.line_offset_tables;
+ const original_line = LineOffsetTable.findLine(list, loc);
+ const line = list.get(@intCast(usize, @maximum(original_line, 0)));
+
+ // Use the line to compute the column
+ var original_column = loc.start - @intCast(i32, line.byte_offset_to_start_of_line);
+ if (line.columns_for_non_ascii.len > 0 and original_column >= @intCast(i32, line.byte_offset_to_first_non_ascii)) {
+ original_column = line.columns_for_non_ascii.ptr[@intCast(u32, original_column) - line.byte_offset_to_first_non_ascii];
+ }
+
+ b.updateGeneratedLineAndColumn(output);
+
+ // If this line doesn't start with a mapping and we're about to add a mapping
+ // that's not at the start, insert a mapping first so the line starts with one.
+ if (b.cover_lines_without_mappings and !b.line_starts_with_mapping and b.generated_column > 0 and b.has_prev_state) {
+ b.appendMappingWithoutRemapping(.{
+ .generated_line = b.prev_state.generated_line,
+ .generated_column = 0,
+ .source_index = b.prev_state.source_index,
+ .original_line = b.prev_state.original_line,
+ .original_column = b.prev_state.original_column,
+ });
+ }
+
+ b.appendMapping(.{
.generated_line = b.prev_state.generated_line,
- .generated_column = 0,
+ .generated_column = b.generated_column,
.source_index = b.prev_state.source_index,
- .original_line = b.prev_state.original_line,
+ .original_line = original_line,
.original_column = b.prev_state.original_column,
});
- }
- b.appendMapping(.{
- .generated_line = b.prev_state.generated_line,
- .generated_column = b.generated_column,
- .source_index = b.prev_state.source_index,
- .original_line = original_line,
- .original_column = b.prev_state.original_column,
- });
+ // This line now has a mapping on it, so don't insert another one
+ b.line_starts_with_mapping = true;
+ }
+ };
+ }
- // This line now has a mapping on it, so don't insert another one
- b.line_starts_with_mapping = true;
- }
- };
+ pub const Builder = NewBuilder(VLQSourceMap);
};
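
Note on the prepend_count path added above: when it is enabled, generateChunk overwrites the 16 bytes reserved at the start of the buffer with two native-endian values, the total buffer length in bytes 0..8 and the mapping count in bytes 8..16. Below is a minimal sketch of the reading side, assuming a 64-bit usize and the same native byte order as the @bitCast on the write side; readPrependedHeader is a hypothetical helper, not part of this commit.

    const std = @import("std");

    /// Hypothetical helper (not in this commit): recover the two values that
    /// generateChunk stores in the first 16 bytes when prepend_count is true.
    /// Assumes a 64-bit usize and native byte order, mirroring the write side.
    pub fn readPrependedHeader(buffer: []const u8) struct { byte_len: u64, mappings_count: u64 } {
        std.debug.assert(buffer.len >= 16);
        return .{
            .byte_len = @bitCast(u64, buffer[0..8].*),
            .mappings_count = @bitCast(u64, buffer[8..16].*),
        };
    }

The length generateChunk writes is the buffer length at that point, which includes the 16-byte header itself, so a consumer would take the VLQ data as buffer[16..byte_len].
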
diff --git a/src/sourcemap/vlq_bench.zig b/src/sourcemap/vlq_bench.zig
index 20f88a21d..e6ea2724f 100644
--- a/src/sourcemap/vlq_bench.zig
+++ b/src/sourcemap/vlq_bench.zig
@@ -1,6 +1,137 @@
const std = @import("std");
-const SourceMap = @import("./sourcemap.zig");
+const SourceMap = struct {
+ const base64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+ const vlq_lookup_table: [256]VLQ = brk: {
+ var entries: [256]VLQ = undefined;
+ var i: usize = 0;
+ var j: i32 = 0;
+ while (i < 256) : (i += 1) {
+ entries[i] = encodeVLQ(j);
+ j += 1;
+ }
+ break :brk entries;
+ };
+
+ const vlq_max_in_bytes = 8;
+ pub const VLQ = struct {
+ // We only need to worry about i32
+ // That means the maximum VLQ-encoded value is 8 bytes
+ // because there are only 4 bits of number inside each VLQ value
+ // and it expects i32
+ // therefore, it can never be more than 32 bits long
+ // I believe the actual number is 7 bytes long, however we can add an extra byte to be more cautious
+ bytes: [vlq_max_in_bytes]u8,
+ len: u4 = 0,
+ };
+
+ pub fn encodeVLQWithLookupTable(
+ value: i32,
+ ) VLQ {
+ return if (value >= 0 and value <= 255)
+ vlq_lookup_table[@intCast(usize, value)]
+ else
+ encodeVLQ(value);
+ }
+
+ // A single base 64 digit can contain 6 bits of data. For the base 64 variable
+ // length quantities we use in the source map spec, the first bit is the sign,
+ // the next four bits are the actual value, and the 6th bit is the continuation
+ // bit. The continuation bit tells us whether there are more digits in this
+ // value following this digit.
+ //
+ // Continuation
+ // | Sign
+ // | |
+ // V V
+ // 101011
+ //
+ pub fn encodeVLQ(
+ value: i32,
+ ) VLQ {
+ var len: u4 = 0;
+ var bytes: [vlq_max_in_bytes]u8 = undefined;
+
+ var vlq: u32 = if (value >= 0)
+ @bitCast(u32, value << 1)
+ else
+ @bitCast(u32, (-value << 1) | 1);
+
+ // source mappings are limited to i32
+ comptime var i: usize = 0;
+ inline while (i < vlq_max_in_bytes) : (i += 1) {
+ var digit = vlq & 31;
+ vlq >>= 5;
+
+ // If there are still more digits in this value, we must make sure the
+ // continuation bit is marked
+ if (vlq != 0) {
+ digit |= 32;
+ }
+
+ bytes[len] = base64[digit];
+ len += 1;
+
+ if (vlq == 0) {
+ return VLQ{
+ .bytes = bytes,
+ .len = len,
+ };
+ }
+ }
+
+ return .{ .bytes = bytes, .len = 0 };
+ }
+
+ pub const VLQResult = struct {
+ value: i32 = 0,
+ start: usize = 0,
+ };
+
+ // base64 stores values up to 7 bits
+ const base64_lut: [std.math.maxInt(u7)]u7 = brk: {
+ @setEvalBranchQuota(9999);
+ var bytes = [_]u7{std.math.maxInt(u7)} ** std.math.maxInt(u7);
+
+ for (base64) |c, i| {
+ bytes[c] = i;
+ }
+
+ break :brk bytes;
+ };
+
+ pub fn decodeVLQ(encoded: []const u8, start: usize) VLQResult {
+ var shift: u8 = 0;
+ var vlq: u32 = 0;
+
+ // hint to the compiler what the maximum value is
+ const encoded_ = encoded[start..][0..@minimum(encoded.len - start, comptime (vlq_max_in_bytes + 1))];
+
+ // inlining helps for the 1 or 2 byte case, hurts a little for larger
+ comptime var i: usize = 0;
+ inline while (i < vlq_max_in_bytes + 1) : (i += 1) {
+ const index = @as(u32, base64_lut[@truncate(u7, encoded_[i])]);
+
+ // decode a byte
+ vlq |= (index & 31) << @truncate(u5, shift);
+ shift += 5;
+
+ // Stop if there's no continuation bit
+ if ((index & 32) == 0) {
+ return VLQResult{
+ .start = i + start,
+ .value = if ((vlq & 1) == 0)
+ @intCast(i32, vlq >> 1)
+ else
+ -@intCast(i32, (vlq >> 1)),
+ };
+ }
+ }
+
+ return VLQResult{ .start = start + encoded_.len, .value = 0 };
+ }
+};
pub fn main() anyerror!void {
const args = try std.process.argsAlloc(std.heap.c_allocator);
@@ -8,6 +139,7 @@ pub fn main() anyerror!void {
var numbers = try std.heap.c_allocator.alloc(i32, how_many);
var results = try std.heap.c_allocator.alloc(SourceMap.VLQ, how_many);
+ var leb_buf = try std.heap.c_allocator.alloc(u8, how_many * 8);
const byte_size = std.mem.sliceAsBytes(numbers).len;
var rand = std.rand.DefaultPrng.init(0);
@@ -49,7 +181,29 @@ pub fn main() anyerror!void {
std.debug.print("[{d}] decode: {} in {}\n", .{ how_many, std.fmt.fmtIntSizeDec(byte_size), std.fmt.fmtDuration(elapsed) });
}
- std.debug.print("\nNumbers between 0 - 8096 (most columns won't exceed 255):\n\n", .{});
+ {
+ var timer = try std.time.Timer.start();
+ var stream = std.io.fixedBufferStream(leb_buf);
+ var writer = stream.writer();
+ for (numbers) |n| {
+ std.leb.writeILEB128(writer, n) catch unreachable;
+ }
+ const elapsed = timer.read();
+ std.debug.print("[{d}] ILEB128 encode: {} in {}\n", .{ how_many, std.fmt.fmtIntSizeDec(byte_size), std.fmt.fmtDuration(elapsed) });
+ }
+
+ {
+ var timer = try std.time.Timer.start();
+ var stream = std.io.fixedBufferStream(leb_buf);
+ var reader = stream.reader();
+ for (numbers) |_, i| {
+ numbers[i] = std.leb.readILEB128(i32, reader) catch unreachable;
+ }
+ const elapsed = timer.read();
+ std.debug.print("[{d}] ILEB128 decode: {} in {}\n", .{ how_many, std.fmt.fmtIntSizeDec(byte_size), std.fmt.fmtDuration(elapsed) });
+ }
+
+ std.debug.print("\nNumbers between 0 - 8096:\n\n", .{});
for (numbers) |_, i| {
numbers[i] = rand.random().intRangeAtMost(i32, 0, 8096);
@@ -86,7 +240,29 @@ pub fn main() anyerror!void {
std.debug.print("[{d}] decode: {} in {}\n", .{ how_many, std.fmt.fmtIntSizeDec(byte_size), std.fmt.fmtDuration(elapsed) });
}
- std.debug.print("\nNumbers between 0 - 255 (most columns won't exceed 255):\n\n", .{});
+ {
+ var timer = try std.time.Timer.start();
+ var stream = std.io.fixedBufferStream(leb_buf);
+ var writer = stream.writer();
+ for (numbers) |n| {
+ std.leb.writeILEB128(writer, n) catch unreachable;
+ }
+ const elapsed = timer.read();
+ std.debug.print("[{d}] ILEB128 encode: {} in {}\n", .{ how_many, std.fmt.fmtIntSizeDec(byte_size), std.fmt.fmtDuration(elapsed) });
+ }
+
+ {
+ var timer = try std.time.Timer.start();
+ var stream = std.io.fixedBufferStream(leb_buf);
+ var reader = stream.reader();
+ for (numbers) |_, i| {
+ numbers[i] = std.leb.readILEB128(i32, reader) catch unreachable;
+ }
+ const elapsed = timer.read();
+ std.debug.print("[{d}] ILEB128 decode: {} in {}\n", .{ how_many, std.fmt.fmtIntSizeDec(byte_size), std.fmt.fmtDuration(elapsed) });
+ }
+
+ std.debug.print("\nNumbers between 0 - 255:\n\n", .{});
for (numbers) |_, i| {
numbers[i] = rand.random().intRangeAtMost(i32, 0, 255);
@@ -122,4 +298,61 @@ pub fn main() anyerror!void {
const elapsed = timer.read();
std.debug.print("[{d}] decode: {} in {}\n", .{ how_many, std.fmt.fmtIntSizeDec(byte_size), std.fmt.fmtDuration(elapsed) });
}
+
+ {
+ var timer = try std.time.Timer.start();
+ var stream = std.io.fixedBufferStream(leb_buf);
+ var writer = stream.writer();
+ for (numbers) |n| {
+ std.leb.writeILEB128(writer, n) catch unreachable;
+ }
+ const elapsed = timer.read();
+ std.debug.print("[{d}] ILEB128 encode: {} in {}\n", .{ how_many, std.fmt.fmtIntSizeDec(byte_size), std.fmt.fmtDuration(elapsed) });
+ }
+
+ {
+ var timer = try std.time.Timer.start();
+ var stream = std.io.fixedBufferStream(leb_buf);
+ var reader = stream.reader();
+ for (numbers) |_, i| {
+ numbers[i] = std.leb.readILEB128(i32, reader) catch unreachable;
+ }
+ const elapsed = timer.read();
+ std.debug.print("[{d}] ILEB128 decode: {} in {}\n", .{ how_many, std.fmt.fmtIntSizeDec(byte_size), std.fmt.fmtDuration(elapsed) });
+ }
+}
+
+test "encodeVLQ" {
+ const fixtures = .{
+ .{ 2_147_483_647, "+/////D" },
+ .{ -2_147_483_647, "//////D" },
+ .{ 0, "A" },
+ .{ 1, "C" },
+ .{ -1, "D" },
+ .{ 123, "2H" },
+ .{ 123456789, "qxmvrH" },
+ };
+ inline for (fixtures) |fixture| {
+ const result = SourceMap.encodeVLQ(fixture[0]);
+ try std.testing.expectEqualStrings(fixture[1], result.bytes[0..result.len]);
+ }
+}
+
+test "decodeVLQ" {
+ const fixtures = .{
+ .{ 2_147_483_647, "+/////D" },
+ .{ -2_147_483_647, "//////D" },
+ .{ 0, "A" },
+ .{ 1, "C" },
+ .{ -1, "D" },
+ .{ 123, "2H" },
+ .{ 123456789, "qxmvrH" },
+ };
+ inline for (fixtures) |fixture| {
+ const result = SourceMap.decodeVLQ(fixture[1], 0);
+ try std.testing.expectEqual(
+ result.value,
+ fixture[0],
+ );
+ }
}
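
As a sanity check on the fixtures above, the "2H" case can be worked by hand using the same base64 alphabet and 5-bit digit groups. The sketch below is a standalone illustration written against the encoding rules shown in encodeVLQ/decodeVLQ; the test itself is hypothetical and not part of this commit.

    const std = @import("std");

    test "worked example: \"2H\" decodes to 123" {
        // '2' is index 54 in the base64 alphabet = 0b110110:
        //   continuation bit (32) is set, low five payload bits = 22
        // 'H' is index 7 = 0b000111:
        //   continuation bit clear, low five payload bits = 7
        const vlq: u32 = 22 | (7 << 5); // 22 + 224 = 246
        // The low bit carries the sign; here it is 0, so the value is vlq >> 1.
        const value = @intCast(i32, vlq >> 1); // 246 >> 1 = 123
        try std.testing.expectEqual(@as(i32, 123), value);
    }

Encoding runs the same steps in reverse: 123 << 1 = 246, whose low five bits (22) take the continuation flag and map to '2', and whose remaining bits (7) map to 'H'. Since zigzag-encoding an i32 needs at most 32 bits and each digit carries 5 payload bits, the longest possible encoding is 7 digits, which matches the 7-character "+/////D" fixture; vlq_max_in_bytes = 8 simply leaves one byte of slack.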