From 6362414d65b69cd01624e84d08eca654fc8cb101 Mon Sep 17 00:00:00 2001 From: Jarred Sumner Date: Fri, 7 Apr 2023 20:08:01 -0700 Subject: Bun gets a new bundler (#2312) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * alright now just gotta try running it * fix a gajillion compiler errors * even more code * okay i fixed more errors * wip * Update launch.json * Update string_builder.zig * `fast_debug_build_mode` makes debug build 2x faster * Update bundle_v2.zig * more code! * It bundles! * Rename `Bun.Transpiler` to `Bun.Bundler` * `import()` expressions almost work * wip attempt to get import() expr to work * Bundle namespace imports * Attempt to fix the issue with import() unsuccessfully * consider current working directory when resolving relative paths (#2313) * consider current working directory when resolving relative paths fixes #2298 * comment test --------- Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> * support `expect().toThrow(/pattern/)` (#2314) - fix time-zone-dependent test failure * fix missing `Blob` error messages on Linux (#2315) * fix & clean up tests (#2318) - skip flaky tests when running as `root` - use `expect().toThrow()` - clean up temporary files after tests * feat(tty): add some `tty.WriteStream` methods to `process.{stdout, stderr}` (#2320) * feat(stdio): add some `tty.WriteStream` methods * chore(builtins): add process builtin gen'd code * Fix docker install command * `bun test` on macOS in GitHub Actions (#2322) * Fixes #2323 * throw invalid parameter errors in `crypto.scryptSync` (#2331) * throw invalid parameter errors * remove comptime, add empty buffer function * remove error_name comptime * Add reference documentation for bun:test (#2327) * Reorganize tests (#2332) * Fix html-rewriter.test.js * fix the wrong thing being incremented in hmr example (#2334) * Add more test harness * Improve Benchmarking page, small fixes (#2339) * Improve benchmarking page * 
WIP * Add typescript instructions to hot * Document preload in Plugins. Fix loader in plugin types. * Fix typo * Fix links * run prettier * Document openInEditor * improve `Buffer` compatibility with Node.js (#2341) * improve `Buffer` compatibility with Node.js * use `memmove()` allow `encoding` to be `undefined` * run `bun test` after macOS builds (#2343) * "binary" is an alias of "latin1" Fixes https://github.com/oven-sh/bun/issues/2110 * More spec compliant `Blob.prototype.type` (#2340) * Make `Blob.prototype.type` more spec compliant * Add a few more checks for isNumber() * Fix `make headers` * Safer JSValue.isString() * More tests for blob.slice * Make `Blob.prototype.type` more spec compliant * Add isASCII check * Fix types * Fix failing type test * Update blob.zig * Update blob.zig * Fix .eql check on empty values --------- Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> * Fix bug in test runner * Support `import()` expressions * Implement `require()` * clean up bit_set.zig slightly * Move some things around * misc cleanup * Cleanup some things * Fix a lot of stuff * Fix `module.exports.fn = fn;` in ESM entry point * Fix crash when printing file * Fix issue with class names * Fix issue with `export default identifier` * Update js_parser.zig * optimization: inline single-property object accesses and arrays * Fix undefined memory in renamed symbols list * Handle call target * wip * Inline it * Fix undefined memory issue when reclaiming blocks in ast * Halt linking on any parse errors * alias * Rename `enable_bundling` to `enable_legacy_bundling` * Workaround anonymous struct literal zig bug * Use slower approach (without bitset) because it doesn't break after 8 symbols * Fix incorrectly-renaming statically defined symbols * Handle more edge cases in our bit_set fork * Reduce number of allocations for `define` * Do not rename unbound symbols * Clean up dot defines a little more * Make the generated names prettier * Workaround 
runtime symbol missing issue * Fail the build on errors * Support export * from * Support `--outfile` * partially fix renaming * fancier symbol renaming impl * misc, extremely revertible cleanup * Fix up some bugs with symbol renaming * formatting * Update launch.json * Parse `__PURE__` comments * clean up simd code for pure comments * changes to merge * workaround runtime issue * Fix issue with `export * as` not propagating correctly * Make all top-level declarations `var` when bundling * Fix missing prefix * Fix assigning to stack copy * Fix missing runtime symbol * Fix bug with namespace exports * Dramatically reduce allocations * Update launch.json * Add missing flags * Update js_parser.zig * small cleanup * Make the export name better * Fix unnecessary `var foo = foo` * Implement CommonJS -> ESM conversion * Implement module redirects * Port esbuild bundler tests for new bundler (#2380) * started porting esbuild tests * clean up test names and api before moving on * port tests using a program i wrote * replace todo generated comment * fix generated tests not including some files * work on tests * [github web editor] add define, external, inject, minifySyntax, minifyWhitespace options. 
* get most of the todo comments out of the way, but expectBundled does not handle most of the cases * continue working on esbuild tests * use test.skip for unsupported tests * Fixups for test runner * Hoist imports & exports * Fix test * Hoist classes * bundler test refining, 51/835 * Fix runtime require * bundler test refining, 81/835 * bundler test refining, 93/835 * Make the test work in any timezone * feat(expect): update toBeInstanceOf (#2396) * feat: update instanceof binding * fix: according to PR comments * Rename `expectObjectTypeCount` to `expectMaxObjectTypeCount` * Fix socket tests with connection errors (#2403) * release pending activity with connection error handler * unref poll_ref * remove trailing comma * Organize Dockerfiles for official status * Remove test Dockerfile * Remove old Docker workflow * Feat(test): add toMatch (#2404) * Fix various fetch/response/request tests (#2416) * fix most fetch tests, skip a few * fastGet, toValueGC, and invalid init * bigint unreachable, range error, log process as process * remove extra fetch_headers * remove js_type parameter, check isObject() * throw invalid mime type error, use enum literal * switch back to promise rejection * RangeError pascal case * Fix several bugs (#2418) * utf16 codepoint with replacement character * Fix test failure with `TextEncoder("ascii')` * Add missing type * Fix Response.prototype.bodyUsed and Request.prototype.bodyUsed * Fix bug with scrypt error not clearing * Update server.zig * oopsie * :nail_care: * docs: Use correct url in the 'Issues' link in README header (#2420) * Fix crash when rendering error page and the server or network is slow * [fetch] Make the default body value `null` when unspecified This is better aligned with the fetch spec * Make node-net tests less flaky * [node:net] Fix issue with `listen` callback firing before it's listening * Always clear timers in node test harness * Fix out of bounds access Repro'd in Buffer tests * Update UWS cc @cirospaciari * 
Make this test more thorough * Hanging abort test * 0 length body is a null stream * Several bug fixes (#2427) * Fix test * Fix segfault when unexpected type is passed in `expect().toThrow` * Fix issues with request constructor * Don't bother cloning headers when its empty * woops * more tests * fix incorrect test * Make the fetch error messages better * Update response.zig * Fix test that failed on macOS * Fix test * Remove extra hash table lookups * Support running dummy registry directly cc @alexlamsl * Update test * Update test * fixup * Workaround crash in test runner * Fixup test * Fixup test * Update os.test.js --------- Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> * Remove usages of port numbers in tests * Set -O2 and -fno-rtti * Remove -g * Prevent undefined memory access * [bun test] Implement `--rerun-each` flag to run each test N times * Reduce number of module scopes created * add some extra abort checks into streams (#2430) * add some checks to avoid UAF * avoid multiple calls to finalize if endFromJS is called more than once * fix no-op comment * mark as requested_end on abort * remove requested_end from abort * remove unnecessary check (#2432) * Fix bug with scoped aliased dependencies in bun install on macOS * remove `addLog`, remove `--prominent-compile-errors` * Finish the upgrade * Optional chaining flag * Implement same_target_becomes_destructuring optimization * bundler test refining, 109/835 * Reset bindings * Support multiple entry points * Implement `--entry-names` flag * Use a tempdir with a better name * prettier * Log file name * Update js_parser.zig * Mark all bun builtins as external * Make resolve errors actually errors * Update bundler_default.test.ts * Fix `await import(foo)` * WIP react server components * Do more stuff at runtime * :scissors: * Support automatic JSX imports * Use a module cache for now * Update tsconfig.base.json * Fix ThisOutsideFunctionNotRenamed * woopsie * moar cpu * clamp it * 
fixup * Add a bunch of assertions * Bun uses automatic runtime by default * Parse Import Attributes * Add a note about Valgrind * Update developing.md * Fix up code splitting for React Server Components * Implement client component manifest * Fix crash with --react-server-components and no client components * Backport https://github.com/ziglang/zig/commit/4d31e3c917a05541394c544708f0047cfb53331a * Update launch.json * Fix for latest zig * Workaround bug with ?[]const string Occasionally saw alignment errors in this code Workaround https://github.com/ziglang/zig/issues/15085 related: https://github.com/ziglang/zig/pull/15089 * switch to regular slice * Avoid initializing named_imports and named_exports as undefined * Reduce usages of `undefined` * Add more assertions * --watch wip * Update javascript.zig * Possibly fix the race condition * Faster `do` * bump allocator * Reduce the size of `Symbol` slightly * Alphabetically sort runtime import symbols, for determinism * Prepare for code splitting * handle overlapping stdout * pure * clean up some things * Fix bug with `$$typeof` * Address CommonJS -> ESM hoisting bug * Support `"use server"` in manifest * Implement `"use server"` * Fix importing bun builtins when bundling * Make `commonjs_to_esm` a feature flag, fix some splitting bugs * :scissors: * fixme remove this * Fix crash in longestCommonPath * Chunking! Just need to do import paths now. 
* Import paths work...now trying to figure out how to make runtime symbols work * add workaround * Replace `bun bun` with `bun build` * Fix crash with dual package hazard * Fix many CommonJS <> ESM interop bugs * Support package.json `"sideEffects"` also skip loading unnecessary package.json data in `bun run` * add a not good --watch implementation * bundler test refining, 140/831 * remove accidentally committed file * do not return status code 1 on successful bundles * bundler test refining, 159/830 * pass exit code to exitOrWatch * clean up help menu -remove two spaces to line up bun build -moved all tags to the end of the text they are colorizing -moved other colors to the start of the text they colorize -removed unneeded tags, keeping only one at the start of the block * importstar is fully ported * wip * you can run code in this branch now * Disable this transform * organize and document bundler tests * Fix double import * Fix sloppy mode function declarations * Disable our CommonJS transform for now * add `assertNotPresent` to make splitting cases easier * Bump! 
* Update bun.d.ts * use import.meta.require in runtime code * Disable this again * Fix dirname * Fix ESM -> CJS wrapper * :nail_care: --------- Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> Co-authored-by: Alex Lam S.L Co-authored-by: Derrick Farris Co-authored-by: Ashcon Partovi Co-authored-by: Dylan Conway <35280289+dylan-conway@users.noreply.github.com> Co-authored-by: pfg Co-authored-by: Colin McDonnell Co-authored-by: dave caruso Co-authored-by: zhiyuan <32867472+zhiyuang@users.noreply.github.com> Co-authored-by: Dylan Conway Co-authored-by: Kamil Ogórek Co-authored-by: Ciro Spaciari --- src/string_immutable.zig | 160 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 120 insertions(+), 40 deletions(-) (limited to 'src/string_immutable.zig') diff --git a/src/string_immutable.zig b/src/string_immutable.zig index f4c6fae07..889add550 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -20,7 +20,7 @@ pub inline fn containsChar(self: string, char: u8) bool { } pub inline fn contains(self: string, str: string) bool { - return std.mem.indexOf(u8, self, str) != null; + return indexOf(self, str) != null; } pub fn toUTF16Literal(comptime str: []const u8) []const u16 { @@ -41,11 +41,9 @@ pub fn toUTF16Literal(comptime str: []const u8) []const u16 { const OptionalUsize = std.meta.Int(.unsigned, @bitSizeOf(usize) - 1); pub fn indexOfAny(self: string, comptime str: anytype) ?OptionalUsize { - for (self, 0..) |c, i| { - inline for (str) |a| { - if (c == a) { - return @intCast(OptionalUsize, i); - } + inline for (str) |a| { + if (indexOfChar(self, a)) |i| { + return @intCast(OptionalUsize, i); } } @@ -148,6 +146,79 @@ pub fn indexOfCharNeg(self: string, char: u8) i32 { return -1; } +/// Format a string to an ECMAScript identifier. 
+/// Unlike the string_mutable.zig version, this always allocate/copy +pub fn fmtIdentifier(name: string) FormatValidIdentifier { + return FormatValidIdentifier{ .name = name }; +} + +/// Format a string to an ECMAScript identifier. +/// Different implementation than string_mutable because string_mutable may avoid allocating +/// This will always allocate +pub const FormatValidIdentifier = struct { + name: string, + const js_lexer = @import("./js_lexer.zig"); + pub fn format(self: FormatValidIdentifier, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void { + var iterator = strings.CodepointIterator.init(self.name); + var cursor = strings.CodepointIterator.Cursor{}; + + var has_needed_gap = false; + var needs_gap = false; + var start_i: usize = 0; + + if (!iterator.next(&cursor)) { + try writer.writeAll("_"); + return; + } + + // Common case: no gap necessary. No allocation necessary. + needs_gap = !js_lexer.isIdentifierStart(cursor.c); + if (!needs_gap) { + // Are there any non-alphanumeric chars at all? + while (iterator.next(&cursor)) { + if (!js_lexer.isIdentifierContinue(cursor.c) or cursor.width > 1) { + needs_gap = true; + start_i = cursor.i; + break; + } + } + } + + if (needs_gap) { + needs_gap = false; + if (start_i > 0) try writer.writeAll(self.name[0..start_i]); + var slice = self.name[start_i..]; + iterator = strings.CodepointIterator.init(slice); + cursor = strings.CodepointIterator.Cursor{}; + + while (iterator.next(&cursor)) { + if (js_lexer.isIdentifierContinue(cursor.c) and cursor.width == 1) { + if (needs_gap) { + try writer.writeAll("_"); + needs_gap = false; + has_needed_gap = true; + } + try writer.writeAll(slice[cursor.i .. 
cursor.i + @as(u32, cursor.width)]); + } else if (!needs_gap) { + needs_gap = true; + // skip the code point, replace it with a single _ + } + } + + // If it ends with an emoji + if (needs_gap) { + try writer.writeAll("_"); + needs_gap = false; + has_needed_gap = true; + } + + return; + } + + try writer.writeAll(self.name); + } +}; + pub fn indexOfSigned(self: string, str: string) i32 { const i = std.mem.indexOf(u8, self, str) orelse return -1; return @intCast(i32, i); @@ -177,7 +248,9 @@ pub inline fn indexOf(self: string, str: string) ?usize { const start = bun.C.memmem(self_ptr, self_len, str_ptr, str_len) orelse return null; - return @ptrToInt(start) - @ptrToInt(self_ptr); + const i = @ptrToInt(start) - @ptrToInt(self_ptr); + std.debug.assert(i < self_len); + return @intCast(usize, i); } pub fn split(self: string, delimiter: string) SplitIterator { @@ -2899,12 +2972,12 @@ pub const max_16_ascii = @splat(ascii_vector_size, @as(u8, 127)); pub const min_16_ascii = @splat(ascii_vector_size, @as(u8, 0x20)); pub const max_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 127)); pub const min_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 0x20)); -pub const AsciiVector = std.meta.Vector(ascii_vector_size, u8); -pub const AsciiVectorSmall = std.meta.Vector(8, u8); -pub const AsciiVectorU1 = std.meta.Vector(ascii_vector_size, u1); -pub const AsciiVectorU1Small = std.meta.Vector(8, u1); -pub const AsciiVectorU16U1 = std.meta.Vector(ascii_u16_vector_size, u1); -pub const AsciiU16Vector = std.meta.Vector(ascii_u16_vector_size, u16); +pub const AsciiVector = @Vector(ascii_vector_size, u8); +pub const AsciiVectorSmall = @Vector(8, u8); +pub const AsciiVectorU1 = @Vector(ascii_vector_size, u1); +pub const AsciiVectorU1Small = @Vector(8, u1); +pub const AsciiVectorU16U1 = @Vector(ascii_u16_vector_size, u1); +pub const AsciiU16Vector = @Vector(ascii_u16_vector_size, u16); pub const max_4_ascii = @splat(4, @as(u8, 127)); pub fn isAllASCII(slice: []const u8) bool { if 
(bun.FeatureFlags.use_simdutf) @@ -3200,34 +3273,15 @@ pub fn indexOfCharZ(sliceZ: [:0]const u8, char: u8) ?u63 { } pub fn indexOfChar(slice: []const u8, char: u8) ?u32 { - var remaining = slice; - if (remaining.len == 0) + if (slice.len == 0) return null; - if (remaining[0] == char) - return 0; - - if (comptime Environment.enableSIMD) { - while (remaining.len >= ascii_vector_size) { - const vec: AsciiVector = remaining[0..ascii_vector_size].*; - const cmp = vec == @splat(ascii_vector_size, char); - - if (@reduce(.Max, @bitCast(AsciiVectorU1, cmp)) > 0) { - const bitmask = @bitCast(AsciiVectorInt, cmp); - const first = @ctz(bitmask); - return @intCast(u32, @as(u32, first) + @intCast(u32, slice.len - remaining.len)); - } - remaining = remaining[ascii_vector_size..]; - } - } - - for (remaining, 0..) |c, i| { - if (c == char) { - return @truncate(u32, i + (slice.len - remaining.len)); - } - } + const ptr = bun.C.memchr(slice.ptr, char, slice.len) orelse return null; + const i = @ptrToInt(ptr) - @ptrToInt(slice.ptr); + std.debug.assert(i < slice.len); + std.debug.assert(slice[i] == char); - return null; + return @truncate(u32, i); } test "indexOfChar" { @@ -3829,25 +3883,51 @@ pub fn join(slices: []const string, delimiter: string, allocator: std.mem.Alloca return try std.mem.join(allocator, delimiter, slices); } +pub fn order(a: []const u8, b: []const u8) std.math.Order { + const len = @min(a.len, b.len); + const cmp = bun.C.memcmp(a.ptr, b.ptr, len); + return switch (std.math.sign(cmp)) { + 0 => std.math.order(a.len, b.len), + 1 => .gt, + -1 => .lt, + else => unreachable, + }; +} + pub fn cmpStringsAsc(_: void, a: string, b: string) bool { - return std.mem.order(u8, a, b) == .lt; + return order(a, b) == .lt; } pub fn cmpStringsDesc(_: void, a: string, b: string) bool { - return std.mem.order(u8, a, b) == .gt; + return order(a, b) == .gt; } const sort_asc = std.sort.asc(u8); const sort_desc = std.sort.desc(u8); pub fn sortAsc(in: []string) void { + // TODO: experiment 
with simd to see if it's faster std.sort.sort([]const u8, in, {}, cmpStringsAsc); } pub fn sortDesc(in: []string) void { + // TODO: experiment with simd to see if it's faster std.sort.sort([]const u8, in, {}, cmpStringsDesc); } +pub const StringArrayByIndexSorter = struct { + keys: []const []const u8, + pub fn lessThan(sorter: *const @This(), a: usize, b: usize) bool { + return strings.order(sorter.keys[a], sorter.keys[b]) == .lt; + } + + pub fn init(keys: []const []const u8) @This() { + return .{ + .keys = keys, + }; + } +}; + pub fn isASCIIHexDigit(c: u8) bool { return std.ascii.isHex(c); } -- cgit v1.2.3