diff options
author | 2023-04-07 20:08:01 -0700 | |
---|---|---|
committer | 2023-04-07 20:08:01 -0700 | |
commit | 6362414d65b69cd01624e84d08eca654fc8cb101 (patch) | |
tree | 77bda836b819769002b746da3c4b7bdb90546f41 /src/string_immutable.zig | |
parent | c0c5f07218096a52ed12d86374f785e6dc331fa4 (diff) | |
download | bun-6362414d65b69cd01624e84d08eca654fc8cb101.tar.gz bun-6362414d65b69cd01624e84d08eca654fc8cb101.tar.zst bun-6362414d65b69cd01624e84d08eca654fc8cb101.zip |
Bun gets a new bundler (#2312)
* alright now just gotta try running it
* fix a gajillion compiler errors
* even more code
* okay i fixed more errors
* wip
* Update launch.json
* Update string_builder.zig
* `fast_debug_build_mode` makes debug build 2x faster
* Update bundle_v2.zig
* more code!
* It bundles!
* Rename `Bun.Transpiler` to `Bun.Bundler`
* `import()` expressions almost work
* wip attempt to get import() expr to work
* Bundle namespace imports
* Attempt to fix the issue with import() unsuccessfully
* consider current working directory when resolving relative paths (#2313)
* consider current working directory when resolving relative paths
fixes #2298
* comment test
---------
Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>
* support `expect().toThrow(/pattern/)` (#2314)
- fix time-zone-dependent test failure
* fix missing `Blob` error messages on Linux (#2315)
* fix & clean up tests (#2318)
- skip flaky tests when running as `root`
- use `expect().toThrow()`
- clean up temporary files after tests
* feat(tty): add some `tty.WriteStream` methods to `process.{stdout, stderr}` (#2320)
* feat(stdio): add some `tty.WriteStream` methods
* chore(builtins): add process builtin gen'd code
* Fix docker install command
* `bun test` on macOS in GitHub Actions (#2322)
* Fixes #2323
* throw invalid parameter errors in `crypto.scryptSync` (#2331)
* throw invalid parameter errors
* remove comptime, add empty buffer function
* remove error_name comptime
* Add reference documentation for bun:test (#2327)
* Reorganize tests (#2332)
* Fix html-rewriter.test.js
* fix the wrong thing being incremented in hmr example (#2334)
* Add more test harness
* Improve Benchmarking page, small fixes (#2339)
* Improve benchmarking page
* WIP
* Add typescript instructions to hot
* Document preload in Plugins. Fix loader in plugin types.
* Fix typo
* Fix links
* run prettier
* Document openInEditor
* improve `Buffer` compatibility with Node.js (#2341)
* improve `Buffer` compatibility with Node.js
* use `memmove()`
allow `encoding` to be `undefined`
* run `bun test` after macOS builds (#2343)
* "binary" is an alias of "latin1"
Fixes https://github.com/oven-sh/bun/issues/2110
* More spec compliant `Blob.prototype.type` (#2340)
* Make `Blob.prototype.type` more spec compliant
* Add a few more checks for isNumber()
* Fix `make headers`
* Safer JSValue.isString()
* More tests for blob.slice
* Make `Blob.prototype.type` more spec compliant
* Add isASCII check
* Fix types
* Fix failing type test
* Update blob.zig
* Update blob.zig
* Fix .eql check on empty values
---------
Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>
* Fix bug in test runner
* Support `import()` expressions
* Implement `require()`
* clean up bit_set.zig slightly
* Move some things around
* misc cleanup
* Cleanup some things
* Fix a lot of stuff
* Fix `module.exports.fn = fn;` in ESM entry point
* Fix crash when printing file
* Fix issue with class names
* Fix issue with `export default identifier`
* Update js_parser.zig
* optimization: inline single-property object accesses and arrays
* Fix undefined memory in renamed symbols list
* Handle call target
* wip
* Inline it
* Fix undefined memory issue when reclaiming blocks in ast
* Halt linking on any parse errors
* alias
* Rename `enable_bundling` to `enable_legacy_bundling`
* Workaround anonymous struct literal zig bug
* Use slower approach (without bitset) because it doesn't break after 8 symbols
* Fix incorrectly-renaming statically defined symbols
* Handle more edge cases in our bit_set fork
* Reduce number of allocations for `define`
* Do not rename unbound symbols
* Clean up dot defines a little more
* Make the generated names prettier
* Workaround runtime symbol missing issue
* Fail the build on errors
* Support export * from
* Support `--outfile`
* partially fix renaming
* fancier symbol renaming impl
* misc, extremely revertible cleanup
* Fix up some bugs with symbol renaming
* formatting
* Update launch.json
* Parse `__PURE__` comments
* clean up simd code for pure comments
* changes to merge
* workaround runtime issue
* Fix issue with `export * as` not propagating correctly
* Make all top-level declarations `var` when bundling
* Fix missing prefix
* Fix assigning to stack copy
* Fix missing runtime symbol
* Fix bug with namespace exports
* Dramatically reduce allocations
* Update launch.json
* Add missing flags
* Update js_parser.zig
* small cleanup
* Make the export name better
* Fix unnecessary `var foo = foo`
* Implement CommonJS -> ESM conversion
* Implement module redirects
* Port esbuild bundler tests for new bundler (#2380)
* started porting esbuild tests
* clean up test names and api before moving on
* port tests using a program i wrote
* replace todo generated comment
* fix generated tests not including some files
* work on tests
* [github web editor] add define, external, inject, minifySyntax, minifyWhitespace options.
* get most of the todo comments out of the way, but expectBundled does not handle most of the cases
* continue working on esbuild tests
* use test.skip for unsupported tests
* Fixups for test runner
* Hoist imports & exports
* Fix test
* Hoist classes
* bundler test refining, 51/835
* Fix runtime require
* bundler test refining, 81/835
* bundler test refining, 93/835
* Make the test work in any timezone
* feat(expect): update toBeInstanceOf (#2396)
* feat: update instanceof binding
* fix: according to PR comments
* Rename `expectObjectTypeCount` to `expectMaxObjectTypeCount`
* Fix socket tests with connection errors (#2403)
* release pending activity with connection error handler
* unref poll_ref
* remove trailing comma
* Organize Dockerfiles for official status
* Remove test Dockerfile
* Remove old Docker workflow
* Feat(test): add toMatch (#2404)
* Fix various fetch/response/request tests (#2416)
* fix most fetch tests, skip a few
* fastGet, toValueGC, and invalid init
* bigint unreachable, range error, log process as process
* remove extra fetch_headers
* remove js_type parameter, check isObject()
* throw invalid mime type error, use enum literal
* switch back to promise rejection
* RangeError pascal case
* Fix several bugs (#2418)
* utf16 codepoint with replacement character
* Fix test failure with `TextEncoder("ascii")`
* Add missing type
* Fix Response.prototype.bodyUsed and Request.prototype.bodyUsed
* Fix bug with scrypt error not clearing
* Update server.zig
* oopsie
* :nail_care:
* docs: Use correct url in the 'Issues' link in README header (#2420)
* Fix crash when rendering error page and the server or network is slow
* [fetch] Make the default body value `null` when unspecified
This is better aligned with the fetch spec
* Make node-net tests less flaky
* [node:net] Fix issue with `listen` callback firing before it's listening
* Always clear timers in node test harness
* Fix out of bounds access
Repro'd in Buffer tests
* Update UWS
cc @cirospaciari
* Make this test more thorough
* Hanging abort test
* 0 length body is a null stream
* Several bug fixes (#2427)
* Fix test
* Fix segfault when unexpected type is passed in `expect().toThrow`
* Fix issues with request constructor
* Don't bother cloning headers when it's empty
* woops
* more tests
* fix incorrect test
* Make the fetch error messages better
* Update response.zig
* Fix test that failed on macOS
* Fix test
* Remove extra hash table lookups
* Support running dummy registry directly
cc @alexlamsl
* Update test
* Update test
* fixup
* Workaround crash in test runner
* Fixup test
* Fixup test
* Update os.test.js
---------
Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>
* Remove usages of port numbers in tests
* Set -O2 and -fno-rtti
* Remove -g
* Prevent undefined memory access
* [bun test] Implement `--rerun-each` flag to run each test N times
* Reduce number of module scopes created
* add some extra abort checks into streams (#2430)
* add some checks to avoid UAF
* avoid multiple calls to finalize if endFromJS is called more than once
* fix no-op comment
* mark as requested_end on abort
* remove requested_end from abort
* remove unnecessary check (#2432)
* Fix bug with scoped aliased dependencies in bun install on macOS
* remove `addLog`, remove `--prominent-compile-errors`
* Finish the upgrade
* Optional chaining flag
* Implement same_target_becomes_destructuring optimization
* bundler test refining, 109/835
* Reset bindings
* Support multiple entry points
* Implement `--entry-names` flag
* Use a tempdir with a better name
* prettier
* Log file name
* Update js_parser.zig
* Mark all bun builtins as external
* Make resolve errors actually errors
* Update bundler_default.test.ts
* Fix `await import(foo)`
* WIP react server components
* Do more stuff at runtime
* :scissors:
* Support automatic JSX imports
* Use a module cache for now
* Update tsconfig.base.json
* Fix ThisOutsideFunctionNotRenamed
* woopsie
* moar cpu
* clamp it
* fixup
* Add a bunch of assertions
* Bun uses automatic runtime by default
* Parse Import Attributes
* Add a note about Valgrind
* Update developing.md
* Fix up code splitting for React Server Components
* Implement client component manifest
* Fix crash with --react-server-components and no client components
* Backport https://github.com/ziglang/zig/commit/4d31e3c917a05541394c544708f0047cfb53331a
* Update launch.json
* Fix for latest zig
* Workaround bug with ?[]const string
Occasionally saw alignment errors in this code
Workaround https://github.com/ziglang/zig/issues/15085
related: https://github.com/ziglang/zig/pull/15089
* switch to regular slice
* Avoid initializing named_imports and named_exports as undefined
* Reduce usages of `undefined`
* Add more assertions
* --watch wip
* Update javascript.zig
* Possibly fix the race condition
* Faster `do`
* bump allocator
* Reduce the size of `Symbol` slightly
* Alphabetically sort runtime import symbols, for determinism
* Prepare for code splitting
* handle overlapping stdout
* pure
* clean up some things
* Fix bug with `$$typeof`
* Address CommonJS -> ESM hoisting bug
* Support `"use server"` in manifest
* Implement `"use server"`
* Fix importing bun builtins when bundling
* Make `commonjs_to_esm` a feature flag, fix some splitting bugs
* :scissors:
* fixme remove this
* Fix crash in longestCommonPath
* Chunking! Just need to do import paths now.
* Import paths work...now trying to figure out how to make runtime symbols work
* add workaround
* Replace `bun bun` with `bun build`
* Fix crash with dual package hazard
* Fix many CommonJS <> ESM interop bugs
* Support package.json `"sideEffects"`
also skip loading unnecessary package.json data in `bun run`
* add a not good --watch implementation
* bundler test refining, 140/831
* remove accidentally committed file
* do not return status code 1 on successful bundles
* bundler test refining, 159/830
* pass exit code to exitOrWatch
* clean up help menu
-remove two spaces to line up bun build
-moved all <r> tags to the end of the text they are colorizing
-moved other colors to the start of the text they colorize
-removed unneeded <r> tags, keeping only one at the start of the block
* importstar is fully ported
* wip
* you can run code in this branch now
* Disable this transform
* organize and document bundler tests
* Fix double import
* Fix sloppy mode function declarations
* Disable our CommonJS transform for now
* add `assertNotPresent` to make splitting cases easier
* Bump!
* Update bun.d.ts
* use import.meta.require in runtime code
* Disable this again
* Fix dirname
* Fix ESM -> CJS wrapper
* :nail_care:
---------
Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>
Co-authored-by: Alex Lam S.L <alexlamsl@gmail.com>
Co-authored-by: Derrick Farris <mr.dcfarris@gmail.com>
Co-authored-by: Ashcon Partovi <ashcon@partovi.net>
Co-authored-by: Dylan Conway <35280289+dylan-conway@users.noreply.github.com>
Co-authored-by: pfg <pfg@pfg.pw>
Co-authored-by: Colin McDonnell <colinmcd94@gmail.com>
Co-authored-by: dave caruso <me@paperdave.net>
Co-authored-by: zhiyuan <32867472+zhiyuang@users.noreply.github.com>
Co-authored-by: Dylan Conway <dylan.conway567@gmail.com>
Co-authored-by: Kamil Ogórek <kamil.ogorek@gmail.com>
Co-authored-by: Ciro Spaciari <ciro.spaciari@gmail.com>
Diffstat (limited to 'src/string_immutable.zig')
-rw-r--r-- | src/string_immutable.zig | 160 |
1 files changed, 120 insertions, 40 deletions
diff --git a/src/string_immutable.zig b/src/string_immutable.zig index f4c6fae07..889add550 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -20,7 +20,7 @@ pub inline fn containsChar(self: string, char: u8) bool { } pub inline fn contains(self: string, str: string) bool { - return std.mem.indexOf(u8, self, str) != null; + return indexOf(self, str) != null; } pub fn toUTF16Literal(comptime str: []const u8) []const u16 { @@ -41,11 +41,9 @@ pub fn toUTF16Literal(comptime str: []const u8) []const u16 { const OptionalUsize = std.meta.Int(.unsigned, @bitSizeOf(usize) - 1); pub fn indexOfAny(self: string, comptime str: anytype) ?OptionalUsize { - for (self, 0..) |c, i| { - inline for (str) |a| { - if (c == a) { - return @intCast(OptionalUsize, i); - } + inline for (str) |a| { + if (indexOfChar(self, a)) |i| { + return @intCast(OptionalUsize, i); } } @@ -148,6 +146,79 @@ pub fn indexOfCharNeg(self: string, char: u8) i32 { return -1; } +/// Format a string to an ECMAScript identifier. +/// Unlike the string_mutable.zig version, this always allocate/copy +pub fn fmtIdentifier(name: string) FormatValidIdentifier { + return FormatValidIdentifier{ .name = name }; +} + +/// Format a string to an ECMAScript identifier. +/// Different implementation than string_mutable because string_mutable may avoid allocating +/// This will always allocate +pub const FormatValidIdentifier = struct { + name: string, + const js_lexer = @import("./js_lexer.zig"); + pub fn format(self: FormatValidIdentifier, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void { + var iterator = strings.CodepointIterator.init(self.name); + var cursor = strings.CodepointIterator.Cursor{}; + + var has_needed_gap = false; + var needs_gap = false; + var start_i: usize = 0; + + if (!iterator.next(&cursor)) { + try writer.writeAll("_"); + return; + } + + // Common case: no gap necessary. No allocation necessary. 
+ needs_gap = !js_lexer.isIdentifierStart(cursor.c); + if (!needs_gap) { + // Are there any non-alphanumeric chars at all? + while (iterator.next(&cursor)) { + if (!js_lexer.isIdentifierContinue(cursor.c) or cursor.width > 1) { + needs_gap = true; + start_i = cursor.i; + break; + } + } + } + + if (needs_gap) { + needs_gap = false; + if (start_i > 0) try writer.writeAll(self.name[0..start_i]); + var slice = self.name[start_i..]; + iterator = strings.CodepointIterator.init(slice); + cursor = strings.CodepointIterator.Cursor{}; + + while (iterator.next(&cursor)) { + if (js_lexer.isIdentifierContinue(cursor.c) and cursor.width == 1) { + if (needs_gap) { + try writer.writeAll("_"); + needs_gap = false; + has_needed_gap = true; + } + try writer.writeAll(slice[cursor.i .. cursor.i + @as(u32, cursor.width)]); + } else if (!needs_gap) { + needs_gap = true; + // skip the code point, replace it with a single _ + } + } + + // If it ends with an emoji + if (needs_gap) { + try writer.writeAll("_"); + needs_gap = false; + has_needed_gap = true; + } + + return; + } + + try writer.writeAll(self.name); + } +}; + pub fn indexOfSigned(self: string, str: string) i32 { const i = std.mem.indexOf(u8, self, str) orelse return -1; return @intCast(i32, i); @@ -177,7 +248,9 @@ pub inline fn indexOf(self: string, str: string) ?usize { const start = bun.C.memmem(self_ptr, self_len, str_ptr, str_len) orelse return null; - return @ptrToInt(start) - @ptrToInt(self_ptr); + const i = @ptrToInt(start) - @ptrToInt(self_ptr); + std.debug.assert(i < self_len); + return @intCast(usize, i); } pub fn split(self: string, delimiter: string) SplitIterator { @@ -2899,12 +2972,12 @@ pub const max_16_ascii = @splat(ascii_vector_size, @as(u8, 127)); pub const min_16_ascii = @splat(ascii_vector_size, @as(u8, 0x20)); pub const max_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 127)); pub const min_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 0x20)); -pub const AsciiVector = 
std.meta.Vector(ascii_vector_size, u8); -pub const AsciiVectorSmall = std.meta.Vector(8, u8); -pub const AsciiVectorU1 = std.meta.Vector(ascii_vector_size, u1); -pub const AsciiVectorU1Small = std.meta.Vector(8, u1); -pub const AsciiVectorU16U1 = std.meta.Vector(ascii_u16_vector_size, u1); -pub const AsciiU16Vector = std.meta.Vector(ascii_u16_vector_size, u16); +pub const AsciiVector = @Vector(ascii_vector_size, u8); +pub const AsciiVectorSmall = @Vector(8, u8); +pub const AsciiVectorU1 = @Vector(ascii_vector_size, u1); +pub const AsciiVectorU1Small = @Vector(8, u1); +pub const AsciiVectorU16U1 = @Vector(ascii_u16_vector_size, u1); +pub const AsciiU16Vector = @Vector(ascii_u16_vector_size, u16); pub const max_4_ascii = @splat(4, @as(u8, 127)); pub fn isAllASCII(slice: []const u8) bool { if (bun.FeatureFlags.use_simdutf) @@ -3200,34 +3273,15 @@ pub fn indexOfCharZ(sliceZ: [:0]const u8, char: u8) ?u63 { } pub fn indexOfChar(slice: []const u8, char: u8) ?u32 { - var remaining = slice; - if (remaining.len == 0) + if (slice.len == 0) return null; - if (remaining[0] == char) - return 0; - - if (comptime Environment.enableSIMD) { - while (remaining.len >= ascii_vector_size) { - const vec: AsciiVector = remaining[0..ascii_vector_size].*; - const cmp = vec == @splat(ascii_vector_size, char); - - if (@reduce(.Max, @bitCast(AsciiVectorU1, cmp)) > 0) { - const bitmask = @bitCast(AsciiVectorInt, cmp); - const first = @ctz(bitmask); - return @intCast(u32, @as(u32, first) + @intCast(u32, slice.len - remaining.len)); - } - remaining = remaining[ascii_vector_size..]; - } - } - - for (remaining, 0..) 
|c, i| { - if (c == char) { - return @truncate(u32, i + (slice.len - remaining.len)); - } - } + const ptr = bun.C.memchr(slice.ptr, char, slice.len) orelse return null; + const i = @ptrToInt(ptr) - @ptrToInt(slice.ptr); + std.debug.assert(i < slice.len); + std.debug.assert(slice[i] == char); - return null; + return @truncate(u32, i); } test "indexOfChar" { @@ -3829,25 +3883,51 @@ pub fn join(slices: []const string, delimiter: string, allocator: std.mem.Alloca return try std.mem.join(allocator, delimiter, slices); } +pub fn order(a: []const u8, b: []const u8) std.math.Order { + const len = @min(a.len, b.len); + const cmp = bun.C.memcmp(a.ptr, b.ptr, len); + return switch (std.math.sign(cmp)) { + 0 => std.math.order(a.len, b.len), + 1 => .gt, + -1 => .lt, + else => unreachable, + }; +} + pub fn cmpStringsAsc(_: void, a: string, b: string) bool { - return std.mem.order(u8, a, b) == .lt; + return order(a, b) == .lt; } pub fn cmpStringsDesc(_: void, a: string, b: string) bool { - return std.mem.order(u8, a, b) == .gt; + return order(a, b) == .gt; } const sort_asc = std.sort.asc(u8); const sort_desc = std.sort.desc(u8); pub fn sortAsc(in: []string) void { + // TODO: experiment with simd to see if it's faster std.sort.sort([]const u8, in, {}, cmpStringsAsc); } pub fn sortDesc(in: []string) void { + // TODO: experiment with simd to see if it's faster std.sort.sort([]const u8, in, {}, cmpStringsDesc); } +pub const StringArrayByIndexSorter = struct { + keys: []const []const u8, + pub fn lessThan(sorter: *const @This(), a: usize, b: usize) bool { + return strings.order(sorter.keys[a], sorter.keys[b]) == .lt; + } + + pub fn init(keys: []const []const u8) @This() { + return .{ + .keys = keys, + }; + } +}; + pub fn isASCIIHexDigit(c: u8) bool { return std.ascii.isHex(c); } |