aboutsummaryrefslogtreecommitdiff
path: root/src/string_immutable.zig
diff options
context:
space:
mode:
authorGravatar Jarred Sumner <jarred@jarredsumner.com> 2023-04-07 20:08:01 -0700
committerGravatar GitHub <noreply@github.com> 2023-04-07 20:08:01 -0700
commit6362414d65b69cd01624e84d08eca654fc8cb101 (patch)
tree77bda836b819769002b746da3c4b7bdb90546f41 /src/string_immutable.zig
parentc0c5f07218096a52ed12d86374f785e6dc331fa4 (diff)
downloadbun-6362414d65b69cd01624e84d08eca654fc8cb101.tar.gz
bun-6362414d65b69cd01624e84d08eca654fc8cb101.tar.zst
bun-6362414d65b69cd01624e84d08eca654fc8cb101.zip
Bun gets a new bundler (#2312)
* alright now just gotta try running it * fix a gajillion compiler errors * even more code * okay i fixed more errors * wip * Update launch.json * Update string_builder.zig * `fast_debug_build_mode` makes debug build 2x faster * Update bundle_v2.zig * more code! * It bundles! * Rename `Bun.Transpiler` to `Bun.Bundler` * `import()` expressions almost work * wip attempt to get import() expr to work * Bundle namespace imports * Attempt to fix the issue with import() unsuccessfully * consider current working directory when resolving relative paths (#2313) * consider current working directory when resolving relative paths fixes #2298 * comment test --------- Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> * support `expect().toThrow(/pattern/)` (#2314) - fix time-zone-dependent test failure * fix missing `Blob` error messages on Linux (#2315) * fix & clean up tests (#2318) - skip flaky tests when running as `root` - use `expect().toThrow()` - clean up temporary files after tests * feat(tty): add some `tty.WriteStream` methods to `process.{stdout, stderr}` (#2320) * feat(stdio): add some `tty.WriteStream` methods * chore(builtins): add process builtin gen'd code * Fix docker install command * `bun test` on macOS in GitHub Actions (#2322) * Fixes #2323 * throw invalid parameter errors in `crypto.scryptSync` (#2331) * throw invalid parameter errors * remove comptime, add empty buffer function * remove error_name comptime * Add reference documentation for bun:test (#2327) * Reorganize tests (#2332) * Fix html-rewriter.test.js * fix the wrong thing being incremented in hmr example (#2334) * Add more test harness * Improve Benchmarking page, small fixes (#2339) * Improve benchmarking page * WIP * Add typescript instructions to hot * Document preload in Plugins. Fix loader in plugin types. 
* Fix typo * Fix links * run prettier * Document openInEditor * improve `Buffer` compatibility with Node.js (#2341) * improve `Buffer` compatibility with Node.js * use `memmove()` allow `encoding` to be `undefined` * run `bun test` after macOS builds (#2343) * "binary" is an alias of "latin1" Fixes https://github.com/oven-sh/bun/issues/2110 * More spec compliant `Blob.prototype.type` (#2340) * Make `Blob.prototype. type` more spec compliant * Add a few more checks for isNumber() * Fix `make headers` * Safer JSValue.isString() * More tests for blob.slice * Make `Blob.prototype.type` more spec compliant * Add isASCII check * Fix types * Fix failing type test * Update blob.zig * Update blob.zig * Fix .eql check on empty values --------- Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> * Fix bug in test runner * Support `import()` expressions * Implement `require()` * clean up bit_set.zig slightly * Move some things around * misc cleanup * Cleanup some things * Fix a lot of stuff * Fix `module.exports.fn = fn;` in ESM entry point * Fix crash due when printing file * Fix issue with class names * Fix issue with `export default identifier` * Update js_parser.zig * optimization: inline single-property object acceses and arrays * Fix undefined memory in renamed symbols list * Handle call target * wip * Inline it * Fix undefined memory issue when reclaiming blocks in ast * Halt linking on any parse errors * alias * Rename `enable_bundling` to `enable_legacy_bundling` * Workaround anonymous struct literal zig bug * Use slower approach (without bitset) because it doesn't break after 8 symbols * Fix incorrectly-renaming statically defined symbols * Handle more edgecases in our bit_set fork * Reduce number of allocations for `define` * Do not rename unbound symbols * Clean up dot defines a little more * Make the generated names prettier * Workaround runtime symbol missing issue * Fail the build on errors * Support export * from * Support `--outfile` 
* partially fix renaming * fanicer symbol renaming impl * misc, extremely revertible cleanup * Fix up some bugs with symbol renaming * formatting * Update launch.json * Parse `__PURE__` comments * clean up simd code for pure comments * changes to merge * workaround runtime issue * Fix issue with `export * as` not propagating correctly * Make all top-level declarations `var` when bundling * Fix missing prefix * Fix assigning to stack copy * Fix missing runtime symbol * Fix bug with namespace exports * Dramatically reduce allocations * Update launch.json * Add missing flags * Update js_parser.zig * small cleanup * Make the export name better * Fix unnecessary `var foo = foo` * Implement CommonJS -> ESM conversion * Implement module redirects * Port esbuild bundler tests for new bundler (#2380) * started porting esbuild tests * clean up test names and api before moving on * port tests using a program i wrote * replace todo generated comment * fix generated tests not including some files * work on tests * [github web editor] add define, external, inject, minifySyntax, minifyWhitespace options. 
* get most of the todo comments out of the way, but expectBundled does not handle most of the cases * continue working on esbuild tests * use test.skip for unsupported tests * Fixups for test runner * Hoist imports & exports * Fix test * Hoist classes * bundler test refining, 51/835 * Fix runtime require * bundler test refining, 81/835 * bundler test refining, 93/835 * Make the test work in any timezone * feat(expect): update toBeInstanceOf (#2396) * feat: update instanceof binding * fix: according to PR comments * Rename `expectObjectTypeCount` to `expectMaxObjectTypeCount` * Fix socket tests with connection errors (#2403) * release pending activity with connection error handler * unref poll_ref * remove trailing comma * Organize Dockerfiles for official status * Remove test Dockerfile * Remove old Docker workflow * Feat(test): add toMatch (#2404) * Fix various fetch/response/request tests (#2416) * fix most fetch tests, skip a few * fastGet, toValueGC, and invalid init * bigint unreachable, range error, log process as process * remove extra fetch_headers * remove js_type parameter, check isObject() * throw invalid mime type error, use enum literal * switch back to promise rejection * RangeError pascal case * Fix several bugs (#2418) * utf16 codepoint with replacement character * Fix test failure with `TextEncoder("ascii')` * Add missing type * Fix Response.prototype.bodyUsed and Request.prototype.bodyUsed * Fix bug with scrypt error not clearing * Update server.zig * oopsie * :nail_care: * docs: Use correct url in the 'Issues' link in README header (#2420) * Fix crash when rendering error page and the server or network is slow * [fetch] Make the default body value `null` when unspecified This is better aligned with the fetch spec * Make node-net tests less flaky * [node:net] Fix issue with `listen` callback firing before it's listening * Always clear timers in node test harness * Fix out of bounds access Repro'd in Buffer tests * Update UWS cc @cirospaciari * 
Make this test more thorough * Hanging abort test * 0 length body is a null stream * Several bug fixes (#2427) * Fix test * Fix segfault when unexpected type is passed in `expect().toThrow` * Fix issues with request constructor * Don't bother cloning headers when its empty * woops * more tests * fix incorrect test * Make the fetch error messages better * Update response.zig * Fix test that failed on macOS * Fix test * Remove extra hash table lookups * Support running dummy registry directly cc @alexlamsl * Update test * Update test * fixup * Workaround crash in test runner * Fixup test * Fixup test * Update os.test.js --------- Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> * Remove usages of port numbers in tests * Set -O2 and -fno-rtti * Remove -g * Prevent undefined memory access * [bun test] Implement `--rerun-each` flag to run each test N times * Reduce number of module scopes created * add some extra abort checks into streams (#2430) * add some checks to avoid UAF * avoid multiple calls to finalize if endFromJS is called more than once * fix no-op comment * mark as requested_end on abort * remove requested_end from abort * remove unnecessary check (#2432) * Fix bug with scoped aliased dependencies in bun install on macOS * remove `addLog`, remove `--prominent-compile-errors` * Finish the upgrade * Optional chaining flag * Implement same_target_becomes_destructuring optimization * bundler test refining, 109/835 * Reset bindings * Support multiple entry points * Implement `--entry-names` flag * Use a tempdir with a better name * prettier * Log file name * Update js_parser.zig * Mark all bun builtins as external * Make resolve errors actually errors * Update bundler_default.test.ts * Fix `await import(foo)` * WIP react server components * Do more stuff at runtime * :scissors: * Support automatic JSX imports * Use a module cache for now * Update tsconfig.base.json * Fix ThisOutsideFunctionNotRenamed * woopsie * moar cpu * clamp it * 
fixup * Add a bunch of assertions * Bun uses automatic runtime by default * Parse Import Attributes * Add a note about Valgrind * Update developing.md * Fix up code splitting for React Server Components * Implement client component manifest * Fix crash with --react-server-components and no client components * Backport https://github.com/ziglang/zig/commit/4d31e3c917a05541394c544708f0047cfb53331a * Update launch.json * Fix for latest zig * Workaround bug with ?[]const string Occasionally saw alignment errors in this code Workaround https://github.com/ziglang/zig/issues/15085 related: https://github.com/ziglang/zig/pull/15089 * switch to regular slice * Avoid initializing named_imports and named_exports as undefined * Reduce usages of `undefined` * Add more assertions * --watch wip * Update javascript.zig * Possibly fix the race condition * Faster `do` * bump allocator * Reduce the size of `Symbol` slightly * Alphabetically sort runtime import symbols, for determinism * Prepare for code splitting * handle overlapping stdout * pure * clean up some things * Fix bug with `$$typeof` * Address CommonJS -> ESM hoisting bug * Support `"use server"` in manifest * Implement `"use server"` * Fix importing bun builtins when bundling * Make `commonjs_to_esm` a feature flag, fix some splitting bugs * :scissors: * fixme remove this * Fix crash in longestCommonPath * Chunking! Just need to do import paths now. 
* Import paths work...now trying to figure out how to make runtime symbols work * add workaround * Replace `bun bun` with `bun build` * Fix crash with dual package hazard * Fix many CommonJS <> ESM interop bugs * Support package.json `"sideEffects"` also skip loading unnecessary package.json data in `bun run` * add a not good --watch implementation * bundler test refining, 140/831 * remove accidentally committed file * do not return status code 1 on successful bundles * bundler test refining, 159/830 * pass exit code to exitOrWatch * clean up help menu -remove two spaces to line up bun build -moved all <r> tags to the end of the text they are colorizing -moved other colors to the start of the text they colorize -removed unneeded <r> tags, keeping only one at the start of the block * importstar is fully ported * wip * you can run code in this branch now * Disable this transform * organize and document bundler tests * Fix double import * Fix sloppy mode function declarations * Disable our CommonJS transform for now * add `assertNotPresent` to make splitting cases easier * Bump! * Update bun.d.ts * use import.meta.require in runtime code * Disable this again * Fix dirname * Fix ESM -> CJS wrapper * :nail_care: --------- Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com> Co-authored-by: Alex Lam S.L <alexlamsl@gmail.com> Co-authored-by: Derrick Farris <mr.dcfarris@gmail.com> Co-authored-by: Ashcon Partovi <ashcon@partovi.net> Co-authored-by: Dylan Conway <35280289+dylan-conway@users.noreply.github.com> Co-authored-by: pfg <pfg@pfg.pw> Co-authored-by: Colin McDonnell <colinmcd94@gmail.com> Co-authored-by: dave caruso <me@paperdave.net> Co-authored-by: zhiyuan <32867472+zhiyuang@users.noreply.github.com> Co-authored-by: Dylan Conway <dylan.conway567@gmail.com> Co-authored-by: Kamil Ogórek <kamil.ogorek@gmail.com> Co-authored-by: Ciro Spaciari <ciro.spaciari@gmail.com>
Diffstat (limited to 'src/string_immutable.zig')
-rw-r--r--src/string_immutable.zig160
1 files changed, 120 insertions, 40 deletions
diff --git a/src/string_immutable.zig b/src/string_immutable.zig
index f4c6fae07..889add550 100644
--- a/src/string_immutable.zig
+++ b/src/string_immutable.zig
@@ -20,7 +20,7 @@ pub inline fn containsChar(self: string, char: u8) bool {
}
pub inline fn contains(self: string, str: string) bool {
- return std.mem.indexOf(u8, self, str) != null;
+ return indexOf(self, str) != null;
}
pub fn toUTF16Literal(comptime str: []const u8) []const u16 {
@@ -41,11 +41,9 @@ pub fn toUTF16Literal(comptime str: []const u8) []const u16 {
const OptionalUsize = std.meta.Int(.unsigned, @bitSizeOf(usize) - 1);
pub fn indexOfAny(self: string, comptime str: anytype) ?OptionalUsize {
- for (self, 0..) |c, i| {
- inline for (str) |a| {
- if (c == a) {
- return @intCast(OptionalUsize, i);
- }
+ inline for (str) |a| {
+ if (indexOfChar(self, a)) |i| {
+ return @intCast(OptionalUsize, i);
}
}
@@ -148,6 +146,79 @@ pub fn indexOfCharNeg(self: string, char: u8) i32 {
return -1;
}
+/// Format a string to an ECMAScript identifier.
+/// Unlike the string_mutable.zig version, this always allocates/copies.
+pub fn fmtIdentifier(name: string) FormatValidIdentifier {
+ return FormatValidIdentifier{ .name = name };
+}
+
+/// Format a string to an ECMAScript identifier.
+/// Different implementation than string_mutable, because string_mutable may avoid allocating.
+/// This will always allocate.
+pub const FormatValidIdentifier = struct {
+ name: string,
+ const js_lexer = @import("./js_lexer.zig");
+ pub fn format(self: FormatValidIdentifier, comptime _: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void {
+ var iterator = strings.CodepointIterator.init(self.name);
+ var cursor = strings.CodepointIterator.Cursor{};
+
+ var has_needed_gap = false;
+ var needs_gap = false;
+ var start_i: usize = 0;
+
+ if (!iterator.next(&cursor)) {
+ try writer.writeAll("_");
+ return;
+ }
+
+ // Common case: no gap necessary. No allocation necessary.
+ needs_gap = !js_lexer.isIdentifierStart(cursor.c);
+ if (!needs_gap) {
+ // Are there any non-alphanumeric chars at all?
+ while (iterator.next(&cursor)) {
+ if (!js_lexer.isIdentifierContinue(cursor.c) or cursor.width > 1) {
+ needs_gap = true;
+ start_i = cursor.i;
+ break;
+ }
+ }
+ }
+
+ if (needs_gap) {
+ needs_gap = false;
+ if (start_i > 0) try writer.writeAll(self.name[0..start_i]);
+ var slice = self.name[start_i..];
+ iterator = strings.CodepointIterator.init(slice);
+ cursor = strings.CodepointIterator.Cursor{};
+
+ while (iterator.next(&cursor)) {
+ if (js_lexer.isIdentifierContinue(cursor.c) and cursor.width == 1) {
+ if (needs_gap) {
+ try writer.writeAll("_");
+ needs_gap = false;
+ has_needed_gap = true;
+ }
+ try writer.writeAll(slice[cursor.i .. cursor.i + @as(u32, cursor.width)]);
+ } else if (!needs_gap) {
+ needs_gap = true;
+ // skip the code point, replace it with a single _
+ }
+ }
+
+ // If it ends with an emoji
+ if (needs_gap) {
+ try writer.writeAll("_");
+ needs_gap = false;
+ has_needed_gap = true;
+ }
+
+ return;
+ }
+
+ try writer.writeAll(self.name);
+ }
+};
+
pub fn indexOfSigned(self: string, str: string) i32 {
const i = std.mem.indexOf(u8, self, str) orelse return -1;
return @intCast(i32, i);
@@ -177,7 +248,9 @@ pub inline fn indexOf(self: string, str: string) ?usize {
const start = bun.C.memmem(self_ptr, self_len, str_ptr, str_len) orelse return null;
- return @ptrToInt(start) - @ptrToInt(self_ptr);
+ const i = @ptrToInt(start) - @ptrToInt(self_ptr);
+ std.debug.assert(i < self_len);
+ return @intCast(usize, i);
}
pub fn split(self: string, delimiter: string) SplitIterator {
@@ -2899,12 +2972,12 @@ pub const max_16_ascii = @splat(ascii_vector_size, @as(u8, 127));
pub const min_16_ascii = @splat(ascii_vector_size, @as(u8, 0x20));
pub const max_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 127));
pub const min_u16_ascii = @splat(ascii_u16_vector_size, @as(u16, 0x20));
-pub const AsciiVector = std.meta.Vector(ascii_vector_size, u8);
-pub const AsciiVectorSmall = std.meta.Vector(8, u8);
-pub const AsciiVectorU1 = std.meta.Vector(ascii_vector_size, u1);
-pub const AsciiVectorU1Small = std.meta.Vector(8, u1);
-pub const AsciiVectorU16U1 = std.meta.Vector(ascii_u16_vector_size, u1);
-pub const AsciiU16Vector = std.meta.Vector(ascii_u16_vector_size, u16);
+pub const AsciiVector = @Vector(ascii_vector_size, u8);
+pub const AsciiVectorSmall = @Vector(8, u8);
+pub const AsciiVectorU1 = @Vector(ascii_vector_size, u1);
+pub const AsciiVectorU1Small = @Vector(8, u1);
+pub const AsciiVectorU16U1 = @Vector(ascii_u16_vector_size, u1);
+pub const AsciiU16Vector = @Vector(ascii_u16_vector_size, u16);
pub const max_4_ascii = @splat(4, @as(u8, 127));
pub fn isAllASCII(slice: []const u8) bool {
if (bun.FeatureFlags.use_simdutf)
@@ -3200,34 +3273,15 @@ pub fn indexOfCharZ(sliceZ: [:0]const u8, char: u8) ?u63 {
}
pub fn indexOfChar(slice: []const u8, char: u8) ?u32 {
- var remaining = slice;
- if (remaining.len == 0)
+ if (slice.len == 0)
return null;
- if (remaining[0] == char)
- return 0;
-
- if (comptime Environment.enableSIMD) {
- while (remaining.len >= ascii_vector_size) {
- const vec: AsciiVector = remaining[0..ascii_vector_size].*;
- const cmp = vec == @splat(ascii_vector_size, char);
-
- if (@reduce(.Max, @bitCast(AsciiVectorU1, cmp)) > 0) {
- const bitmask = @bitCast(AsciiVectorInt, cmp);
- const first = @ctz(bitmask);
- return @intCast(u32, @as(u32, first) + @intCast(u32, slice.len - remaining.len));
- }
- remaining = remaining[ascii_vector_size..];
- }
- }
-
- for (remaining, 0..) |c, i| {
- if (c == char) {
- return @truncate(u32, i + (slice.len - remaining.len));
- }
- }
+ const ptr = bun.C.memchr(slice.ptr, char, slice.len) orelse return null;
+ const i = @ptrToInt(ptr) - @ptrToInt(slice.ptr);
+ std.debug.assert(i < slice.len);
+ std.debug.assert(slice[i] == char);
- return null;
+ return @truncate(u32, i);
}
test "indexOfChar" {
@@ -3829,25 +3883,51 @@ pub fn join(slices: []const string, delimiter: string, allocator: std.mem.Alloca
return try std.mem.join(allocator, delimiter, slices);
}
+pub fn order(a: []const u8, b: []const u8) std.math.Order {
+ const len = @min(a.len, b.len);
+ const cmp = bun.C.memcmp(a.ptr, b.ptr, len);
+ return switch (std.math.sign(cmp)) {
+ 0 => std.math.order(a.len, b.len),
+ 1 => .gt,
+ -1 => .lt,
+ else => unreachable,
+ };
+}
+
pub fn cmpStringsAsc(_: void, a: string, b: string) bool {
- return std.mem.order(u8, a, b) == .lt;
+ return order(a, b) == .lt;
}
pub fn cmpStringsDesc(_: void, a: string, b: string) bool {
- return std.mem.order(u8, a, b) == .gt;
+ return order(a, b) == .gt;
}
const sort_asc = std.sort.asc(u8);
const sort_desc = std.sort.desc(u8);
pub fn sortAsc(in: []string) void {
+ // TODO: experiment with simd to see if it's faster
std.sort.sort([]const u8, in, {}, cmpStringsAsc);
}
pub fn sortDesc(in: []string) void {
+ // TODO: experiment with simd to see if it's faster
std.sort.sort([]const u8, in, {}, cmpStringsDesc);
}
+pub const StringArrayByIndexSorter = struct {
+ keys: []const []const u8,
+ pub fn lessThan(sorter: *const @This(), a: usize, b: usize) bool {
+ return strings.order(sorter.keys[a], sorter.keys[b]) == .lt;
+ }
+
+ pub fn init(keys: []const []const u8) @This() {
+ return .{
+ .keys = keys,
+ };
+ }
+};
+
pub fn isASCIIHexDigit(c: u8) bool {
return std.ascii.isHex(c);
}