diff options
author | 2022-11-30 05:49:01 -0800 | |
---|---|---|
committer | 2022-11-30 05:49:01 -0800 | |
commit | 6213a91f058cc461fb916d479f03f602c552c377 (patch) | |
tree | 5442b8e30a1eebfda378586913dd56a27bd3ba88 | |
parent | f999bdca26256b7ad42d93268c34362b251330c2 (diff) | |
download | bun-jarred/make-strings-better.tar.gz bun-jarred/make-strings-better.tar.zst bun-jarred/make-strings-better.zip |
[wip internal] Introduce `bun.String` which wraps `WTF::String` and allows us to reuse them
jarred/make-strings-better
-rw-r--r-- | src/bun.js/api/bun.zig | 2 | ||||
-rw-r--r-- | src/bun.js/bindings/ModuleLoader.cpp | 38 | ||||
-rw-r--r-- | src/bun.js/bindings/ModuleLoader.h | 8 | ||||
-rw-r--r-- | src/bun.js/bindings/ZigGlobalObject.cpp | 27 | ||||
-rw-r--r-- | src/bun.js/bindings/bindings.cpp | 120 | ||||
-rw-r--r-- | src/bun.js/bindings/bindings.zig | 66 | ||||
-rw-r--r-- | src/bun.js/bindings/exports.zig | 7 | ||||
-rw-r--r-- | src/bun.js/bindings/headers-cpp.h | 2 | ||||
-rw-r--r-- | src/bun.js/bindings/headers-handwritten.h | 48 | ||||
-rw-r--r-- | src/bun.js/bindings/headers-replacements.zig | 1 | ||||
-rw-r--r-- | src/bun.js/bindings/headers.h | 8 | ||||
-rw-r--r-- | src/bun.js/bindings/headers.zig | 1 | ||||
-rw-r--r-- | src/bun.js/bindings/helpers.h | 22 | ||||
-rw-r--r-- | src/bun.js/javascript.zig | 81 | ||||
-rw-r--r-- | src/bun.js/module_loader.zig | 48 | ||||
-rw-r--r-- | src/bun.zig | 2 | ||||
-rw-r--r-- | src/comptime_string_map.zig | 5 | ||||
-rw-r--r-- | src/string.zig | 375 | ||||
-rw-r--r-- | src/string_immutable.zig | 29 | ||||
-rw-r--r-- | test/bun.js/non-english-import.test.ts | 26 | ||||
-rw-r--r-- | test/bun.js/not-english-àⒸ.js | 1 | ||||
-rw-r--r-- | test/bun.js/not-english-食物.ts | 1 |
22 files changed, 800 insertions, 118 deletions
diff --git a/src/bun.js/api/bun.zig b/src/bun.js/api/bun.zig index 087d35250..e1c16593d 100644 --- a/src/bun.js/api/bun.zig +++ b/src/bun.js/api/bun.zig @@ -976,7 +976,7 @@ fn doResolveWithArgs( return null; } - return errorable.result.value.toValue(ctx.ptr()); + return errorable.result.value.toJS(ctx); } pub fn resolveSync( diff --git a/src/bun.js/bindings/ModuleLoader.cpp b/src/bun.js/bindings/ModuleLoader.cpp index 45f3cc46b..763b18c65 100644 --- a/src/bun.js/bindings/ModuleLoader.cpp +++ b/src/bun.js/bindings/ModuleLoader.cpp @@ -39,7 +39,7 @@ namespace Bun { using namespace Zig; using namespace WebCore; -extern "C" BunLoaderType Bun__getDefaultLoader(JSC::JSGlobalObject*, ZigString* specifier); +extern "C" BunLoaderType Bun__getDefaultLoader(JSC::JSGlobalObject*, BunString* specifier); static JSC::JSInternalPromise* rejectedInternalPromise(JSC::JSGlobalObject* globalObject, JSC::JSValue value) { @@ -129,7 +129,7 @@ PendingVirtualModuleResult* PendingVirtualModuleResult::create(JSC::JSGlobalObje return virtualModule; } -OnLoadResult handleOnLoadResultNotPromise(Zig::GlobalObject* globalObject, JSC::JSValue objectValue, ZigString* specifier) +OnLoadResult handleOnLoadResultNotPromise(Zig::GlobalObject* globalObject, JSC::JSValue objectValue, BunString* specifier) { OnLoadResult result = {}; result.type = OnLoadResultTypeError; @@ -210,7 +210,7 @@ OnLoadResult handleOnLoadResultNotPromise(Zig::GlobalObject* globalObject, JSC:: return result; } -static OnLoadResult handleOnLoadResult(Zig::GlobalObject* globalObject, JSC::JSValue objectValue, ZigString* specifier) +static OnLoadResult handleOnLoadResult(Zig::GlobalObject* globalObject, JSC::JSValue objectValue, BunString* specifier) { if (JSC::JSPromise* promise = JSC::jsDynamicCast<JSC::JSPromise*>(objectValue)) { OnLoadResult result = {}; @@ -227,8 +227,8 @@ static JSValue handleVirtualModuleResult( Zig::GlobalObject* globalObject, JSValue virtualModuleResult, ErrorableResolvedSource* res, - ZigString* specifier, 
- ZigString* referrer) + BunString* specifier, + BunString* referrer) { auto onLoadResult = handleOnLoadResult(globalObject, virtualModuleResult, specifier); JSC::VM& vm = globalObject->vm(); @@ -295,7 +295,7 @@ static JSValue handleVirtualModuleResult( object); auto source = JSC::SourceCode( JSC::SyntheticSourceProvider::create(WTFMove(function), - JSC::SourceOrigin(), Zig::toString(*specifier))); + JSC::SourceOrigin(), Bun::toWTFString(*specifier))); JSC::ensureStillAliveHere(object); return rejectOrResolve(JSSourceCode::create(globalObject->vm(), WTFMove(source))); } @@ -305,8 +305,8 @@ static JSValue handleVirtualModuleResult( JSFunction* performPromiseThenFunction = globalObject->performPromiseThenFunction(); auto callData = JSC::getCallData(performPromiseThenFunction); ASSERT(callData.type != CallData::Type::None); - auto specifierString = Zig::toString(*specifier); - auto referrerString = Zig::toString(*referrer); + auto specifierString = Bun::toWTFString(*specifier); + auto referrerString = Bun::toWTFString(*referrer); PendingVirtualModuleResult* pendingModule = PendingVirtualModuleResult::create(globalObject, specifierString, referrerString); JSC::JSInternalPromise* internalPromise = pendingModule->internalPromise(); MarkedArgumentBuffer arguments; @@ -328,8 +328,8 @@ static JSValue handleVirtualModuleResult( extern "C" void Bun__onFulfillAsyncModule( EncodedJSValue promiseValue, ErrorableResolvedSource* res, - ZigString* specifier, - ZigString* referrer) + BunString* specifier, + BunString* referrer) { JSC::JSValue value = JSValue::decode(promiseValue); JSC::JSInternalPromise* promise = jsCast<JSC::JSInternalPromise*>(value); @@ -352,8 +352,8 @@ template<bool allowPromise> static JSValue fetchSourceCode( Zig::GlobalObject* globalObject, ErrorableResolvedSource* res, - ZigString* specifier, - ZigString* referrer) + BunString* specifier, + BunString* referrer) { void* bunVM = globalObject->bunVM(); auto& vm = globalObject->vm(); @@ -401,7 +401,7 @@ static 
JSValue fetchSourceCode( return reject(exception); } - auto moduleKey = Zig::toString(*specifier); + auto moduleKey = Bun::toWTFString(*specifier); switch (res->result.value.tag) { case SyntheticModuleType::Module: { @@ -491,8 +491,8 @@ extern "C" JSC::EncodedJSValue jsFunctionOnLoadObjectResultResolve(JSC::JSGlobal pendingModule->internalField(1).set(vm, pendingModule, JSC::jsUndefined()); JSC::JSInternalPromise* promise = pendingModule->internalPromise(); - ZigString specifier = Zig::toZigString(specifierString, globalObject); - ZigString referrer = Zig::toZigString(referrerString, globalObject); + BunString specifier = Bun::fromJS(globalObject, specifierString); + BunString referrer = Bun::fromJS(globalObject, referrerString); auto scope = DECLARE_THROW_SCOPE(vm); JSC::JSValue result = handleVirtualModuleResult<false>(reinterpret_cast<Zig::GlobalObject*>(globalObject), objectResult, &res, &specifier, &referrer); if (res.success) { @@ -536,8 +536,8 @@ extern "C" JSC::EncodedJSValue jsFunctionOnLoadObjectResultReject(JSC::JSGlobalO JSValue fetchSourceCodeSync( Zig::GlobalObject* globalObject, ErrorableResolvedSource* res, - ZigString* specifier, - ZigString* referrer) + BunString* specifier, + BunString* referrer) { return fetchSourceCode<false>(globalObject, res, specifier, referrer); } @@ -545,8 +545,8 @@ JSValue fetchSourceCodeSync( JSValue fetchSourceCodeAsync( Zig::GlobalObject* globalObject, ErrorableResolvedSource* res, - ZigString* specifier, - ZigString* referrer) + BunString* specifier, + BunString* referrer) { return fetchSourceCode<true>(globalObject, res, specifier, referrer); } diff --git a/src/bun.js/bindings/ModuleLoader.h b/src/bun.js/bindings/ModuleLoader.h index 98f8b7dbb..31af41e6e 100644 --- a/src/bun.js/bindings/ModuleLoader.h +++ b/src/bun.js/bindings/ModuleLoader.h @@ -82,13 +82,13 @@ OnLoadResult handleOnLoadResultNotPromise(Zig::GlobalObject* globalObject, JSC:: JSValue fetchSourceCodeSync( Zig::GlobalObject* globalObject, 
ErrorableResolvedSource* res, - ZigString* specifier, - ZigString* referrer); + BunString* specifier, + BunString* referrer); JSValue fetchSourceCodeAsync( Zig::GlobalObject* globalObject, ErrorableResolvedSource* res, - ZigString* specifier, - ZigString* referrer); + BunString* specifier, + BunString* referrer); } // namespace Bun
\ No newline at end of file diff --git a/src/bun.js/bindings/ZigGlobalObject.cpp b/src/bun.js/bindings/ZigGlobalObject.cpp index be88430bd..04e79d303 100644 --- a/src/bun.js/bindings/ZigGlobalObject.cpp +++ b/src/bun.js/bindings/ZigGlobalObject.cpp @@ -3455,8 +3455,15 @@ JSC::Identifier GlobalObject::moduleLoaderResolve(JSGlobalObject* globalObject, { ErrorableZigString res; res.success = false; - ZigString keyZ = toZigString(key, globalObject); - ZigString referrerZ = referrer && !referrer.isUndefinedOrNull() && referrer.isString() ? toZigString(referrer, globalObject) : ZigStringEmpty; + BunString keyZ = Bun::fromJS(globalObject, key); + BunString referrerZ; + + if (referrer && !referrer.isUndefinedOrNull() && referrer.isString()) { + referrerZ = Bun::fromJS(globalObject, referrer); + } else { + referrerZ = BunString { BunStringTag::Empty }; + } + Zig__GlobalObject__resolve(&res, globalObject, &keyZ, &referrerZ); if (res.success) { @@ -3482,8 +3489,12 @@ JSC::JSInternalPromise* GlobalObject::moduleLoaderImportModule(JSGlobalObject* g auto sourceURL = sourceOrigin.url(); ErrorableZigString resolved; - auto moduleNameZ = toZigString(moduleNameValue, globalObject); - auto sourceOriginZ = sourceURL.isEmpty() ? ZigStringCwd : toZigString(sourceURL.fileSystemPath()); + auto moduleNameZ = Bun::fromJS(globalObject, moduleNameValue); + WTF::String fsPath; + if (!sourceURL.isEmpty()) { + fsPath = sourceURL.fileSystemPath(); + } + auto sourceOriginZ = sourceURL.isEmpty() ? 
BunString { BunStringTag::StaticZigString, { .zig = ZigStringCwd } } : Bun::fromString(fsPath); resolved.success = false; Zig__GlobalObject__resolve(&resolved, globalObject, &moduleNameZ, &sourceOriginZ); if (!resolved.success) { @@ -3514,7 +3525,7 @@ static JSC_DEFINE_HOST_FUNCTION(functionFulfillModuleSync, return JSValue::encode(JSC::jsUndefined()); } - auto specifier = Zig::toZigString(moduleKey); + auto specifier = Bun::fromString(moduleKey); ErrorableResolvedSource res; res.success = false; res.result.err.code = 0; @@ -3559,8 +3570,8 @@ JSC::JSInternalPromise* GlobalObject::moduleLoaderFetch(JSGlobalObject* globalOb return rejectedInternalPromise(globalObject, createTypeError(globalObject, "To load Node-API modules, use require() or process.dlopen instead of import."_s)); } - auto moduleKeyZig = toZigString(moduleKey); - auto source = Zig::toZigString(value1, globalObject); + auto moduleKeyZig = Bun::fromString(moduleKey); + auto source = Bun::fromJS(globalObject, value1); ErrorableResolvedSource res; res.success = false; res.result.err.code = 0; @@ -3612,8 +3623,6 @@ JSC::JSValue GlobalObject::moduleLoaderEvaluate(JSGlobalObject* globalObject, return result; } - - #include "ZigGeneratedClasses+lazyStructureImpl.h" } // namespace Zig diff --git a/src/bun.js/bindings/bindings.cpp b/src/bun.js/bindings/bindings.cpp index 955971ec0..dd26a88ba 100644 --- a/src/bun.js/bindings/bindings.cpp +++ b/src/bun.js/bindings/bindings.cpp @@ -1382,7 +1382,6 @@ void JSC__JSString__toZigString(JSC__JSString* arg0, JSC__JSGlobalObject* arg1, *arg2 = Zig::toZigString(arg0->value(arg1)); } - bool JSC__JSString__eql(const JSC__JSString* arg0, JSC__JSGlobalObject* obj, JSC__JSString* arg2) { return arg0->equal(obj, arg2); @@ -1394,8 +1393,6 @@ JSC__JSObject* JSC__JSString__toObject(JSC__JSString* arg0, JSC__JSGlobalObject* return arg0->toObject(arg1); } - - #pragma mark - JSC::JSModuleLoader // JSC__JSValue @@ -1414,7 +1411,6 @@ void Microtask__run_default(void* microtask, void* 
global) reinterpret_cast<Zig::JSMicrotaskCallbackDefaultGlobal*>(microtask)->call(reinterpret_cast<Zig::GlobalObject*>(global)); } - JSC__JSValue JSC__JSModuleLoader__evaluate(JSC__JSGlobalObject* globalObject, const unsigned char* arg1, size_t arg2, const unsigned char* originUrlPtr, size_t originURLLen, const unsigned char* referrerUrlPtr, size_t referrerUrlLen, JSC__JSValue JSValue5, JSC__JSValue* arg6) @@ -2037,8 +2033,6 @@ bool JSC__JSInternalPromise__isHandled(const JSC__JSInternalPromise* arg0, JSC__ return arg0->isHandled(reinterpret_cast<JSC::VM&>(arg1)); } - - #pragma mark - JSC::JSGlobalObject JSC__JSValue JSC__JSGlobalObject__generateHeapSnapshot(JSC__JSGlobalObject* globalObject) @@ -2550,7 +2544,6 @@ JSC__JSObject* JSC__JSValue__toObject(JSC__JSValue JSValue0, JSC__JSGlobalObject return value.toObject(arg1); } - JSC__JSString* JSC__JSValue__toString(JSC__JSValue JSValue0, JSC__JSGlobalObject* arg1) { JSC::JSValue value = JSC::JSValue::decode(JSValue0); @@ -2562,7 +2555,6 @@ JSC__JSString* JSC__JSValue__toStringOrNull(JSC__JSValue JSValue0, JSC__JSGlobal return value.toStringOrNull(arg1); } - static void populateStackFrameMetadata(JSC::VM& vm, const JSC::StackFrame* stackFrame, ZigStackFrame* frame) { frame->source_url = Zig::toZigString(stackFrame->sourceURL(vm)); @@ -3058,7 +3050,6 @@ void JSC__Exception__getStackTrace(JSC__Exception* arg0, ZigStackTrace* trace) populateStackTrace(arg0->vm(), arg0->stack(), trace); } - #pragma mark - JSC::VM JSC__JSValue JSC__VM__runGC(JSC__VM* vm, bool sync) @@ -3193,9 +3184,6 @@ bJSC__CatchScope JSC__CatchScope__declare(JSC__VM* arg0, unsigned char* arg1, un } JSC__Exception* JSC__CatchScope__exception(JSC__CatchScope* arg0) { return arg0->exception(); } - - - JSC__JSValue JSC__JSPromise__rejectedPromiseValue(JSC__JSGlobalObject* arg0, JSC__JSValue JSValue1) { @@ -3445,3 +3433,111 @@ restart: return; } } + +extern "C" void Bun__WTFStringImpl__deref(WTF::StringImpl* impl) +{ + impl->deref(); +} +extern "C" void 
Bun__WTFStringImpl__ref(WTF::StringImpl* impl) +{ + impl->ref(); +} + +extern "C" bool BunString__fromJS(JSC::JSGlobalObject* globalObject, JSC::EncodedJSValue encodedValue, BunString* bunString) +{ + JSC::JSValue value = JSC::JSValue::decode(encodedValue); + *bunString = Bun::fromJS(globalObject, value); + return bunString->tag != BunStringTag::Dead; +} + +namespace Bun { +JSC::JSValue toJS(JSC::JSGlobalObject* globalObject, BunString bunString) +{ + if (bunString.tag == BunStringTag::Empty || bunString.tag == BunStringTag::Dead) { + return JSValue(JSC::jsEmptyString(globalObject->vm())); + } + if (bunString.tag == BunStringTag::WTFStringImpl) { + return JSValue(jsString(globalObject->vm(), String(bunString.impl.wtf))); + } + + if (bunString.tag == BunStringTag::StaticZigString) { + return JSValue(jsOwnedString(globalObject->vm(), Zig::toStringStatic(bunString.impl.zig))); + } + + return JSValue(Zig::toJSStringGC(bunString.impl.zig, globalObject)); +} + +WTF::String toWTFString(BunString& bunString) +{ + if (bunString.tag == BunStringTag::ZigString) { + if (Zig::isTaggedUTF8Ptr(bunString.impl.zig.ptr)) { + return Zig::toStringCopy(bunString.impl.zig); + } else { + return Zig::toString(bunString.impl.zig); + } + + } else if (bunString.tag == BunStringTag::StaticZigString) { + return Zig::toStringStatic(bunString.impl.zig); + } + + if (bunString.tag == BunStringTag::WTFStringImpl) { + return WTF::String(bunString.impl.wtf); + } + + return WTF::String(); +} + +BunString fromJS(JSC::JSGlobalObject* globalObject, JSValue value) +{ + JSC::JSString* str = value.toStringOrNull(globalObject); + if (UNLIKELY(!str)) { + return { BunStringTag::Dead }; + } + + if (str->length() == 0) { + return { BunStringTag::Empty }; + } + + auto wtfString = str->value(globalObject); + + return { BunStringTag::WTFStringImpl, { .wtf = wtfString.impl() } }; +} + +BunString fromString(WTF::String& wtfString) +{ + if (wtfString.length() == 0) + return { BunStringTag::Empty }; + + return { 
BunStringTag::WTFStringImpl, { .wtf = wtfString.impl() } }; +} + +BunString fromString(WTF::StringImpl* wtfString) +{ + if (wtfString->length() == 0) + return { BunStringTag::Empty }; + + return { BunStringTag::WTFStringImpl, { .wtf = wtfString } }; +} + +} + +extern "C" JSC::EncodedJSValue BunString__toJS(JSC::JSGlobalObject* globalObject, BunString* bunString) +{ + return JSValue::encode(Bun::toJS(globalObject, *bunString)); +} + +extern "C" void BunString__toWTFString(BunString* bunString) +{ + if (bunString->tag == BunStringTag::ZigString) { + if (Zig::isTaggedUTF8Ptr(bunString->impl.zig.ptr)) { + bunString->impl.wtf = Zig::toStringCopy(bunString->impl.zig).impl(); + } else { + bunString->impl.wtf = Zig::toString(bunString->impl.zig).impl(); + } + + bunString->tag = BunStringTag::WTFStringImpl; + } else if (bunString->tag == BunStringTag::StaticZigString) { + bunString->impl.wtf = Zig::toStringStatic(bunString->impl.zig).impl(); + bunString->tag = BunStringTag::WTFStringImpl; + } +} diff --git a/src/bun.js/bindings/bindings.zig b/src/bun.js/bindings/bindings.zig index 15e15b032..b029c3ffe 100644 --- a/src/bun.js/bindings/bindings.zig +++ b/src/bun.js/bindings/bindings.zig @@ -17,6 +17,7 @@ const JSC = @import("bun").JSC; const Shimmer = JSC.Shimmer; const FFI = @import("./FFI.zig"); const NullableAllocator = @import("../../nullable_allocator.zig").NullableAllocator; +const String = bun.String; pub const JSObject = extern struct { pub const shim = Shimmer("JSC", "JSObject", @This()); @@ -91,6 +92,14 @@ pub const ZigString = extern struct { ptr: [*]const u8, len: usize, + pub fn byteSlice(this: ZigString) []const u8 { + if (this.is16Bit()) { + return std.mem.sliceAsBytes(this.utf16SliceAligned()); + } + + return this.slice(); + } + pub fn clone(this: ZigString, allocator: std.mem.Allocator) !ZigString { var sliced = this.toSlice(allocator); if (!sliced.isAllocated()) { @@ -198,6 +207,10 @@ pub const ZigString = extern struct { pub const shim = Shimmer("", 
"ZigString", @This()); + pub inline fn length(this: ZigString) usize { + return this.len; + } + pub const Slice = struct { allocator: NullableAllocator = .{}, ptr: [*]const u8 = undefined, @@ -211,6 +224,24 @@ pub const ZigString = extern struct { }; } + pub fn init(allocator: std.mem.Allocator, input: []const u8) Slice { + return .{ + .ptr = input.ptr, + .len = @truncate(u32, input.len), + .allocator = NullableAllocator.init(allocator), + }; + } + + pub fn toZigString(this: Slice) ZigString { + if (this.isAllocated()) + return ZigString.initUTF8(this.ptr[0..this.len]); + return ZigString.init(this.slice()); + } + + pub inline fn length(this: Slice) usize { + return this.len; + } + pub const empty = Slice{ .ptr = undefined, .len = 0 }; pub inline fn isAllocated(this: Slice) bool { @@ -282,6 +313,12 @@ pub const ZigString = extern struct { /// Does nothing if the slice is not allocated pub fn deinit(this: *const Slice) void { if (this.allocator.get()) |allocator| { + if (bun.String.isWTFAllocator(allocator)) { + // workaround for https://github.com/ziglang/zig/issues/4298 + bun.String.StringImplAllocator.free(allocator.ptr, bun.constStrToU8(this.slice()), 0, 0); + return; + } + allocator.free(this.slice()); } } @@ -339,10 +376,16 @@ pub const ZigString = extern struct { return out; } + pub fn fromBytes(slice_: []const u8) ZigString { + if (!strings.isAllASCII(slice_)) + return fromUTF8(slice_); + + return init(slice_); + } + pub fn fromUTF8(slice_: []const u8) ZigString { var out = init(slice_); - if (strings.isAllASCII(slice_)) - out.markUTF8(); + out.markUTF8(); return out; } @@ -409,6 +452,14 @@ pub const ZigString = extern struct { this.ptr = @intToPtr([*]const u8, @ptrToInt(this.ptr) | (1 << 61)); } + pub fn markStatic(this: *ZigString) void { + this.ptr = @intToPtr([*]const u8, @ptrToInt(this.ptr) | (1 << 60)); + } + + pub fn isStatic(this: *const ZigString) bool { + return @ptrToInt(this.ptr) & (1 << 60) != 0; + } + pub fn markUTF16(this: *ZigString) void { 
this.ptr = @intToPtr([*]const u8, @ptrToInt(this.ptr) | (1 << 63)); } @@ -1245,14 +1296,14 @@ pub fn NewGlobalObject(comptime Type: type) type { } return ErrorableZigString.err(error.ImportFailed, ZigString.init(importNotImpl).toErrorInstance(global).asVoid()); } - pub fn resolve(res: *ErrorableZigString, global: *JSGlobalObject, specifier: *ZigString, source: *ZigString) callconv(.C) void { + pub fn resolve(res: *ErrorableString, global: *JSGlobalObject, specifier: *String, source: *String) callconv(.C) void { if (comptime @hasDecl(Type, "resolve")) { @call(.{ .modifier = .always_inline }, Type.resolve, .{ res, global, specifier.*, source.* }); return; } - res.* = ErrorableZigString.err(error.ResolveFailed, ZigString.init(resolveNotImpl).toErrorInstance(global).asVoid()); + res.* = ErrorableString.err(error.ResolveFailed, ZigString.init(resolveNotImpl).toErrorInstance(global).asVoid()); } - pub fn fetch(ret: *ErrorableResolvedSource, global: *JSGlobalObject, specifier: *ZigString, source: *ZigString) callconv(.C) void { + pub fn fetch(ret: *ErrorableResolvedSource, global: *JSGlobalObject, specifier: *String, source: *String) callconv(.C) void { if (comptime @hasDecl(Type, "fetch")) { @call(.{ .modifier = .always_inline }, Type.fetch, .{ ret, global, specifier.*, source.* }); return; @@ -2401,6 +2452,10 @@ pub const JSValue = enum(JSValueReprInt) { return callWithThis(this, globalThis, JSC.JSValue.jsUndefined(), args); } + pub fn toBunString(this: JSValue, globalObject: *JSC.JSGlobalObject) bun.String { + return bun.String.fromJS(this, globalObject); + } + pub fn callWithThis(this: JSValue, globalThis: *JSGlobalObject, thisValue: JSC.JSValue, args: []const JSC.JSValue) JSC.JSValue { JSC.markBinding(@src()); return JSC.C.JSObjectCallAsFunctionReturnValue( @@ -3844,3 +3899,4 @@ pub const DOMCalls = .{ @import("../api/bun.zig").FFI.Reader, @import("../webcore.zig").Crypto, }; +const ErrorableString = bun.JSC.ErrorableString; diff --git 
a/src/bun.js/bindings/exports.zig b/src/bun.js/bindings/exports.zig index 2387122dd..8995cff08 100644 --- a/src/bun.js/bindings/exports.zig +++ b/src/bun.js/bindings/exports.zig @@ -60,13 +60,13 @@ pub const ZigGlobalObject = extern struct { return @call(.{ .modifier = .always_inline }, Interface.import, .{ global, specifier, source }); } - pub fn resolve(res: *ErrorableZigString, global: *JSGlobalObject, specifier: *ZigString, source: *ZigString) callconv(.C) void { + pub fn resolve(res: *ErrorableZigString, global: *JSGlobalObject, specifier: *bun.String, source: *bun.String) callconv(.C) void { if (comptime is_bindgen) { unreachable; } @call(.{ .modifier = .always_inline }, Interface.resolve, .{ res, global, specifier, source }); } - pub fn fetch(ret: *ErrorableResolvedSource, global: *JSGlobalObject, specifier: *ZigString, source: *ZigString) callconv(.C) void { + pub fn fetch(ret: *ErrorableResolvedSource, global: *JSGlobalObject, specifier: *bun.String, source: *bun.String) callconv(.C) void { if (comptime is_bindgen) { unreachable; } @@ -214,7 +214,7 @@ pub const ResolvedSource = extern struct { pub const name = "ResolvedSource"; pub const namespace = shim.namespace; - specifier: ZigString, + specifier: bun.String, source_code: ZigString, source_url: ZigString, hash: u32, @@ -857,6 +857,7 @@ pub const ZigException = extern struct { pub const ErrorableResolvedSource = Errorable(ResolvedSource); pub const ErrorableZigString = Errorable(ZigString); +pub const ErrorableString = Errorable(bun.String); pub const ErrorableJSValue = Errorable(JSValue); pub const ZigConsoleClient = struct { diff --git a/src/bun.js/bindings/headers-cpp.h b/src/bun.js/bindings/headers-cpp.h index e35afe0f0..8229fd247 100644 --- a/src/bun.js/bindings/headers-cpp.h +++ b/src/bun.js/bindings/headers-cpp.h @@ -1,4 +1,4 @@ -//-- AUTOGENERATED FILE -- 1669793662 +//-- AUTOGENERATED FILE -- 1669812013 // clang-format off #pragma once diff --git a/src/bun.js/bindings/headers-handwritten.h 
b/src/bun.js/bindings/headers-handwritten.h index 9018615fc..026d2ea1c 100644 --- a/src/bun.js/bindings/headers-handwritten.h +++ b/src/bun.js/bindings/headers-handwritten.h @@ -182,6 +182,7 @@ typedef void WebSocketClientTLS; #ifndef __cplusplus typedef struct Bun__ArrayBuffer Bun__ArrayBuffer; typedef struct Uint8Array_alias Uint8Array_alias; +typedef struct BunString BunString; #endif #ifdef __cplusplus @@ -209,6 +210,39 @@ enum SyntheticModuleType : uint64_t { TTY = 1029, }; +enum class BunStringTag : uint8_t { + Dead = 0, + WTFStringImpl = 1, + ZigString = 2, + StaticZigString = 3, + Empty = 4, +}; + +typedef union BunStringImpl { + ZigString zig; + WTF::StringImpl* wtf; +} BunStringImpl; + +typedef struct BunString { + BunStringTag tag; + BunStringImpl impl; +} BunString; + +extern "C" void Bun__WTFStringImpl__deref(WTF::StringImpl* impl); +extern "C" void Bun__WTFStringImpl__ref(WTF::StringImpl* impl); +extern "C" bool BunString__fromJS(JSC::JSGlobalObject*, JSC::EncodedJSValue, BunString*); +extern "C" JSC::EncodedJSValue BunString__toJS(JSC::JSGlobalObject*, BunString*); +extern "C" void BunString__toWTFString(BunString*); + +namespace Bun { +JSC::JSValue toJS(JSC::JSGlobalObject*, BunString); +BunString fromJS(JSC::JSGlobalObject* globalObject, JSC::JSValue value); +WTF::String toWTFString(BunString& bunString); +BunString fromString(WTF::String& wtfString); +BunString fromString(const WTF::String& wtfString); +BunString fromString(WTF::StringImpl* wtfString); +} + extern "C" const char* Bun__userAgent; extern "C" ZigErrorCode Zig_ErrorCodeParserError; @@ -219,21 +253,21 @@ extern "C" void Microtask__run_default(void* ptr, void* global); extern "C" bool Bun__transpileVirtualModule( JSC::JSGlobalObject* global, - ZigString* specifier, - ZigString* referrer, + BunString* specifier, + BunString* referrer, ZigString* sourceCode, BunLoaderType loader, ErrorableResolvedSource* result); extern "C" JSC::EncodedJSValue Bun__runVirtualModule( JSC::JSGlobalObject* 
global, - ZigString* specifier); + BunString* specifier); extern "C" void* Bun__transpileFile( void* bunVM, JSC::JSGlobalObject* global, - ZigString* specifier, - ZigString* referrer, + BunString* specifier, + BunString* referrer, ErrorableResolvedSource* result, bool allowPromise); extern "C" JSC::EncodedJSValue CallbackJob__onResolve(JSC::JSGlobalObject*, JSC::CallFrame*); @@ -242,8 +276,8 @@ extern "C" JSC::EncodedJSValue CallbackJob__onReject(JSC::JSGlobalObject*, JSC:: extern "C" bool Bun__fetchBuiltinModule( void* bunVM, JSC::JSGlobalObject* global, - ZigString* specifier, - ZigString* referrer, + BunString* specifier, + BunString* referrer, ErrorableResolvedSource* result); // Used in process.version diff --git a/src/bun.js/bindings/headers-replacements.zig b/src/bun.js/bindings/headers-replacements.zig index f586bd028..cd27fac98 100644 --- a/src/bun.js/bindings/headers-replacements.zig +++ b/src/bun.js/bindings/headers-replacements.zig @@ -16,6 +16,7 @@ pub const struct_JSC__AsyncGeneratorPrototype = bindings.AsyncGeneratorPrototype pub const struct_JSC__AsyncGeneratorFunctionPrototype = bindings.AsyncGeneratorFunctionPrototype; pub const struct_JSC__AsyncFunctionPrototype = bindings.AsyncFunctionPrototype; pub const struct_JSC__ArrayPrototype = bindings.ArrayPrototype; +pub const BunString = @import("bun").String; pub const struct_JSC__ArrayIteratorPrototype = bindings.ArrayIteratorPrototype; pub const bWTF__URL = bindings.URL; diff --git a/src/bun.js/bindings/headers.h b/src/bun.js/bindings/headers.h index 399fc54a6..1962658cb 100644 --- a/src/bun.js/bindings/headers.h +++ b/src/bun.js/bindings/headers.h @@ -1,5 +1,5 @@ // clang-format off -//-- AUTOGENERATED FILE -- 1669793662 +//-- AUTOGENERATED FILE -- 1669812013 #pragma once #include <stddef.h> @@ -50,6 +50,7 @@ typedef void* JSClassRef; #ifndef __cplusplus typedef bJSC__CatchScope JSC__CatchScope; // JSC::CatchScope typedef ErrorableResolvedSource ErrorableResolvedSource; + typedef BunString 
BunString; typedef bJSC__ThrowScope JSC__ThrowScope; // JSC::ThrowScope typedef ErrorableZigString ErrorableZigString; typedef bJSC__JSObject JSC__JSObject; // JSC::JSObject @@ -97,6 +98,7 @@ typedef void* JSClassRef; } typedef ErrorableResolvedSource ErrorableResolvedSource; + typedef BunString BunString; typedef ErrorableZigString ErrorableZigString; typedef WebSocketClient WebSocketClient; typedef WebSocketHTTPSClient WebSocketHTTPSClient; @@ -500,12 +502,12 @@ CPP_DECL bool Zig__GlobalObject__resetModuleRegistryMap(JSC__JSGlobalObject* arg #ifdef __cplusplus -ZIG_DECL void Zig__GlobalObject__fetch(ErrorableResolvedSource* arg0, JSC__JSGlobalObject* arg1, ZigString* arg2, ZigString* arg3); +ZIG_DECL void Zig__GlobalObject__fetch(ErrorableResolvedSource* arg0, JSC__JSGlobalObject* arg1, BunString* arg2, BunString* arg3); ZIG_DECL ErrorableZigString Zig__GlobalObject__import(JSC__JSGlobalObject* arg0, ZigString* arg1, ZigString* arg2); ZIG_DECL void Zig__GlobalObject__onCrash(); ZIG_DECL JSC__JSValue Zig__GlobalObject__promiseRejectionTracker(JSC__JSGlobalObject* arg0, JSC__JSPromise* arg1, uint32_t JSPromiseRejectionOperation2); ZIG_DECL JSC__JSValue Zig__GlobalObject__reportUncaughtException(JSC__JSGlobalObject* arg0, JSC__Exception* arg1); -ZIG_DECL void Zig__GlobalObject__resolve(ErrorableZigString* arg0, JSC__JSGlobalObject* arg1, ZigString* arg2, ZigString* arg3); +ZIG_DECL void Zig__GlobalObject__resolve(ErrorableZigString* arg0, JSC__JSGlobalObject* arg1, BunString* arg2, BunString* arg3); #endif diff --git a/src/bun.js/bindings/headers.zig b/src/bun.js/bindings/headers.zig index e99db7ef0..3127f6598 100644 --- a/src/bun.js/bindings/headers.zig +++ b/src/bun.js/bindings/headers.zig @@ -18,6 +18,7 @@ pub const struct_JSC__AsyncGeneratorPrototype = bindings.AsyncGeneratorPrototype pub const struct_JSC__AsyncGeneratorFunctionPrototype = bindings.AsyncGeneratorFunctionPrototype; pub const struct_JSC__AsyncFunctionPrototype = bindings.AsyncFunctionPrototype; 
pub const struct_JSC__ArrayPrototype = bindings.ArrayPrototype; +pub const BunString = @import("bun").String; pub const struct_JSC__ArrayIteratorPrototype = bindings.ArrayIteratorPrototype; pub const bWTF__URL = bindings.URL; diff --git a/src/bun.js/bindings/helpers.h b/src/bun.js/bindings/helpers.h index da97b4797..c52c17997 100644 --- a/src/bun.js/bindings/helpers.h +++ b/src/bun.js/bindings/helpers.h @@ -78,7 +78,7 @@ namespace Zig { static const unsigned char* untag(const unsigned char* ptr) { return reinterpret_cast<const unsigned char*>( - ((reinterpret_cast<uintptr_t>(ptr) & ~(static_cast<uint64_t>(1) << 63) & ~(static_cast<uint64_t>(1) << 62)) & ~(static_cast<uint64_t>(1) << 61))); + (((reinterpret_cast<uintptr_t>(ptr) & ~(static_cast<uint64_t>(1) << 63) & ~(static_cast<uint64_t>(1) << 62)) & ~(static_cast<uint64_t>(1) << 61)) & ~(static_cast<uint64_t>(1) << 60))); } static void* untagVoid(const unsigned char* ptr) @@ -148,6 +148,26 @@ static const WTF::String toString(ZigString str) reinterpret_cast<const UChar*>(untag(str.ptr)), str.len)); } +static const WTF::String toStringStatic(ZigString str) +{ + if (str.len == 0 || str.ptr == nullptr) { + return WTF::String(); + } + if (UNLIKELY(isTaggedUTF8Ptr(str.ptr))) { + abort(); + } + + if (isTaggedUTF16Ptr(str.ptr)) { + return WTF::String(WTF::ExternalStringImpl::createStatic(untag(str.ptr), str.len)); + } + + + return WTF::String(WTF::ExternalStringImpl::createStatic( + reinterpret_cast<const UChar*>(untag(str.ptr)), str.len)); + + +} + static WTF::AtomString toAtomString(ZigString str) { diff --git a/src/bun.js/javascript.zig b/src/bun.js/javascript.zig index 6f966df2b..2d44f6691 100644 --- a/src/bun.js/javascript.zig +++ b/src/bun.js/javascript.zig @@ -75,6 +75,7 @@ const JSModuleLoader = @import("bun").JSC.JSModuleLoader; const JSPromiseRejectionOperation = @import("bun").JSC.JSPromiseRejectionOperation; const Exception = @import("bun").JSC.Exception; const ErrorableZigString = 
@import("bun").JSC.ErrorableZigString; +const ErrorableString = @import("bun").JSC.ErrorableString; const ZigGlobalObject = @import("bun").JSC.ZigGlobalObject; const VM = @import("bun").JSC.VM; const JSFunction = @import("bun").JSC.JSFunction; @@ -86,6 +87,7 @@ const EventLoop = JSC.EventLoop; const PendingResolution = @import("../resolver/resolver.zig").PendingResolution; const ThreadSafeFunction = JSC.napi.ThreadSafeFunction; const PackageManager = @import("../install/install.zig").PackageManager; +const String = bun.String; const ModuleLoader = JSC.ModuleLoader; const FetchFlags = JSC.FetchFlags; @@ -753,11 +755,6 @@ pub const VirtualMachine = struct { return VirtualMachine.vm; } - // dynamic import - // pub fn import(global: *JSGlobalObject, specifier: ZigString, source: ZigString) callconv(.C) ErrorableZigString { - - // } - pub threadlocal var source_code_printer: ?*js_printer.BufferPrinter = null; pub fn clearRefString(_: *anyopaque, ref_string: *JSC.RefString) void { @@ -880,6 +877,7 @@ pub const VirtualMachine = struct { pub const ResolveFunctionResult = struct { result: ?Resolver.Result, path: string, + static: bool = false, }; fn _resolve( @@ -898,25 +896,31 @@ pub const VirtualMachine = struct { if (jsc_vm.node_modules == null and strings.eqlComptime(std.fs.path.basename(specifier), Runtime.Runtime.Imports.alt_name)) { ret.path = Runtime.Runtime.Imports.Name; + ret.static = true; return; } else if (jsc_vm.node_modules != null and strings.eqlComptime(specifier, bun_file_import_path)) { ret.path = bun_file_import_path; + ret.static = true; return; } else if (strings.eqlComptime(specifier, main_file_name)) { ret.result = null; ret.path = jsc_vm.entry_point.source.path.text; + ret.static = true; return; } else if (specifier.len > js_ast.Macro.namespaceWithColon.len and strings.eqlComptimeIgnoreLen(specifier[0..js_ast.Macro.namespaceWithColon.len], js_ast.Macro.namespaceWithColon)) { ret.result = null; ret.path = specifier; + ret.static = true; return; } 
else if (specifier.len > "/bun-vfs/node_modules/".len and strings.eqlComptimeIgnoreLen(specifier[0.."/bun-vfs/node_modules/".len], "/bun-vfs/node_modules/")) { ret.result = null; ret.path = specifier; + ret.static = true; return; } else if (JSC.HardcodedModule.Map.get(specifier)) |result| { ret.result = null; ret.path = @as(string, @tagName(result)); + ret.static = true; return; } @@ -985,6 +989,7 @@ pub const VirtualMachine = struct { if (node_modules_bundle.findModuleIDInPackage(package, package_relative_path) == null) break :node_module_checker; ret.path = bun_file_import_path; + ret.static = true; return; } } @@ -1008,40 +1013,51 @@ pub const VirtualMachine = struct { } } - pub fn resolveForAPI(res: *ErrorableZigString, global: *JSGlobalObject, specifier: ZigString, source: ZigString) void { - resolveMaybeNeedsTrailingSlash(res, global, specifier, source, false, true); + pub fn resolveForAPI(res: *ErrorableString, global: *JSGlobalObject, specifier: ZigString, source: ZigString) void { + resolveMaybeNeedsTrailingSlash(res, global, String.init(specifier), String.init(source), false, true); } - pub fn resolveFilePathForAPI(res: *ErrorableZigString, global: *JSGlobalObject, specifier: ZigString, source: ZigString) void { - resolveMaybeNeedsTrailingSlash(res, global, specifier, source, true, true); + pub fn resolveFilePathForAPI(res: *ErrorableString, global: *JSGlobalObject, specifier: ZigString, source: ZigString) void { + resolveMaybeNeedsTrailingSlash(res, global, String.init(specifier), String.init(source), true, true); } - pub fn resolve(res: *ErrorableZigString, global: *JSGlobalObject, specifier: ZigString, source: ZigString) void { + pub fn resolve(res: *ErrorableString, global: *JSGlobalObject, specifier: bun.String, source: bun.String) void { resolveMaybeNeedsTrailingSlash(res, global, specifier, source, true, false); } - pub fn resolveMaybeNeedsTrailingSlash(res: *ErrorableZigString, global: *JSGlobalObject, specifier: ZigString, source: ZigString, 
comptime is_a_file_path: bool, comptime realpath: bool) void { + pub fn resolveMaybeNeedsTrailingSlash(res: *ErrorableString, global: *JSGlobalObject, specifier: String, source: String, comptime is_a_file_path: bool, comptime realpath: bool) void { var result = ResolveFunctionResult{ .path = "", .result = null }; var jsc_vm = vm; + if (jsc_vm.plugin_runner) |plugin_runner| { - if (PluginRunner.couldBePlugin(specifier.slice())) { - const namespace = PluginRunner.extractNamespace(specifier.slice()); + const specifier_slice = specifier.toUTF8(jsc_vm.allocator); + defer specifier_slice.deinit(); + if (PluginRunner.couldBePlugin(specifier_slice.slice())) { + const namespace = PluginRunner.extractNamespace(specifier_slice.slice()); const after_namespace = if (namespace.len == 0) specifier else specifier.substring(namespace.len + 1); - if (plugin_runner.onResolveJSC(ZigString.init(namespace), after_namespace, source, .bun)) |resolved_path| { - res.* = resolved_path; + if (plugin_runner.onResolveJSC(ZigString.init(namespace), after_namespace.toZigString(), source.toZigString(), .bun)) |resolved_path| { + res.* = if (resolved_path.success) ErrorableString.ok(String.init(resolved_path.result.value)) else ErrorableString{ + .result = .{ .err = resolved_path.result.err }, + .success = false, + }; return; } } } - if (JSC.HardcodedModule.Aliases.getWithEql(specifier, ZigString.eqlComptime)) |hardcoded| { - res.* = ErrorableZigString.ok(ZigString.init(hardcoded)); + if (JSC.HardcodedModule.Aliases.getWithEql(specifier, String.eqlComptime)) |hardcoded| { + res.* = ErrorableString.ok(String.static(hardcoded)); return; } + + const specifier_slice = specifier.toUTF8(jsc_vm.allocator); + const source_slice = source.toUTF8(jsc_vm.allocator); + defer source_slice.deinit(); + var old_log = jsc_vm.log; var log = logger.Log.init(jsc_vm.allocator); defer log.deinit(); @@ -1053,7 +1069,7 @@ pub const VirtualMachine = struct { jsc_vm.bundler.linker.log = old_log; jsc_vm.bundler.resolver.log = 
old_log; } - _resolve(&result, global, specifier.slice(), source.slice(), is_a_file_path, realpath) catch |err_| { + _resolve(&result, global, specifier_slice.slice(), source_slice.slice(), is_a_file_path, realpath) catch |err_| { var err = err_; const msg: logger.Msg = brk: { var msgs: []logger.Msg = log.msgs.items; @@ -1067,8 +1083,8 @@ pub const VirtualMachine = struct { const printed = ResolveError.fmt( jsc_vm.allocator, - specifier.slice(), - source.slice(), + specifier_slice.slice(), + source_slice.slice(), err, ) catch unreachable; break :brk logger.Msg{ @@ -1079,19 +1095,24 @@ pub const VirtualMachine = struct { ), .metadata = .{ // import_kind is wrong probably - .resolve = .{ .specifier = logger.BabyString.in(printed, specifier.slice()), .import_kind = .stmt }, + .resolve = .{ .specifier = logger.BabyString.in(printed, specifier_slice.slice()), .import_kind = .stmt }, }, }; }; { - res.* = ErrorableZigString.err(err, @ptrCast(*anyopaque, ResolveError.create(global, vm.allocator, msg, source.slice()))); + res.* = ErrorableString.err(err, @ptrCast(*anyopaque, ResolveError.create(global, vm.allocator, msg, source_slice.slice()))); } return; }; - - res.* = ErrorableZigString.ok(ZigString.init(result.path)); + if (result.static) { + res.* = ErrorableString.ok(String.static(result.path)); + } else if (strings.eqlLong(specifier_slice.slice(), result.path)) { + res.* = ErrorableString.ok(specifier); + } else { + res.* = ErrorableString.ok(String.fromBytes(result.path)); + } } // // This double prints @@ -1106,31 +1127,31 @@ pub const VirtualMachine = struct { pub const main_file_name: string = "bun:main"; - pub fn fetch(ret: *ErrorableResolvedSource, global: *JSGlobalObject, specifier: ZigString, source: ZigString) callconv(.C) void { + pub fn fetch(ret: *ErrorableResolvedSource, global: *JSGlobalObject, specifier: String, source: String) callconv(.C) void { var jsc_vm: *VirtualMachine = if (comptime Environment.isLinux) vm else global.bunVM(); var log = 
logger.Log.init(vm.bundler.allocator); - var spec = specifier.toSlice(jsc_vm.allocator); + var spec = specifier.toUTF8(jsc_vm.allocator); defer spec.deinit(); - var refer = source.toSlice(jsc_vm.allocator); + var refer = source.toUTF8(jsc_vm.allocator); defer refer.deinit(); const result = if (!jsc_vm.bundler.options.disable_transpilation) @call(.{ .modifier = .always_inline }, fetchWithoutOnLoadPlugins, .{ jsc_vm, global, spec.slice(), refer.slice(), &log, ret, .transpile }) catch |err| { - processFetchLog(global, specifier, source, &log, ret, err); + processFetchLog(global, spec.toZigString(), refer.toZigString(), &log, ret, err); return; } else fetchWithoutOnLoadPlugins(jsc_vm, global, spec.slice(), refer.slice(), &log, ret, .print_source_and_clone) catch |err| { - processFetchLog(global, specifier, source, &log, ret, err); + processFetchLog(global, spec.toZigString(), refer.toZigString(), &log, ret, err); return; }; if (log.errors > 0) { - processFetchLog(global, specifier, source, &log, ret, error.LinkError); + processFetchLog(global, spec.toZigString(), refer.toZigString(), &log, ret, error.LinkError); return; } diff --git a/src/bun.js/module_loader.zig b/src/bun.js/module_loader.zig index fa938f1ab..d11aceb08 100644 --- a/src/bun.js/module_loader.zig +++ b/src/bun.js/module_loader.zig @@ -89,6 +89,7 @@ const PackageManager = @import("../install/install.zig").PackageManager; const Install = @import("../install/install.zig"); const VirtualMachine = JSC.VirtualMachine; const Dependency = @import("../install/dependency.zig"); +const String = bun.String; // This exists to make it so we can reload these quicker in development fn jsModuleFromFile(from_path: string, comptime input: string) string { @@ -871,9 +872,9 @@ pub const ModuleLoader = struct { ) void; }; - pub export fn Bun__getDefaultLoader(global: *JSC.JSGlobalObject, str: *ZigString) Api.Loader { + pub export fn Bun__getDefaultLoader(global: *JSC.JSGlobalObject, str: *String) Api.Loader { var jsc_vm = 
global.bunVM(); - const filename = str.toSlice(jsc_vm.allocator); + const filename = str.toUTF8(jsc_vm.allocator); defer filename.deinit(); const loader = jsc_vm.bundler.options.loader(Fs.PathName.init(filename.slice()).ext).toAPI(); if (loader == .file) { @@ -898,7 +899,6 @@ pub const ModuleLoader = struct { comptime flags: FetchFlags, ) !ResolvedSource { const disable_transpilying = comptime flags.disableTranspiling(); - switch (loader) { .js, .jsx, .ts, .tsx, .json, .toml => { jsc_vm.transpiled_count += 1; @@ -1347,19 +1347,22 @@ pub const ModuleLoader = struct { pub export fn Bun__fetchBuiltinModule( jsc_vm: *VirtualMachine, globalObject: *JSC.JSGlobalObject, - specifier: *ZigString, - referrer: *ZigString, + specifier: *String, + referrer: *String, ret: *ErrorableResolvedSource, ) bool { JSC.markBinding(@src()); var log = logger.Log.init(jsc_vm.bundler.allocator); defer log.deinit(); - if (ModuleLoader.fetchBuiltinModule(jsc_vm, specifier.slice(), &log, false) catch |err| { + const specifier_slice = specifier.toUTF8(jsc_vm.allocator); + defer specifier_slice.deinit(); + if (ModuleLoader.fetchBuiltinModule(jsc_vm, specifier_slice.slice(), &log, false) catch |err| { if (err == error.AsyncModule) { unreachable; } - - VirtualMachine.processFetchLog(globalObject, specifier.*, referrer.*, &log, ret, err); + const referrer_slice = referrer.toUTF8(jsc_vm.allocator); + defer referrer_slice.deinit(); + VirtualMachine.processFetchLog(globalObject, specifier_slice.toZigString(), referrer_slice.toZigString(), &log, ret, err); return true; }) |builtin| { ret.* = ErrorableResolvedSource.ok(builtin); @@ -1372,28 +1375,33 @@ pub const ModuleLoader = struct { pub export fn Bun__transpileFile( jsc_vm: *VirtualMachine, globalObject: *JSC.JSGlobalObject, - specifier_ptr: *ZigString, - referrer: *ZigString, + specifier_ptr: *String, + referrer: *String, ret: *ErrorableResolvedSource, allow_promise: bool, ) ?*anyopaque { JSC.markBinding(@src()); + const allocator = jsc_vm.allocator; 
var log = logger.Log.init(jsc_vm.bundler.allocator); defer log.deinit(); debug("transpileFile: {any}", .{specifier_ptr.*}); - - var _specifier = specifier_ptr.toSlice(jsc_vm.allocator); - var referrer_slice = referrer.toSlice(jsc_vm.allocator); - defer _specifier.deinit(); + // these should already be encoded as utf8 + // see https://github.com/oven-sh/bun/issues/1562 + var specifier_slice = specifier_ptr.toUTF8(allocator); + var referrer_slice = referrer.toUTF8(allocator); + defer specifier_slice.deinit(); defer referrer_slice.deinit(); - var specifier = normalizeSpecifier(jsc_vm, _specifier.slice()); + + const _specifier = specifier_slice.slice(); + + var specifier = normalizeSpecifier(jsc_vm, _specifier); const path = Fs.Path.init(specifier); const loader = jsc_vm.bundler.options.loaders.get(path.name.ext) orelse options.Loader.js; var promise: ?*JSC.JSInternalPromise = null; ret.* = ErrorableResolvedSource.ok( ModuleLoader.transpileSourceCode( jsc_vm, - specifier, + specifier_slice.slice(), referrer_slice.slice(), path, loader, @@ -1413,18 +1421,20 @@ pub const ModuleLoader = struct { if (err == error.PluginError) { return null; } - VirtualMachine.processFetchLog(globalObject, specifier_ptr.*, referrer.*, &log, ret, err); + VirtualMachine.processFetchLog(globalObject, specifier_slice.toZigString(), referrer_slice.toZigString(), &log, ret, err); return null; }, ); return promise; } - export fn Bun__runVirtualModule(globalObject: *JSC.JSGlobalObject, specifier_ptr: *ZigString) JSValue { + export fn Bun__runVirtualModule(globalObject: *JSC.JSGlobalObject, specifier_ptr: *String) JSValue { JSC.markBinding(@src()); if (globalObject.bunVM().plugin_runner == null) return JSValue.zero; - const specifier = specifier_ptr.slice(); + const specifier_ = specifier_ptr.toUTF8(globalObject.allocator()); + defer specifier_.deinit(); + const specifier = specifier_.slice(); if (!PluginRunner.couldBePlugin(specifier)) { return JSValue.zero; diff --git a/src/bun.zig b/src/bun.zig 
index 6443c6923..1086dfa61 100644 --- a/src/bun.zig +++ b/src/bun.zig @@ -498,3 +498,5 @@ pub const analytics = @import("./analytics.zig"); pub const DateTime = @import("./deps/zig-datetime/src/datetime.zig"); pub var start_time: i128 = 0; + +pub const String = @import("./string.zig").String; diff --git a/src/comptime_string_map.zig b/src/comptime_string_map.zig index 18b06d9de..5f3779c9b 100644 --- a/src/comptime_string_map.zig +++ b/src/comptime_string_map.zig @@ -134,12 +134,13 @@ pub fn ComptimeStringMapWithKeyType(comptime KeyType: type, comptime V: type, co } pub fn getWithEql(input: anytype, comptime eql: anytype) ?V { - if (input.len < precomputed.min_len or input.len > precomputed.max_len) + const length = input.length(); + if (length < precomputed.min_len or length > precomputed.max_len) return null; comptime var i: usize = precomputed.min_len; inline while (i <= precomputed.max_len) : (i += 1) { - if (input.len == i) { + if (length == i) { return getWithLengthAndEql(input, i, eql); } } diff --git a/src/string.zig b/src/string.zig new file mode 100644 index 000000000..a71cf3583 --- /dev/null +++ b/src/string.zig @@ -0,0 +1,375 @@ +const std = @import("std"); +const bun = @import("bun"); +const JSC = bun.JSC; +const JSValue = bun.JSC.JSValue; +const Parent = @This(); + +pub const BufferOwnership = enum { + BufferInternal, + BufferOwned, + BufferSubstring, + BufferExternal, +}; + +// --------------------------------------------------------------------- +// These details must stay in sync with WTFStringImpl.h in WebKit! 
+// --------------------------------------------------------------------- +const s_flagCount: u32 = 8; + +const s_flagMask: u32 = (1 << s_flagCount) - 1; +const s_flagStringKindCount: u32 = 4; +const s_hashZeroValue: u32 = 0; +const s_hashFlagStringKindIsAtom: u32 = @as(1, u32) << (s_flagStringKindCount); +const s_hashFlagStringKindIsSymbol: u32 = @as(1, u32) << (s_flagStringKindCount + 1); +const s_hashMaskStringKind: u32 = s_hashFlagStringKindIsAtom | s_hashFlagStringKindIsSymbol; +const s_hashFlagDidReportCost: u32 = @as(1, u32) << 3; +const s_hashFlag8BitBuffer: u32 = 1 << 2; +const s_hashMaskBufferOwnership: u32 = (1 << 0) | (1 << 1); + +pub const WTFStringImpl = *WTFStringImplStruct; + +pub const WTFStringImplStruct = extern struct { + m_refCount: u32 = 0, + m_length: u32 = 0, + m_ptr: extern union { latin1: [*]const u8, utf16: [*]const u16 }, + m_hashAndFlags: u32 = 0, + + // --------------------------------------------------------------------- + + pub fn byteLength(this: WTFStringImpl) usize { + return if (this.is8Bit()) this.m_length else this.m_length * 2; + } + + pub fn byteSlice(this: WTFStringImpl) []const u8 { + return this.m_ptr.latin1[0..this.byteLength()]; + } + + pub inline fn is8Bit(self: WTFStringImpl) bool { + return (self.m_hashAndFlags & s_hashFlag8BitBuffer) != 0; + } + + pub inline fn length(self: WTFStringImpl) u32 { + return self.m_length; + } + + pub inline fn utf16Slice(self: WTFStringImpl) []const u16 { + std.debug.assert(!is8Bit(self)); + return self.m_ptr.utf16[0..length(self)]; + } + + pub inline fn latin1Slice(self: WTFStringImpl) []const u8 { + std.debug.assert(is8Bit(self)); + return self.m_ptr.latin1[0..length(self)]; + } + + pub fn toZigString(this: WTFStringImpl) ZigString { + if (this.is8Bit()) { + return ZigString.init(this.latin1Slice()); + } else { + return ZigString.init16(this.utf16Slice()); + } + } + + pub inline fn deref(self: WTFStringImpl) void { + JSC.markBinding(@src()); + const current_count = self.m_refCount; + 
std.debug.assert(current_count > 0); + Bun__WTFStringImpl__deref(self); + if (comptime bun.Environment.allow_assert) { + if (current_count > 1) { + std.debug.assert(self.m_refCount < current_count); + } + } + } + + pub inline fn ref(self: WTFStringImpl) void { + JSC.markBinding(@src()); + const current_count = self.m_refCount; + std.debug.assert(current_count > 0); + Bun__WTFStringImpl__ref(self); + std.debug.assert(self.m_refCount > current_count); + } + + pub fn toUTF8(this: WTFStringImpl, allocator: std.mem.Allocator) ZigString.Slice { + if (this.is8Bit()) { + if (bun.strings.toUTF8FromLatin1(allocator, this.latin1Slice()) catch null) |utf8| { + return ZigString.Slice.init(allocator, utf8.items); + } + + this.ref(); + return ZigString.Slice.init(this.refCountAllocator(), this.latin1Slice()); + } + + if (bun.strings.toUTF8Alloc(allocator, this.utf16Slice()) catch null) |utf8| { + return ZigString.Slice.init(allocator, utf8); + } + + return .{}; + } + + pub fn refCountAllocator(self: WTFStringImpl) std.mem.Allocator { + return std.mem.Allocator{ .ptr = self, .vtable = StringImplAllocator.VTablePtr }; + } + + extern fn Bun__WTFStringImpl__deref(self: WTFStringImpl) void; + extern fn Bun__WTFStringImpl__ref(self: WTFStringImpl) void; +}; + +pub const StringImplAllocator = struct { + fn alloc( + ptr: *anyopaque, + len: usize, + _: u29, + _: u29, + _: usize, + ) error{OutOfMemory}![]u8 { + var this = bun.cast(WTFStringImpl, ptr); + const len_ = this.byteLength(); + + if (len_ != len) { + // we don't actually allocate, we just reference count + return error.OutOfMemory; + } + + this.ref(); + + // we should never actually allocate + return bun.constStrToU8(this.m_ptr.latin1[0..len]); + } + + fn resize( + _: *anyopaque, + _: []u8, + _: u29, + _: usize, + _: u29, + _: usize, + ) ?usize { + return null; + } + + pub fn free( + ptr: *anyopaque, + buf: []u8, + _: u29, + _: usize, + ) void { + var this = bun.cast(WTFStringImpl, ptr); + std.debug.assert(this.byteSlice().ptr == 
buf.ptr); + std.debug.assert(this.byteSlice().len == buf.len); + this.deref(); + } + + pub const VTable = std.mem.Allocator.VTable{ + .alloc = alloc, + .resize = resize, + .free = free, + }; + + pub const VTablePtr = &VTable; +}; + +pub const Tag = enum(u8) { + Dead = 0, + WTFStringImpl = 1, + ZigString = 2, + StaticZigString = 3, + Empty = 4, +}; + +const ZigString = bun.JSC.ZigString; + +pub const StringImpl = extern union { + ZigString: ZigString, + WTFStringImpl: WTFStringImpl, + StaticZigString: ZigString, + Dead: void, + Empty: void, +}; + +pub const String = extern struct { + pub const name = "BunString"; + + tag: Tag, + value: StringImpl, + + pub const dead = String{ .tag = .Dead, .value = .{ .Dead = {} } }; + pub const StringImplAllocator = Parent.StringImplAllocator; + + pub fn initWithType(comptime Type: type, value: Type) String { + switch (comptime Type) { + ZigString => return String{ .tag = .ZigString, .value = .{ .ZigString = value } }, + []const u8 => return String{ .tag = .ZigString, .value = .{ .ZigString = ZigString.fromBytes(value) } }, + []const u16 => return String{ .tag = .ZigString, .value = .{ .ZigString = ZigString.from16Slice(value) } }, + WTFStringImpl => return String{ .tag = .WTFStringImpl, .value = .{ .WTFStringImpl = value } }, + *const ZigString, *ZigString => return String{ .tag = .ZigString, .value = .{ .ZigString = value.* } }, + else => @compileError("Unsupported type for String"), + } + } + + pub fn static(input: []const u8) String { + return .{ + .tag = .StaticZigString, + .value = .{ .StaticZigString = ZigString.init(input) }, + }; + } + + pub fn init(value: anytype) String { + return initWithType(@TypeOf(value), value); + } + + pub fn fromUTF8(value: []const u8) String { + return String.initWithType(ZigString, ZigString.initUTF8(value)); + } + + pub fn fromBytes(value: []const u8) String { + return String.initWithType(ZigString, ZigString.fromBytes(value)); + } + + pub fn format(self: String, comptime fmt: []const u8, opts: 
std.fmt.FormatOptions, writer: anytype) !void { + return self.toZigString().format(fmt, opts, writer); + } + + pub fn fromJS(value: bun.JSC.JSValue, globalObject: *JSC.JSGlobalObject) String { + var out: String = String.dead; + if (BunString__fromJS(globalObject, value, &out)) { + return out; + } else { + return String.dead; + } + } + + pub fn toJS(this: *String, globalObject: *bun.JSC.JSGlobalObject) JSC.JSValue { + return BunString__toJS(globalObject, this); + } + + pub fn toZigString(this: String) ZigString { + if (this.tag == .StaticZigString or this.tag == .ZigString) { + return this.value.ZigString; + } + + if (this.tag == .WTFStringImpl) + return this.value.WTFStringImpl.toZigString(); + + return ZigString.Empty; + } + + pub fn toWTF(this: *String) void { + BunString__toWTFString(this); + } + + pub inline fn length(this: String) usize { + return if (this.tag == .WTFStringImpl) + this.value.WTFStringImpl.length() + else + this.toZigString().length(); + } + + pub inline fn utf16(self: String) []const u16 { + if (self.tag == .Empty) + return &[_]u16{}; + std.debug.assert(self.tag == .WTFStringImpl); + return self.value.WTFStringImpl.utf16(); + } + + pub inline fn latin1(self: String) []const u8 { + if (self.tag == .Empty) + return &[_]u8{}; + + std.debug.assert(self.tag == .WTFStringImpl); + return self.value.WTFStringImpl.latin1(); + } + + pub fn isUTF8(self: String) bool { + if (!self.tag == .ZigString or self.tag == .StaticZigString) + return false; + + return self.value.ZigString.isUTF8(); + } + + pub fn byteSlice(this: String) []const u8 { + return switch (this.tag) { + .ZigString, .StaticZigString => this.value.ZigString.byteSlice(), + .WTFStringImpl => this.value.WTFStringImpl.byteSlice(), + else => &[_]u8{}, + }; + } + + pub fn isUTF16(self: String) bool { + if (self.tag == .WTFStringImpl) + return !self.value.WTFStringImpl.is8Bit(); + + if (self.tag == .ZigString or self.tag == .StaticZigString) + return self.value.ZigString.isUTF16(); + + return 
false; + } + + pub inline fn utf8(self: String) []const u8 { + if (comptime bun.Environment.allow_assert) + std.debug.assert(self.canBeUTF8()); + return self.value.ZigString.slice(); + } + + pub fn canBeUTF8(self: String) bool { + if (self.tag == .WTFStringImpl) + return self.value.WTFStringImpl.is8Bit() and bun.strings.isAllASCII(self.value.WTFStringImpl.latin1()); + + if (self.tag == .ZigString or self.tag == .StaticZigString) + return self.value.ZigString.isUTF8(); + + return self.tag == .Empty; + } + + pub fn substring(self: String, offset: usize) String { + return String.init(self.toZigString().substring(offset)); + } + + pub fn toUTF8(this: *const String, allocator: std.mem.Allocator) ZigString.Slice { + if (this.tag == .WTFStringImpl) { + return this.value.WTFStringImpl.toUTF8(allocator); + } + + if (this.tag == .ZigString) { + return this.value.ZigString.toSlice(allocator); + } + + if (this.tag == .StaticZigString) { + return ZigString.Slice.fromUTF8NeverFree(this.value.StaticZigString.slice()); + } + + return .{}; + } + + extern fn BunString__fromJS(globalObject: *JSC.JSGlobalObject, value: bun.JSC.JSValue, out: *String) bool; + extern fn BunString__toJS(globalObject: *JSC.JSGlobalObject, in: *String) JSC.JSValue; + extern fn BunString__toWTFString(this: *String) void; + + pub fn ref(this: String) void { + switch (this.tag) { + .WTFStringImpl => this.value.WTFStringImpl.ref(), + else => {}, + } + } + + pub fn deref(this: String) void { + switch (this.tag) { + .WTFStringImpl => this.value.WTFStringImpl.deref(), + else => {}, + } + } + + pub const unref = deref; + + pub fn eqlComptime(this: String, comptime value: []const u8) bool { + return this.toZigString().eqlComptime(value); + } + + pub fn isWTFAllocator(this: std.mem.Allocator) bool { + return this.vtable == @This().StringImplAllocator.VTablePtr; + } + + pub fn eqlBytes(this: String, value: []const u8) bool { + return bun.strings.eqlLong(this.byteSlice(), value, true); + } +}; diff --git 
a/src/string_immutable.zig b/src/string_immutable.zig index f89e6d12f..a8d4f4f95 100644 --- a/src/string_immutable.zig +++ b/src/string_immutable.zig @@ -743,10 +743,12 @@ pub fn eqlLong(a_: string, b: string, comptime check_len: bool) bool { const len = b.len; var dword_length = b.len >> 3; var b_ptr: usize = 0; + const slice = b.ptr; const a = a_.ptr; + if (slice == a) + return true; while (dword_length > 0) : (dword_length -= 1) { - const slice = b.ptr; if (@bitCast(usize, a[b_ptr..len][0..@sizeOf(usize)].*) != @bitCast(usize, (slice[b_ptr..b.len])[0..@sizeOf(usize)].*)) return false; b_ptr += @sizeOf(usize); @@ -755,7 +757,6 @@ pub fn eqlLong(a_: string, b: string, comptime check_len: bool) bool { if (comptime @sizeOf(usize) == 8) { if ((len & 4) != 0) { - const slice = b.ptr; if (@bitCast(u32, a[b_ptr..len][0..@sizeOf(u32)].*) != @bitCast(u32, (slice[b_ptr..b.len])[0..@sizeOf(u32)].*)) return false; @@ -1095,6 +1096,27 @@ pub fn utf16Codepoint(comptime Type: type, input: Type) UTF16Replacement { } } +pub fn toUTF8FromLatin1(allocator: std.mem.Allocator, latin1: []const u8) !?std.ArrayList(u8) { + if (bun.JSC.is_bindgen) + unreachable; + + if (!bun.FeatureFlags.use_simdutf) { + @compileError("TODO"); + } + + const is_ascii = bun.simdutf.validate.with_errors.ascii(latin1); + if (is_ascii.status == .success) { + return null; + } + + var list = try std.ArrayList(u8).initCapacity(allocator, latin1.len); + if (is_ascii.count > 0) + @memcpy(list.items.ptr, latin1.ptr, is_ascii.count); + list.items.len = is_ascii.count; + std.debug.assert(is_ascii.count < latin1.len); + return try allocateLatin1IntoUTF8WithList(list, is_ascii.count, []const u8, latin1); +} + pub fn toUTF8AllocWithType(allocator: std.mem.Allocator, comptime Type: type, utf16: Type) ![]u8 { if (bun.FeatureFlags.use_simdutf and comptime Type == []const u16) { const length = bun.simdutf.length.utf8.from.utf16.le(utf16); @@ -2640,6 +2662,9 @@ pub const AsciiU16Vector = 
std.meta.Vector(ascii_u16_vector_size, u16); pub const max_4_ascii = @splat(4, @as(u8, 127)); pub fn isAllASCII(slice: []const u8) bool { var remaining = slice; + if (comptime bun.FeatureFlags.use_simdutf) { + return bun.simdutf.validate.ascii(slice); + } // The NEON SIMD unit is 128-bit wide and includes 16 128-bit registers that can be used as 32 64-bit registers if (comptime Environment.enableSIMD) { diff --git a/test/bun.js/non-english-import.test.ts b/test/bun.js/non-english-import.test.ts new file mode 100644 index 000000000..c311a8caf --- /dev/null +++ b/test/bun.js/non-english-import.test.ts @@ -0,0 +1,26 @@ +import { expect, test } from "bun:test"; + +test("non-english import works (utf16)", async () => { + { + const { default: value } = await import("./not-english-食物.js"); + expect(value).toBe(42); + } + { + const dynamic = "./not-english-食物.js"; + const { default: value } = await import(dynamic); + expect(value).toBe(42); + } +}); + +test("non-english import works (latin1)", async () => { + { + const { default: value } = await import("./not-english-àⒸ.js"); + expect(value).toBe(42); + } + + { + const dynamic = "./not-english-àⒸ.js"; + const { default: value } = await import(dynamic); + expect(value).toBe(42); + } +}); diff --git a/test/bun.js/not-english-àⒸ.js b/test/bun.js/not-english-àⒸ.js new file mode 100644 index 000000000..7a4e8a723 --- /dev/null +++ b/test/bun.js/not-english-àⒸ.js @@ -0,0 +1 @@ +export default 42; diff --git a/test/bun.js/not-english-食物.ts b/test/bun.js/not-english-食物.ts new file mode 100644 index 000000000..7a4e8a723 --- /dev/null +++ b/test/bun.js/not-english-食物.ts @@ -0,0 +1 @@ +export default 42; |