diff options
author | 2023-10-17 19:42:37 -0700 | |
---|---|---|
committer | 2023-10-17 19:42:37 -0700 | |
commit | cb5c4c71c866362dce24eff79251fed6add53e9f (patch) | |
tree | 5635cf21140ff2eac14539316f7c6d6704925bd3 /src/codegen | |
parent | bf12268274faac1a38d33007be7a48af9e570761 (diff) | |
download | bun-jarred/prepare-for-libuv.tar.gz bun-jarred/prepare-for-libuv.tar.zst bun-jarred/prepare-for-libuv.zip |
Diffstat (limited to 'src/codegen')
-rw-r--r-- | src/codegen/builtin-parser.ts | 187 | ||||
-rw-r--r-- | src/codegen/bundle-functions.ts | 627 | ||||
-rw-r--r-- | src/codegen/bundle-modules.ts | 398 | ||||
-rw-r--r-- | src/codegen/client-js.ts | 47 | ||||
-rw-r--r-- | src/codegen/create-hash-table.ts | 10 | ||||
-rw-r--r-- | src/codegen/generate-jssink.ts | 4 | ||||
-rw-r--r-- | src/codegen/helpers.ts | 64 | ||||
-rw-r--r-- | src/codegen/internal-module-registry-scanner.ts | 92 | ||||
-rw-r--r-- | src/codegen/replacements.ts | 179 |
9 files changed, 1601 insertions, 7 deletions
diff --git a/src/codegen/builtin-parser.ts b/src/codegen/builtin-parser.ts new file mode 100644 index 000000000..4e35f13dd --- /dev/null +++ b/src/codegen/builtin-parser.ts @@ -0,0 +1,187 @@ +import { applyReplacements } from "./replacements"; + +/** + * Slices a string until it hits a }, but keeping in mind JS comments, + * regex, template literals, comments, and matching { + * + * Used to extract function bodies without parsing the code. + * + * If you pass replace=true, it will run replacements on the code + */ +export function sliceSourceCode( + contents: string, + replace: boolean, + replaceRequire?: (specifier: string) => string, + endOnComma = false, +): { result: string; rest: string } { + let bracketCount = 0; + let i = 0; + let result = ""; + while (contents.length) { + const match = contents.match( + endOnComma && bracketCount <= 1 + ? /((?:[(,=;:{]|return|\=\>)\s*)\/[^\/\*]|\/\*|\/\/|['"}`\),]|(?<!\$)\brequire\(|(\$assert\(|\$debug\()/ + : /((?:[(,=;:{]|return|\=\>)\s*)\/[^\/\*]|\/\*|\/\/|['"}`\)]|(?<!\$)\brequire\(|(\$assert\(|\$debug\()/, + ); + i = match?.index ?? contents.length; + if (match?.[2]) { + i += match[2].length - 1; + } + bracketCount += [...contents.slice(0, i).matchAll(/[({]/g)].length; + const chunk = replace ? 
applyReplacements(contents, i) : [contents.slice(0, i), contents.slice(i)]; + result += chunk[0]; + contents = chunk[1] as string; + if (chunk[2]) { + continue; + } + if (match?.[1]) { + if (match[1].startsWith("(") || match[1].startsWith(",")) { + bracketCount++; + } + const { result: result2, rest } = sliceRegularExpressionSourceCode( + contents.slice(match?.[1].length + 1), + replace, + ); + result += contents.slice(0, match?.[1].length + 1) + result2; + contents = rest; + continue; + } + if (!contents.length) break; + if (contents.startsWith("/*")) { + i = contents.slice(2).indexOf("*/") + 2; + } else if (contents.startsWith("//")) { + i = contents.slice(2).indexOf("\n") + 2; + } else if (contents.startsWith("'")) { + i = getEndOfBasicString(contents.slice(1), "'") + 2; + } else if (contents.startsWith('"')) { + i = getEndOfBasicString(contents.slice(1), '"') + 2; + } else if (contents.startsWith("`")) { + const { result: result2, rest } = sliceTemplateLiteralSourceCode(contents.slice(1), replace); + result += "`" + result2; + contents = rest; + i = 0; + continue; + } else if (contents.startsWith("}")) { + bracketCount--; + if (bracketCount <= 0) { + result += "}"; + contents = contents.slice(1); + break; + } + i = 1; + } else if (contents.startsWith(")")) { + bracketCount--; + if (bracketCount <= 0) { + result += ")"; + contents = contents.slice(1); + break; + } + i = 1; + } else if (endOnComma && contents.startsWith(",")) { + if (bracketCount <= 1) { + contents = contents.slice(1); + // if the next non-whitespace character is ), we will treat it like a ) + let match = contents.match(/^\s*\)/); + if (match) { + contents = contents.slice(match[0].length); + result += ")"; + } else { + result += ","; + } + break; + } + i = 1; + } else if (contents.startsWith("require(")) { + if (replaceRequire) { + const staticSpecifier = contents.match(/\brequire\(["']([^"']+)["']\)/); + if (staticSpecifier) { + const specifier = staticSpecifier[1]; + result += 
/**
 * Consumes the remainder of a template literal. `contents` must begin just
 * after the opening backtick.
 *
 * Text is copied verbatim up to and including the closing backtick. Backslash
 * escapes are copied as a pair, so an escaped backtick or an escaped `${` does
 * not end the literal or start an interpolation. Each real `${` interpolation
 * is handed to `sliceSourceCode`, which slices up to the matching `}`.
 *
 * @param contents source text following the opening backtick
 * @param replace forwarded to `sliceSourceCode` for interpolated expressions
 * @returns `result`: the consumed text including the closing backtick;
 *          `rest`: the source that follows it
 * @throws Error if no closing backtick, escape, or interpolation can be found
 *         in the remaining non-empty input
 */
function sliceTemplateLiteralSourceCode(contents: string, replace: boolean) {
  let result = "";
  while (contents.length) {
    const next = contents.match(/\\|`|\$\{/);
    if (!next || next.index === undefined) {
      // Previously a null dereference (TypeError); report the real problem instead.
      throw new Error("Template literal did not end");
    }
    result += contents.slice(0, next.index);
    contents = contents.slice(next.index);
    if (contents.startsWith("\\")) {
      // Copy the backslash together with the character it escapes.
      result += contents.slice(0, 2);
      contents = contents.slice(2);
    } else if (contents.startsWith("`")) {
      result += "`";
      contents = contents.slice(1);
      break;
    } else {
      // contents starts with "${": skip the "$" and let sliceSourceCode consume "{ ... }".
      const { result: result2, rest } = sliceSourceCode(contents.slice(1), replace);
      result += "$" + result2;
      contents = rest;
    }
  }

  return { result, rest: contents };
}

/**
 * Consumes the remainder of a regular expression literal. `contents` must
 * begin just after the opening `/`.
 *
 * Escapes (`\x`) are copied as a pair and character classes (`[...]`) are
 * copied whole, so a `/` inside either does not end the literal. Flags after
 * the closing `/` are not consumed; they stay at the start of `rest`.
 *
 * @param contents source text following the opening slash
 * @param replace unused; kept so the signature matches the other slicers
 * @returns `result`: the consumed text including the closing slash;
 *          `rest`: the source that follows it
 * @throws Error if the literal or one of its character classes never ends
 */
function sliceRegularExpressionSourceCode(contents: string, replace: boolean) {
  let result = "";
  while (contents.length) {
    // A "/" directly followed by "/" or "*" is not treated as the terminator.
    const next = contents.match(/\/(?!\/|\*)|\\|\[/);
    if (!next || next.index === undefined) {
      // Previously a null dereference (TypeError); report the real problem instead.
      throw new Error("Regular expression did not end");
    }
    result += contents.slice(0, next.index);
    contents = contents.slice(next.index);
    if (contents.startsWith("/")) {
      result += "/";
      contents = contents.slice(1);
      break;
    } else if (contents.startsWith("\\")) {
      // Copy the backslash together with the character it escapes.
      result += contents.slice(0, 2);
      contents = contents.slice(2);
    } else {
      // Character class: walk to the first unescaped "]". Escapes are skipped
      // pairwise so that "[\\]" (an escaped backslash) closes correctly, which a
      // "not preceded by a backslash" lookbehind gets wrong.
      let end = 1;
      while (end < contents.length && contents[end] !== "]") {
        end += contents[end] === "\\" ? 2 : 1;
      }
      if (end >= contents.length) {
        throw new Error("Regular expression character class did not end");
      }
      result += contents.slice(0, end + 1);
      contents = contents.slice(end + 1);
    }
  }

  return { result, rest: contents };
}
/**
 * Source .ts file --> Array<bundled js function code>
 *
 * Walks the file one top-level item at a time. Comments, imports, type
 * aliases and interfaces are skipped, `$directive` lines are collected for the
 * next function, and every exported function is cut out with
 * `sliceSourceCode`, written to its own temp file, bundled with `Bun.build`,
 * and rewritten into the `@`-prefixed intrinsic syntax.
 *
 * @param filename path of the builtins `.ts` file to process
 * @returns the bundled functions sorted by name, and `internal`, which is true
 *          when any top-level comment in the file contains `@internal`
 * @throws SyntaxError when the file contains top-level content this scanner
 *         cannot handle (including unterminated imports and block comments)
 * @throws AggregateError when `Bun.build` fails for one of the functions
 * @throws Error when the bundler output is not a single file containing the
 *         capture markers
 */
async function processFileSplit(filename: string): Promise<{ functions: BundledBuiltin[]; internal: boolean }> {
  const basename = path.basename(filename, ".ts");
  let contents = applyGlobalReplacements(await Bun.file(filename).text());

  // first approach doesn't work perfectly because we actually need to split each function declaration
  // and then compile those separately

  const consumeWhitespace = /^\s*/;
  const consumeTopLevelContent = /^(\/\*|\/\/|type|import|interface|\$|export (?:async )?function|(?:async )?function)/;
  const consumeEndOfType = /;|.(?=export|type|interface|\$|\/\/|\/\*|function)/;

  const functions: ParsedBuiltin[] = [];
  let directives: Record<string, any> = {};
  const bundledFunctions: BundledBuiltin[] = [];
  let internal = false;

  // First line of the remaining input, used as context in error messages.
  // (A bare slice(0, indexOf("\n")) drops the last character when there is no newline.)
  const firstLine = () => {
    const newline = contents.indexOf("\n");
    return newline === -1 ? contents : contents.slice(0, newline);
  };

  while (contents.length) {
    contents = contents.replace(consumeWhitespace, "");
    if (!contents.length) break;
    const match = contents.match(consumeTopLevelContent);
    if (!match) {
      throw new SyntaxError("Could not process input:\n" + firstLine());
    }
    const kind = match[1];
    if (kind === "import") {
      // TODO: we may want to do stuff with these
      const end = contents.indexOf(";");
      if (end === -1) {
        // Without this guard nothing is consumed and the loop never terminates.
        throw new SyntaxError("Import statement must end with a semicolon:\n" + firstLine());
      }
      contents = contents.slice(end + 1);
    } else if (kind === "/*") {
      // Search after the opening "/*" so that "/*/" is not read as a complete comment.
      const close = contents.indexOf("*/", 2);
      if (close === -1) {
        throw new SyntaxError("Unterminated block comment:\n" + firstLine());
      }
      const end = close + 2;
      internal ||= contents.slice(0, end).includes("@internal");
      contents = contents.slice(end);
    } else if (kind === "//") {
      // A line comment may be the last thing in the file with no trailing newline.
      const newline = contents.indexOf("\n");
      const end = newline === -1 ? contents.length : newline + 1;
      internal ||= contents.slice(0, end).includes("@internal");
      contents = contents.slice(end);
    } else if (kind === "type") {
      // A type alias with no terminator runs to the end of the file.
      const end = contents.search(consumeEndOfType);
      contents = end === -1 ? "" : contents.slice(end + 1);
    } else if (kind === "interface") {
      contents = sliceSourceCode(contents, false).rest;
    } else if (kind === "$") {
      const directive = contents.match(/^\$([a-zA-Z0-9]+)(?:\s*=\s*([^\n]+?))?\s*;?\n/);
      if (!directive) {
        throw new SyntaxError("Could not parse directive:\n" + firstLine());
      }
      const name = directive[1];
      let value;
      try {
        value = directive[2] ? JSON.parse(directive[2]) : true;
      } catch (error) {
        throw new SyntaxError("Could not parse directive value " + directive[2] + " (must be JSON parsable)");
      }
      if (name === "constructor") {
        directives.ConstructAbility = "CanConstruct";
      } else if (name === "nakedConstructor") {
        directives.ConstructAbility = "CanConstruct";
        directives.ConstructKind = "Naked";
      } else {
        directives[name] = value;
      }
      contents = contents.slice(directive[0].length);
    } else if (kind === "export function" || kind === "export async function") {
      const declaration = contents.match(
        /^export\s+(async\s+)?function\s+([a-zA-Z0-9]+)\s*\(([^)]*)\)(?:\s*:\s*([^{\n]+))?\s*{?/,
      );
      if (!declaration) {
        throw new SyntaxError("Could not parse function declaration:\n" + firstLine());
      }

      const async = !!declaration[1];
      const name = declaration[2];
      const paramString = declaration[3];
      // Strip type annotations; a leading `this` pseudo-parameter is not a real parameter.
      const params =
        paramString.trim().length === 0 ? [] : paramString.split(",").map(x => x.replace(/:.+$/, "").trim());
      if (params[0] === "this") {
        params.shift();
      }

      // Start slicing at the last character matched by the declaration regex (the opening "{").
      const { result, rest } = sliceSourceCode(contents.slice(declaration[0].length - 1), true, x =>
        requireTransformer(x, SRC_DIR + "/" + basename),
      );

      functions.push({
        name,
        params,
        directives,
        source: result.trim().slice(2, -1),
        async,
      });
      contents = rest;
      // Directives apply only to the function that follows them.
      directives = {};
    } else if (kind === "function" || kind === "async function") {
      // Also matches `async function`, which previously crashed on a null match.
      const fnname = contents.match(/^(?:async\s+)?function\s+([a-zA-Z0-9]+)/)?.[1] ?? firstLine();
      throw new SyntaxError("All top level functions must be exported: " + fnname);
    } else {
      throw new Error("TODO: parse " + kind);
    }
  }

  for (const fn of functions) {
    const tmpFile = path.join(TMP_DIR, `${basename}.${fn.name}.ts`);

    // not sure if this optimization works properly in jsc builtins
    // const useThis = fn.usesThis;
    const useThis = true;

    // TODO: we should use format=IIFE so we could bundle imports and extra functions.
    await Bun.write(
      tmpFile,
      `// @ts-nocheck
// GENERATED TEMP FILE - DO NOT EDIT
// Sourced from ${path.relative(TMP_DIR, filename)}

// do not allow the bundler to rename a symbol to $
($);

$$capture_start$$(${fn.async ? "async " : ""}${
        useThis
          ? `function(${fn.params.join(",")})`
          : `${fn.params.length === 1 ? fn.params[0] : `(${fn.params.join(",")})`}=>`
      } {${fn.source}}).$$capture_end$$;
`,
    );
    // NOTE(review): the reason for this 1ms pause is not visible here; kept as-is.
    await Bun.sleep(1);
    const build = await Bun.build({
      entrypoints: [tmpFile],
      define,
      minify: { syntax: true, whitespace: false },
    });
    if (!build.success) {
      throw new AggregateError(build.logs, "Failed bundling builtin function " + fn.name + " from " + basename + ".ts");
    }
    if (build.outputs.length !== 1) {
      throw new Error("expected one output");
    }
    const output = await build.outputs[0].text();
    const captured = output.match(/\$\$capture_start\$\$([\s\S]+)\.\$\$capture_end\$\$/)?.[1];
    if (captured === undefined) {
      throw new Error("Could not find capture markers in bundled output of " + fn.name + " from " + basename + ".ts");
    }
    const finalReplacement =
      (fn.directives.sloppy ? captured : captured.replace(/function\s*\(.*?\)\s*{/, '$&"use strict";'))
        .replace(/^\((async )?function\(/, "($1function (")
        // .replace(/__intrinsic__lazy\(/g, "globalThis[globalThis.Symbol.for('Bun.lazy')](")
        .replace(/__intrinsic__/g, "@") + "\n";

    bundledFunctions.push({
      name: fn.name,
      directives: fn.directives,
      source: finalReplacement,
      params: fn.params,
      visibility: fn.directives.visibility ?? (fn.directives.linkTimeConstant ? "Private" : "Public"),
      isGetter: !!fn.directives.getter,
      constructAbility: fn.directives.ConstructAbility ?? "CannotConstruct",
      constructKind: fn.directives.ConstructKind ?? "None",
      isLinkTimeConstant: !!fn.directives.linkTimeConstant,
      intrinsic: fn.directives.intrinsic ?? "NoIntrinsic",
      overriddenName: fn.directives.getter
        ? `"get ${fn.name}"_s`
        : fn.directives.overriddenName
        ? `"${fn.directives.overriddenName}"_s`
        : "ASCIILiteral()",
    });
  }

  return {
    functions: bundledFunctions.sort((a, b) => a.name.localeCompare(b.name)),
    internal,
  };
}
+namespace Zig { class GlobalObject; } +#include "root.h" +#include "config.h" +#include "JSDOMGlobalObject.h" +#include "WebCoreJSClientData.h" +#include <JavaScriptCore/JSObjectInlines.h> + +namespace WebCore { + +`; + +for (const { basename, functions } of files) { + bundledCPP += `/* ${basename}.ts */\n`; + const lowerBasename = low(basename); + for (const fn of functions) { + const name = `${lowerBasename}${cap(fn.name)}Code`; + bundledCPP += `// ${fn.name} +const JSC::ConstructAbility s_${name}ConstructAbility = JSC::ConstructAbility::${fn.constructAbility}; +const JSC::ConstructorKind s_${name}ConstructorKind = JSC::ConstructorKind::${fn.constructKind}; +const JSC::ImplementationVisibility s_${name}ImplementationVisibility = JSC::ImplementationVisibility::${fn.visibility}; +const int s_${name}Length = ${fn.source.length}; +static const JSC::Intrinsic s_${name}Intrinsic = JSC::NoIntrinsic; +const char* const s_${name} = ${fmtCPPString(fn.source)}; + +`; + } + bundledCPP += `#define DEFINE_BUILTIN_GENERATOR(codeName, functionName, overriddenName, argumentCount) \\ +JSC::FunctionExecutable* codeName##Generator(JSC::VM& vm) \\ +{\\ + JSVMClientData* clientData = static_cast<JSVMClientData*>(vm.clientData); \\ + return clientData->builtinFunctions().${lowerBasename}Builtins().codeName##Executable()->link(vm, nullptr, clientData->builtinFunctions().${lowerBasename}Builtins().codeName##Source(), std::nullopt, s_##codeName##Intrinsic); \\ +} +WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_CODE(DEFINE_BUILTIN_GENERATOR) +#undef DEFINE_BUILTIN_GENERATOR + +`; +} + +bundledCPP += ` + +JSBuiltinInternalFunctions::JSBuiltinInternalFunctions(JSC::VM& vm) + : m_vm(vm) +`; + +for (const { basename, internal } of files) { + if (internal) { + bundledCPP += ` , m_${low(basename)}(vm)\n`; + } +} + +bundledCPP += ` +{ + UNUSED_PARAM(vm); +} + +template<typename Visitor> +void JSBuiltinInternalFunctions::visit(Visitor& visitor) +{ +`; +for (const { basename, internal } of 
files) { + if (internal) bundledCPP += ` m_${low(basename)}.visit(visitor);\n`; +} + +bundledCPP += ` + UNUSED_PARAM(visitor); +} + +template void JSBuiltinInternalFunctions::visit(AbstractSlotVisitor&); +template void JSBuiltinInternalFunctions::visit(SlotVisitor&); + +SUPPRESS_ASAN void JSBuiltinInternalFunctions::initialize(Zig::GlobalObject& globalObject) +{ + UNUSED_PARAM(globalObject); +`; + +for (const { basename, internal } of files) { + if (internal) { + bundledCPP += ` m_${low(basename)}.init(globalObject);\n`; + } +} + +bundledCPP += ` + JSVMClientData& clientData = *static_cast<JSVMClientData*>(m_vm.clientData); + Zig::GlobalObject::GlobalPropertyInfo staticGlobals[] = { +`; + +for (const { basename, internal } of files) { + if (internal) { + bundledCPP += `#define DECLARE_GLOBAL_STATIC(name) \\ + Zig::GlobalObject::GlobalPropertyInfo( \\ + clientData.builtinFunctions().${low(basename)}Builtins().name##PrivateName(), ${low( + basename, + )}().m_##name##Function.get() , JSC::PropertyAttribute::DontDelete | JSC::PropertyAttribute::ReadOnly), + WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_FUNCTION_NAME(DECLARE_GLOBAL_STATIC) + #undef DECLARE_GLOBAL_STATIC + `; + } +} + +bundledCPP += ` + }; + globalObject.addStaticGlobals(staticGlobals, std::size(staticGlobals)); + UNUSED_PARAM(clientData); +} + +} // namespace WebCore +`; + +// C++ Header codegen +let bundledHeader = `// Generated by \`bun src/js/builtins/codegen\` +// Do not edit by hand. 
+#pragma once +namespace Zig { class GlobalObject; } +#include "root.h" +#include <JavaScriptCore/BuiltinUtils.h> +#include <JavaScriptCore/Identifier.h> +#include <JavaScriptCore/JSFunction.h> +#include <JavaScriptCore/UnlinkedFunctionExecutable.h> +#include <JavaScriptCore/VM.h> +#include <JavaScriptCore/WeakInlines.h> + +namespace JSC { +class FunctionExecutable; +} + +namespace WebCore { +`; +for (const { basename, functions, internal } of files) { + bundledHeader += `/* ${basename}.ts */ +`; + const lowerBasename = low(basename); + + for (const fn of functions) { + const name = `${lowerBasename}${cap(fn.name)}Code`; + bundledHeader += `// ${fn.name} +#define WEBCORE_BUILTIN_${basename.toUpperCase()}_${fn.name.toUpperCase()} 1 +extern const char* const s_${name}; +extern const int s_${name}Length; +extern const JSC::ConstructAbility s_${name}ConstructAbility; +extern const JSC::ConstructorKind s_${name}ConstructorKind; +extern const JSC::ImplementationVisibility s_${name}ImplementationVisibility; + +`; + } + bundledHeader += `#define WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_DATA(macro) \\\n`; + for (const fn of functions) { + bundledHeader += ` macro(${fn.name}, ${lowerBasename}${cap(fn.name)}, ${fn.params.length}) \\\n`; + } + bundledHeader += "\n"; + bundledHeader += `#define WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_CODE(macro) \\\n`; + for (const fn of functions) { + const name = `${lowerBasename}${cap(fn.name)}Code`; + bundledHeader += ` macro(${name}, ${fn.name}, ${fn.overriddenName}, s_${name}Length) \\\n`; + } + bundledHeader += "\n"; + bundledHeader += `#define WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_FUNCTION_NAME(macro) \\\n`; + for (const fn of functions) { + bundledHeader += ` macro(${fn.name}) \\\n`; + } + bundledHeader += ` +#define DECLARE_BUILTIN_GENERATOR(codeName, functionName, overriddenName, argumentCount) \\ + JSC::FunctionExecutable* codeName##Generator(JSC::VM&); + 
+WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_CODE(DECLARE_BUILTIN_GENERATOR) +#undef DECLARE_BUILTIN_GENERATOR + +class ${basename}BuiltinsWrapper : private JSC::WeakHandleOwner { +public: + explicit ${basename}BuiltinsWrapper(JSC::VM& vm) + : m_vm(vm) + WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_FUNCTION_NAME(INITIALIZE_BUILTIN_NAMES) +#define INITIALIZE_BUILTIN_SOURCE_MEMBERS(name, functionName, overriddenName, length) , m_##name##Source(JSC::makeSource(StringImpl::createWithoutCopying(s_##name, length), { }, JSC::SourceTaintedOrigin::Untainted)) + WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_CODE(INITIALIZE_BUILTIN_SOURCE_MEMBERS) +#undef INITIALIZE_BUILTIN_SOURCE_MEMBERS + { + } + +#define EXPOSE_BUILTIN_EXECUTABLES(name, functionName, overriddenName, length) \\ + JSC::UnlinkedFunctionExecutable* name##Executable(); \\ + const JSC::SourceCode& name##Source() const { return m_##name##Source; } + WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_CODE(EXPOSE_BUILTIN_EXECUTABLES) +#undef EXPOSE_BUILTIN_EXECUTABLES + + WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_FUNCTION_NAME(DECLARE_BUILTIN_IDENTIFIER_ACCESSOR) + + void exportNames(); + +private: + JSC::VM& m_vm; + + WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_FUNCTION_NAME(DECLARE_BUILTIN_NAMES) + +#define DECLARE_BUILTIN_SOURCE_MEMBERS(name, functionName, overriddenName, length) \\ + JSC::SourceCode m_##name##Source;\\ + JSC::Weak<JSC::UnlinkedFunctionExecutable> m_##name##Executable; + WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_CODE(DECLARE_BUILTIN_SOURCE_MEMBERS) +#undef DECLARE_BUILTIN_SOURCE_MEMBERS + +}; + +#define DEFINE_BUILTIN_EXECUTABLES(name, functionName, overriddenName, length) \\ +inline JSC::UnlinkedFunctionExecutable* ${basename}BuiltinsWrapper::name##Executable() \\ +{\\ + if (!m_##name##Executable) {\\ + JSC::Identifier executableName = functionName##PublicName();\\ + if (overriddenName)\\ + executableName = JSC::Identifier::fromString(m_vm, overriddenName);\\ 
+ m_##name##Executable = JSC::Weak<JSC::UnlinkedFunctionExecutable>(JSC::createBuiltinExecutable(m_vm, m_##name##Source, executableName, s_##name##ImplementationVisibility, s_##name##ConstructorKind, s_##name##ConstructAbility), this, &m_##name##Executable);\\ + }\\ + return m_##name##Executable.get();\\ +} +WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_CODE(DEFINE_BUILTIN_EXECUTABLES) +#undef DEFINE_BUILTIN_EXECUTABLES + +inline void ${basename}BuiltinsWrapper::exportNames() +{ +#define EXPORT_FUNCTION_NAME(name) m_vm.propertyNames->appendExternalName(name##PublicName(), name##PrivateName()); + WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_FUNCTION_NAME(EXPORT_FUNCTION_NAME) +#undef EXPORT_FUNCTION_NAME +} +`; + + if (internal) { + bundledHeader += `class ${basename}BuiltinFunctions { +public: + explicit ${basename}BuiltinFunctions(JSC::VM& vm) : m_vm(vm) { } + + void init(JSC::JSGlobalObject&); + template<typename Visitor> void visit(Visitor&); + +public: + JSC::VM& m_vm; + +#define DECLARE_BUILTIN_SOURCE_MEMBERS(functionName) \\ + JSC::WriteBarrier<JSC::JSFunction> m_##functionName##Function; + WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_FUNCTION_NAME(DECLARE_BUILTIN_SOURCE_MEMBERS) +#undef DECLARE_BUILTIN_SOURCE_MEMBERS +}; + +inline void ${basename}BuiltinFunctions::init(JSC::JSGlobalObject& globalObject) +{ +#define EXPORT_FUNCTION(codeName, functionName, overriddenName, length) \\ + m_##functionName##Function.set(m_vm, &globalObject, JSC::JSFunction::create(m_vm, codeName##Generator(m_vm), &globalObject)); + WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_CODE(EXPORT_FUNCTION) +#undef EXPORT_FUNCTION +} + +template<typename Visitor> +inline void ${basename}BuiltinFunctions::visit(Visitor& visitor) +{ +#define VISIT_FUNCTION(name) visitor.append(m_##name##Function); + WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_FUNCTION_NAME(VISIT_FUNCTION) +#undef VISIT_FUNCTION +} + +template void 
${basename}BuiltinFunctions::visit(JSC::AbstractSlotVisitor&); +template void ${basename}BuiltinFunctions::visit(JSC::SlotVisitor&); + `; + } +} +bundledHeader += `class JSBuiltinFunctions { +public: + explicit JSBuiltinFunctions(JSC::VM& vm) + : m_vm(vm) +`; + +for (const { basename } of files) { + bundledHeader += ` , m_${low(basename)}Builtins(m_vm)\n`; +} + +bundledHeader += ` + { +`; + +for (const { basename, internal } of files) { + if (internal) { + bundledHeader += ` m_${low(basename)}Builtins.exportNames();\n`; + } +} + +bundledHeader += ` } +`; + +for (const { basename } of files) { + bundledHeader += ` ${basename}BuiltinsWrapper& ${low(basename)}Builtins() { return m_${low( + basename, + )}Builtins; }\n`; +} + +bundledHeader += ` +private: + JSC::VM& m_vm; +`; + +for (const { basename } of files) { + bundledHeader += ` ${basename}BuiltinsWrapper m_${low(basename)}Builtins;\n`; +} + +bundledHeader += `; +}; + +class JSBuiltinInternalFunctions { +public: + explicit JSBuiltinInternalFunctions(JSC::VM&); + + template<typename Visitor> void visit(Visitor&); + void initialize(Zig::GlobalObject&); +`; + +for (const { basename, internal } of files) { + if (internal) { + bundledHeader += ` ${basename}BuiltinFunctions& ${low(basename)}() { return m_${low(basename)}; }\n`; + } +} + +bundledHeader += ` +private: + JSC::VM& m_vm; +`; + +for (const { basename, internal } of files) { + if (internal) { + bundledHeader += ` ${basename}BuiltinFunctions m_${low(basename)};\n`; + } +} + +bundledHeader += ` +}; + +} // namespace WebCore +`; + +writeIfNotChanged(path.join(OUT_DIR, "WebCoreJSBuiltins.h"), bundledHeader); +writeIfNotChanged(path.join(OUT_DIR, "WebCoreJSBuiltins.cpp"), bundledCPP); + +// Generate TS types +let dts = `// Generated by \`bun src/js/builtins/codegen\` +// Do not edit by hand. +type RemoveThis<F> = F extends (this: infer T, ...args: infer A) => infer R ? 
(...args: A) => R : F; +`; + +for (const { basename, functions, internal } of files) { + if (internal) { + dts += `\n// ${basename}.ts\n`; + for (const fn of functions) { + dts += `declare const \$${fn.name}: RemoveThis<typeof import("${path.relative( + OUT_DIR, + path.join(SRC_DIR, basename), + )}")[${JSON.stringify(fn.name)}]>;\n`; + } + } +} + +writeIfNotChanged(path.join(OUT_DIR, "WebCoreJSBuiltins.d.ts"), dts); + +const totalJSSize = files.reduce( + (acc, { functions }) => acc + functions.reduce((acc, fn) => acc + fn.source.length, 0), + 0, +); + +if (!KEEP_TMP) { + await rmSync(TMP_DIR, { recursive: true }); +} + +console.log( + `Embedded JS size: %s bytes (across %s functions, %s files)`, + totalJSSize, + files.reduce((acc, { functions }) => acc + functions.length, 0), + files.length, +); +console.log(`[${performance.now().toFixed(1)}ms]`); diff --git a/src/codegen/bundle-modules.ts b/src/codegen/bundle-modules.ts new file mode 100644 index 000000000..00865b17b --- /dev/null +++ b/src/codegen/bundle-modules.ts @@ -0,0 +1,398 @@ +// This script is run when you change anything in src/js/* +import fs from "fs"; +import path from "path"; +import { sliceSourceCode } from "./builtin-parser"; +import { cap, checkAscii, fmtCPPString, readdirRecursive, resolveSyncOrNull, writeIfNotChanged } from "./helpers"; +import { createAssertClientJS, createLogClientJS } from "./client-js"; +import { builtinModules } from "node:module"; +import { BuildConfig } from "bun"; +import { define } from "./replacements"; +import { createInternalModuleRegistry } from "./internal-module-registry-scanner"; + +const BASE = path.join(import.meta.dir, "../js"); +const CMAKE_BUILD_ROOT = process.argv[2]; + +if (!CMAKE_BUILD_ROOT) { + console.error("Usage: bun bundle-modules.ts <CMAKE_WORK_DIR>"); + process.exit(1); +} + +const TMP_DIR = path.join(CMAKE_BUILD_ROOT, "tmp"); +const OUT_DIR = path.join(CMAKE_BUILD_ROOT, "js"); + +const t = new Bun.Transpiler({ loader: "tsx" }); + +let start = 
/**
 * Logs `log` followed by the milliseconds elapsed since the previous call
 * (or since `start` was initialised), then restarts the timer.
 */
function mark(log: string) {
  const finishedAt = performance.now();
  const elapsedMs = (finishedAt - start).toFixed(0);
  start = finishedAt;
  console.log(`${log} (${elapsedMs}ms)`);
}
/**
 * Builds the `Bun.build` options for one bundle flavour.
 *
 * @param platform value substituted for `process.platform` in the bundled code
 * @param debug when truthy, syntax minification is turned off and the debug
 *        defines are enabled
 */
const config = ({ platform, debug }: { platform: string; debug?: boolean }) => {
  const isDebug = !!debug;
  return {
    entrypoints: bundledEntryPoints,
    // Whitespace and identifiers are not minified to give better error messages when an error happens in our builtins
    minify: { syntax: !isDebug, whitespace: false },
    root: TMP_DIR,
    target: "bun",
    external: builtinModules,
    define: {
      ...define,
      IS_BUN_DEVELOPMENT: String(isDebug),
      __intrinsic__debug: isDebug ? "$debug_log_enabled" : "false",
      "process.platform": JSON.stringify(platform),
    },
  } satisfies BuildConfig;
};
Error( + `Builtin Bundler: import.meta.require() must be called with a string literal. Found ${specifier}. (in ${file.path}))`, + ); + } + }) + .replace(/__intrinsic__/g, "@") + "\n"; + captured = captured.replace( + /function\s*\(.*?\)\s*{/, + '$&"use strict";' + + (usesDebug + ? createLogClientJS( + file.path.replace(".js", ""), + idToPublicSpecifierOrEnumName(file.path).replace(/^node:|^bun:/, ""), + ) + : "") + + (usesAssert ? createAssertClientJS(idToPublicSpecifierOrEnumName(file.path).replace(/^node:|^bun:/, "")) : ""), + ); + const outputPath = path.join(OUT_DIR, name, file.path); + if (name === "modules_dev") { + fs.mkdirSync(path.dirname(outputPath), { recursive: true }); + fs.writeFileSync(outputPath, captured); + } + outputs.set(file.path.replace(".js", ""), captured); + } +} + +mark("Postprocesss modules"); + +function idToEnumName(id: string) { + return id + .replace(/\.[mc]?[tj]s$/, "") + .replace(/[^a-zA-Z0-9]+/g, " ") + .split(" ") + .map(x => (["jsc", "ffi", "vm", "tls", "os", "ws", "fs", "dns"].includes(x) ? x.toUpperCase() : cap(x))) + .join(""); +} + +function idToPublicSpecifierOrEnumName(id: string) { + id = id.replace(/\.[mc]?[tj]s$/, ""); + if (id.startsWith("node/")) { + return "node:" + id.slice(5).replaceAll(".", "/"); + } else if (id.startsWith("bun/")) { + return "bun:" + id.slice(4).replaceAll(".", "/"); + } else if (id.startsWith("internal/")) { + return "internal:" + id.slice(9).replaceAll(".", "/"); + } else if (id.startsWith("thirdparty/")) { + return id.slice(11).replaceAll(".", "/"); + } + return idToEnumName(id); +} + +// This is a file with a single macro that is used in defining InternalModuleRegistry.h +writeIfNotChanged( + path.join(OUT_DIR, "InternalModuleRegistry+numberOfModules.h"), + `#define BUN_INTERNAL_MODULE_COUNT ${moduleList.length}\n`, +); + +// This code slice is used in InternalModuleRegistry.h for inlining the enum. I dont think we +// actually use this enum but it's probably a good thing to include. 
+writeIfNotChanged( + path.join(OUT_DIR, "InternalModuleRegistry+enum.h"), + `${ + moduleList + .map((id, n) => { + return `${idToEnumName(id)} = ${n},`; + }) + .join("\n") + "\n" + } +`, +); + +// This code slice is used in InternalModuleRegistry.cpp. It defines the loading function for modules. +writeIfNotChanged( + path.join(OUT_DIR, "InternalModuleRegistry+createInternalModuleById.h"), + `// clang-format off +JSValue InternalModuleRegistry::createInternalModuleById(JSGlobalObject* globalObject, VM& vm, Field id) +{ + switch (id) { + // JS internal modules + ${moduleList + .map((id, n) => { + return `case Field::${idToEnumName(id)}: { + INTERNAL_MODULE_REGISTRY_GENERATE(globalObject, vm, "${idToPublicSpecifierOrEnumName(id)}"_s, ${JSON.stringify( + id.replace(/\.[mc]?[tj]s$/, ".js"), + )}_s, InternalModuleRegistryConstants::${idToEnumName(id)}Code, "builtin://${id + .replace(/\.[mc]?[tj]s$/, "") + .replace(/[^a-zA-Z0-9]+/g, "/")}"_s); + }`; + }) + .join("\n ")} + } +} +`, +); + +// This header is used by InternalModuleRegistry.cpp, and should only be included in that file. +// It inlines all the strings for the module IDs. +// +// We cannot use ASCIILiteral's `_s` operator for the module source code because for long +// strings it fails a constexpr assert. 
Instead, we do that assert in JS before we format the string +writeIfNotChanged( + path.join(OUT_DIR, "InternalModuleRegistryConstants.h"), + `// clang-format off +#pragma once + +namespace Bun { +namespace InternalModuleRegistryConstants { + +#if __APPLE__ + ${moduleList + .map( + (id, n) => + `// +static constexpr ASCIILiteral ${idToEnumName(id)}Code = ASCIILiteral::fromLiteralUnsafe(${fmtCPPString( + checkAscii(bundledOutputs.darwin.get(id.slice(0, -3))), + )}); +// +`, + ) + .join("\n")} + #elif _WIN32 + ${moduleList + .map( + (id, n) => + `// +static constexpr ASCIILiteral ${idToEnumName(id)}Code = ASCIILiteral::fromLiteralUnsafe(${fmtCPPString( + checkAscii(bundledOutputs.win32.get(id.slice(0, -3))), + )}); +// +`, + ) + .join("\n")} + #else + // Not 100% accurate, but basically inlining linux on non-windows non-mac platforms. + ${moduleList + .map( + (id, n) => + `// +static constexpr ASCIILiteral ${idToEnumName(id)}Code = ASCIILiteral::fromLiteralUnsafe(${fmtCPPString( + checkAscii(bundledOutputs.linux.get(id.slice(0, -3))), + )}); +// +`, + ) + .join("\n")} +#endif + +} +}`, +); + +// This is a generated enum for zig code (exports.zig) +writeIfNotChanged( + path.join(OUT_DIR, "ResolvedSourceTag.zig"), + `// zig fmt: off +pub const ResolvedSourceTag = enum(u32) { + // Predefined + javascript = 0, + package_json_type_module = 1, + wasm = 2, + object = 3, + file = 4, + esm = 5, + json_for_object_loader = 6, + + // Built in modules are loaded through InternalModuleRegistry by numerical ID. + // In this enum are represented as \`(1 << 9) & id\` +${moduleList.map((id, n) => ` @"${idToPublicSpecifierOrEnumName(id)}" = ${(1 << 9) | n},`).join("\n")} + // Native modules run through a different system using ESM registry. 
+${Object.entries(nativeModuleIds) + .map(([id, n]) => ` @"${id}" = ${(1 << 10) | n},`) + .join("\n")} +}; +`, +); + +// This is a generated enum for c++ code (headers-handwritten.h) +writeIfNotChanged( + path.join(OUT_DIR, "SyntheticModuleType.h"), + `enum SyntheticModuleType : uint32_t { + JavaScript = 0, + PackageJSONTypeModule = 1, + Wasm = 2, + ObjectModule = 3, + File = 4, + ESM = 5, + JSONForObjectLoader = 6, + + // Built in modules are loaded through InternalModuleRegistry by numerical ID. + // In this enum are represented as \`(1 << 9) & id\` + InternalModuleRegistryFlag = 1 << 9, +${moduleList.map((id, n) => ` ${idToEnumName(id)} = ${(1 << 9) | n},`).join("\n")} + + // Native modules run through the same system, but with different underlying initializers. + // They also have bit 10 set to differentiate them from JS builtins. + NativeModuleFlag = (1 << 10) | (1 << 9), +${Object.entries(nativeModuleEnumToId) + .map(([id, n]) => ` ${id} = ${(1 << 10) | n},`) + .join("\n")} +}; + +`, +); + +// This is used in ModuleLoader.cpp to link to all the headers for native modules. +writeIfNotChanged( + path.join(OUT_DIR, "NativeModuleImpl.h"), + Object.values(nativeModuleEnums) + .map(value => `#include "../../bun.js/modules/${value}Module.h"`) + .join("\n") + "\n", +); + +// This is used for debug builds for the base path for dynamic loading +// fs.writeFileSync( +// path.join(OUT_DIR, "DebugPath.h"), +// `// Using __FILE__ does not give an absolute file path +// // This is a workaround for that. 
+// #define BUN_DYNAMIC_JS_LOAD_PATH "${path.join(OUT_DIR, "")}" +// `, +// ); + +mark("Generate Code"); diff --git a/src/codegen/client-js.ts b/src/codegen/client-js.ts new file mode 100644 index 000000000..4dfa6acf6 --- /dev/null +++ b/src/codegen/client-js.ts @@ -0,0 +1,47 @@ +// This is the implementation for $debug +export function createLogClientJS(filepath: string, publicName: string) { + return ` +let $debug_log_enabled = ((env) => ( + // The rationale for checking all these variables is just so you don't have to exactly remember which one you set. + (env.BUN_DEBUG_ALL && env.BUN_DEBUG_ALL !== '0') + || (env.BUN_DEBUG_JS && env.BUN_DEBUG_JS !== '0') + || (env.BUN_DEBUG_${filepath + .replace(/^.*?:/, "") + .split(/[-_./]/g) + .join("_") + .toUpperCase()}) + || (env.DEBUG_${filepath + .replace(/^.*?:/, "") + .split(/[-_./]/g) + .join("_") + .toUpperCase()}) +))(Bun.env); +let $debug_pid_prefix = Bun.env.SHOW_PID === '1'; +let $debug_log = $debug_log_enabled ? (...args) => { + // warn goes to stderr without colorizing + console.warn(($debug_pid_prefix ? \`[\${process.pid}] \` : '') + (Bun.enableANSIColors ? 
'\\x1b[90m[${publicName}]\\x1b[0m' : '[${publicName}]'), ...args); +} : () => {}; +`; +} + +export function createAssertClientJS(publicName: string) { + return ` +let $assert = function(check, sourceString, ...message) { + if (!check) { + const prevPrepareStackTrace = Error.prepareStackTrace; + Error.prepareStackTrace = (e, stack) => { + return e.name + ': ' + e.message + '\\n' + stack.slice(1).map(x => ' at ' + x.toString()).join('\\n'); + }; + const e = new Error(sourceString); + e.stack; // materialize stack + e.name = 'AssertionError'; + Error.prepareStackTrace = prevPrepareStackTrace; + console.error('[${publicName}] ASSERTION FAILED: ' + sourceString); + if (message.length) console.warn(...message); + console.warn(e.stack.split('\\n')[1] + '\\n'); + if (Bun.env.ASSERT === 'CRASH') process.exit(0xAA); + throw e; + } +} +`; +} diff --git a/src/codegen/create-hash-table.ts b/src/codegen/create-hash-table.ts index 776abfdea..0d9b08d41 100644 --- a/src/codegen/create-hash-table.ts +++ b/src/codegen/create-hash-table.ts @@ -1,12 +1,12 @@ import { spawn } from "bun"; import path from "path"; +import { writeIfNotChanged } from "./helpers"; const input = process.argv[2]; -const out_dir = process.argv[3]; +const output = process.argv[3]; const create_hash_table = path.join(import.meta.dir, "./create_hash_table"); -console.time("Generate LUT"); const { stdout, exited } = spawn({ cmd: [create_hash_table, input], stdout: "pipe", @@ -18,6 +18,6 @@ str = str.replaceAll(/^\/\/.*$/gm, ""); str = str.replaceAll(/^#include.*$/gm, ""); str = str.replaceAll(`namespace JSC {`, ""); str = str.replaceAll(`} // namespace JSC`, ""); -str = "// File generated via `make static-hash-table` / `make cpp`\n" + str.trim() + "\n"; -await Bun.write(input.replace(/\.cpp$/, ".lut.h").replace(/(\.lut)?\.txt$/, ".lut.h"), str); -console.log("Wrote", path.join(out_dir, path.basename(process.cwd(), input.replace(/\.cpp$/, ".lut.h")))); +str = "// File generated via `static-hash-table.ts`\n" + 
str.trim() + "\n"; + +writeIfNotChanged(output, str); diff --git a/src/codegen/generate-jssink.ts b/src/codegen/generate-jssink.ts index 8774105d2..9ce0e2313 100644 --- a/src/codegen/generate-jssink.ts +++ b/src/codegen/generate-jssink.ts @@ -480,7 +480,7 @@ JSC_DEFINE_HOST_FUNCTION(${name}__doClose, (JSC::JSGlobalObject * lexicalGlobalO } templ += ` -#include "JSSinkLookupTable.h" +#include "JSSink.lut.h" `; for (let name of classes) { @@ -960,5 +960,5 @@ Bun.spawnSync([ process.execPath, join(import.meta.dir, "create-hash-table.ts"), resolve(outDir + "/JSSink.cpp"), - outDir, + join(outDir, "JSSink.lut.h"), ]); diff --git a/src/codegen/helpers.ts b/src/codegen/helpers.ts new file mode 100644 index 000000000..d259a526e --- /dev/null +++ b/src/codegen/helpers.ts @@ -0,0 +1,64 @@ +import fs from "fs"; +import path from "path"; +import { isAscii } from "buffer"; + +export function fmtCPPString(str: string) { + return ( + '"' + + str + .replace(/\\/g, "\\\\") + .replace(/"/g, '\\"') + .replace(/\n/g, "\\n") + .replace(/\r/g, "\\r") + .replace(/\t/g, "\\t") + .replace(/\?/g, "\\?") + // https://stackoverflow.com/questions/1234582 + '"' + ); +} + +export function cap(str: string) { + return str[0].toUpperCase() + str.slice(1); +} + +export function low(str: string) { + if (str.startsWith("JS")) { + return "js" + str.slice(2); + } + + return str[0].toLowerCase() + str.slice(1); +} + +export function readdirRecursive(root: string): string[] { + const files = fs.readdirSync(root, { withFileTypes: true }); + return files.flatMap(file => { + const fullPath = path.join(root, file.name); + return file.isDirectory() ? readdirRecursive(fullPath) : fullPath; + }); +} + +export function resolveSyncOrNull(specifier: string, from: string) { + try { + return Bun.resolveSync(specifier, from); + } catch { + return null; + } +} + +export function checkAscii(str: string) { + if (!isAscii(Buffer.from(str))) { + throw new Error(`non-ascii character in string "${str}". 
this will not be a valid ASCIILiteral`); + } + + return str; +} + +export function writeIfNotChanged(file: string, contents: string) { + if (fs.existsSync(file)) { + const oldContents = fs.readFileSync(file, "utf8"); + if (oldContents === contents) { + return; + } + } + + fs.writeFileSync(file, contents); +} diff --git a/src/codegen/internal-module-registry-scanner.ts b/src/codegen/internal-module-registry-scanner.ts new file mode 100644 index 000000000..7bc2f9bdd --- /dev/null +++ b/src/codegen/internal-module-registry-scanner.ts @@ -0,0 +1,92 @@ +import fs from "fs"; +import path from "path"; +import { readdirRecursive, resolveSyncOrNull } from "./helpers"; + +export function createInternalModuleRegistry(basedir: string) { + const moduleList = ["bun", "node", "thirdparty", "internal"] + .flatMap(dir => readdirRecursive(path.join(basedir, dir))) + .filter(file => file.endsWith(".js") || (file.endsWith(".ts") && !file.endsWith(".d.ts"))) + .map(file => file.slice(basedir.length + 1)) + .sort(); + + // Create the Internal Module Registry + const internalRegistry = new Map(); + for (let i = 0; i < moduleList.length; i++) { + const prefix = moduleList[i].startsWith("node/") + ? "node:" + : moduleList[i].startsWith("bun:") + ? "bun:" + : moduleList[i].startsWith("internal/") + ? "internal/" + : undefined; + if (prefix) { + const id = prefix + moduleList[i].slice(prefix.length).replaceAll(".", "/").slice(0, -3); + internalRegistry.set(id, i); + } + } + + // Native Module registry + const nativeModuleH = fs.readFileSync(path.join(basedir, "../bun.js/modules/_NativeModule.h"), "utf8"); + const nativeModuleDefine = nativeModuleH.match(/BUN_FOREACH_NATIVE_MODULE\(macro\)\s*\\\n((.*\\\n)*\n)/); + if (!nativeModuleDefine) { + throw new Error( + "Could not find BUN_FOREACH_NATIVE_MODULE in _NativeModule.h. 
Knowing native module IDs is a part of the codegen process.", + ); + } + let nextNativeModuleId = 0; + const nativeModuleIds: Record<string, number> = {}; + const nativeModuleEnums: Record<string, string> = {}; + const nativeModuleEnumToId: Record<string, number> = {}; + for (const [_, idString, enumValue] of nativeModuleDefine[0].matchAll(/macro\((.*?),(.*?)\)/g)) { + const processedIdString = JSON.parse(idString.trim().replace(/_s$/, "")); + const processedEnumValue = enumValue.trim(); + const processedNumericId = nextNativeModuleId++; + nativeModuleIds[processedIdString] = processedNumericId; + nativeModuleEnums[processedIdString] = processedEnumValue; + nativeModuleEnumToId[processedEnumValue] = processedNumericId; + } + + function codegenRequireId(id: string) { + return `(__intrinsic__getInternalField(__intrinsic__internalModuleRegistry, ${id}) || __intrinsic__createInternalModuleById(${id}))`; + } + + function codegenRequireNativeModule(id: string) { + return `(__intrinsic__requireNativeModule(${id.replace(/node:/, "")}))`; + } + + const requireTransformer = (specifier: string, from: string) => { + // this one is deprecated + if (specifier === "$shared") specifier = "./internal/shared.ts"; + + const directMatch = internalRegistry.get(specifier); + if (directMatch) return codegenRequireId(`${directMatch}/*${specifier}*/`); + + if (specifier in nativeModuleIds) { + return codegenRequireNativeModule(JSON.stringify(specifier)); + } + + const relativeMatch = + resolveSyncOrNull(specifier, path.join(basedir, path.dirname(from))) ?? resolveSyncOrNull(specifier, basedir); + + if (relativeMatch) { + const found = moduleList.indexOf(path.relative(basedir, relativeMatch)); + if (found === -1) { + throw new Error( + `Builtin Bundler: "${specifier}" cannot be imported here because it doesn't get a module ID. Only files in "src/js" besides "src/js/builtins" can be used here. Note that the 'node:' or 'bun:' prefix is required here. 
`, + ); + } + return codegenRequireId(`${found}/*${path.relative(basedir, relativeMatch)}*/`); + } + + throw new Error(`Builtin Bundler: Could not resolve "${specifier}" in ${from}.`); + }; + + return { + requireTransformer, + nativeModuleIds, + nativeModuleEnums, + nativeModuleEnumToId, + internalRegistry, + moduleList, + } as const; +} diff --git a/src/codegen/replacements.ts b/src/codegen/replacements.ts new file mode 100644 index 000000000..fbe7478de --- /dev/null +++ b/src/codegen/replacements.ts @@ -0,0 +1,179 @@ +import { LoaderKeys } from "../api/schema"; +import { sliceSourceCode } from "./builtin-parser"; + +// This is a list of extra syntax replacements to do. Kind of like macros +// These are only run on code itself, not string contents or comments. +export const replacements: ReplacementRule[] = [ + { from: /\bthrow new TypeError\b/g, to: "$throwTypeError" }, + { from: /\bthrow new RangeError\b/g, to: "$throwRangeError" }, + { from: /\bthrow new OutOfMemoryError\b/g, to: "$throwOutOfMemoryError" }, + { from: /\bnew TypeError\b/g, to: "$makeTypeError" }, + { from: /\bexport\s*default/g, to: "$exports =" }, +]; + +// These rules are run on the entire file, including within strings. +export const globalReplacements: ReplacementRule[] = [ + { + from: /\bnotImplementedIssue\(\s*([0-9]+)\s*,\s*((?:"[^"]*"|'[^']+'))\s*\)/g, + to: "new TypeError(`${$2} is not implemented yet. See https://github.com/oven-sh/bun/issues/$1`)", + }, + { + from: /\bnotImplementedIssueFn\(\s*([0-9]+)\s*,\s*((?:"[^"]*"|'[^']+'))\s*\)/g, + to: "() => $throwTypeError(`${$2} is not implemented yet. See https://github.com/oven-sh/bun/issues/$1`)", + }, +]; + +// This is a list of globals we should access using @ notation +// This prevents a global override attacks. +// Note that the public `Bun` global is immutable. 
+// undefined -> __intrinsic__undefined -> @undefined +export const globalsToPrefix = [ + "AbortSignal", + "Array", + "ArrayBuffer", + "Buffer", + "Infinity", + "Loader", + "Promise", + "ReadableByteStreamController", + "ReadableStream", + "ReadableStreamBYOBReader", + "ReadableStreamBYOBRequest", + "ReadableStreamDefaultController", + "ReadableStreamDefaultReader", + "TransformStream", + "TransformStreamDefaultController", + "Uint8Array", + "String", + "Buffer", + "RegExp", + "WritableStream", + "WritableStreamDefaultController", + "WritableStreamDefaultWriter", + "isFinite", + "undefined", +]; + +// These enums map to $<enum>IdToLabel and $<enum>LabelToId +// Make sure to define in ./builtins.d.ts +export const enums = { + Loader: LoaderKeys, + ImportKind: [ + "entry-point", + "import-statement", + "require-call", + "dynamic-import", + "require-resolve", + "import-rule", + "url-token", + "internal", + ], +}; + +// These identifiers have typedef but not present at runtime (converted with replacements) +// If they are present in the bundle after runtime, we warn at the user. +// TODO: implement this check. +export const warnOnIdentifiersNotPresentAtRuntime = [ + // + "OutOfMemoryError", + "notImplementedIssue", + "notImplementedIssueFn", +]; + +// These are passed to --define to the bundler +export const define: Record<string, string> = { + "process.env.NODE_ENV": "production", + "IS_BUN_DEVELOPMENT": "false", + + $streamClosed: "1", + $streamClosing: "2", + $streamErrored: "3", + $streamReadable: "4", + $streamWaiting: "5", + $streamWritable: "6", +}; + +// ------------------------------ // + +for (const name in enums) { + const value = enums[name]; + if (typeof value !== "object") throw new Error("Invalid enum object " + name + " defined in " + import.meta.file); + if (typeof value === null) throw new Error("Invalid enum object " + name + " defined in " + import.meta.file); + const keys = Array.isArray(value) ? 
value : Object.keys(value).filter(k => !k.match(/^[0-9]+$/)); + define[`$${name}IdToLabel`] = "[" + keys.map(k => `"${k}"`).join(", ") + "]"; + define[`$${name}LabelToId`] = "{" + keys.map(k => `"${k}": ${keys.indexOf(k)}`).join(", ") + "}"; +} + +for (const name of globalsToPrefix) { + define[name] = "__intrinsic__" + name; +} + +for (const key in define) { + if (key.startsWith("$")) { + define["__intrinsic__" + key.slice(1)] = define[key]; + delete define[key]; + } +} + +export interface ReplacementRule { + from: RegExp; + to: string; + global?: boolean; +} + +/** Applies source code replacements as defined in `replacements` */ +export function applyReplacements(src: string, length: number) { + let slice = src.slice(0, length); + let rest = src.slice(length); + slice = slice.replace(/([^a-zA-Z0-9_\$])\$([a-zA-Z0-9_]+\b)/gm, `$1__intrinsic__$2`); + for (const replacement of replacements) { + slice = slice.replace(replacement.from, replacement.to.replaceAll("$", "__intrinsic__")); + } + let match; + if ((match = slice.match(/__intrinsic__(debug|assert)$/)) && rest.startsWith("(")) { + const name = match[1]; + if (name === "debug") { + const innerSlice = sliceSourceCode(rest, true); + return [ + slice.slice(0, match.index) + "(IS_BUN_DEVELOPMENT?$debug_log" + innerSlice.result + ":void 0)", + innerSlice.rest, + true, + ]; + } else if (name === "assert") { + const checkSlice = sliceSourceCode(rest, true, undefined, true); + let rest2 = checkSlice.rest; + let extraArgs = ""; + if (checkSlice.result.at(-1) === ",") { + const sliced = sliceSourceCode("(" + rest2.slice(1), true, undefined, false); + extraArgs = ", " + sliced.result.slice(1, -1); + rest2 = sliced.rest; + } + return [ + slice.slice(0, match.index) + + "(IS_BUN_DEVELOPMENT?$assert(" + + checkSlice.result.slice(1, -1) + + "," + + JSON.stringify( + checkSlice.result + .slice(1, -1) + .replace(/__intrinsic__/g, "$") + .trim(), + ) + + extraArgs + + "):void 0)", + rest2, + true, + ]; + } + } + return [slice, 
rest, false]; +} + +/** Applies source code replacements as defined in `globalReplacements` */ +export function applyGlobalReplacements(src: string) { + let result = src; + for (const replacement of globalReplacements) { + result = result.replace(replacement.from, replacement.to.replaceAll("$", "__intrinsic__")); + } + return result; +} |