aboutsummaryrefslogtreecommitdiff
path: root/src/codegen
diff options
context:
space:
mode:
authorGravatar dave caruso <me@paperdave.net> 2023-10-17 19:42:37 -0700
committerGravatar dave caruso <me@paperdave.net> 2023-10-17 19:42:37 -0700
commitcb5c4c71c866362dce24eff79251fed6add53e9f (patch)
tree5635cf21140ff2eac14539316f7c6d6704925bd3 /src/codegen
parentbf12268274faac1a38d33007be7a48af9e570761 (diff)
downloadbun-jarred/prepare-for-libuv.tar.gz
bun-jarred/prepare-for-libuv.tar.zst
bun-jarred/prepare-for-libuv.zip
Diffstat (limited to 'src/codegen')
-rw-r--r--src/codegen/builtin-parser.ts187
-rw-r--r--src/codegen/bundle-functions.ts627
-rw-r--r--src/codegen/bundle-modules.ts398
-rw-r--r--src/codegen/client-js.ts47
-rw-r--r--src/codegen/create-hash-table.ts10
-rw-r--r--src/codegen/generate-jssink.ts4
-rw-r--r--src/codegen/helpers.ts64
-rw-r--r--src/codegen/internal-module-registry-scanner.ts92
-rw-r--r--src/codegen/replacements.ts179
9 files changed, 1601 insertions, 7 deletions
diff --git a/src/codegen/builtin-parser.ts b/src/codegen/builtin-parser.ts
new file mode 100644
index 000000000..4e35f13dd
--- /dev/null
+++ b/src/codegen/builtin-parser.ts
@@ -0,0 +1,187 @@
+import { applyReplacements } from "./replacements";
+
+/**
+ * Slices a string until it hits the `}` or `)` that closes the first bracket it
+ * opened, while skipping over JS comments, string literals, regular expression
+ * literals, template literals, and nested `{` / `(`.
+ *
+ * Used to extract function bodies without parsing the code.
+ *
+ * If you pass replace=true, it will run replacements (`applyReplacements`) on
+ * the code that sits between those skipped tokens as it is copied.
+ *
+ * @param contents Source text; scanning starts at index 0.
+ * @param replace Whether to pass each scanned chunk through `applyReplacements`.
+ * @param replaceRequire Called with the specifier of every static
+ *   `require("...")`; its return value is emitted in place of the call. When
+ *   omitted, any `require(` throws.
+ * @param endOnComma Also stop at a `,` seen while at most one bracket is open.
+ *   The comma is emitted as-is, or as `)` when only whitespace separates it
+ *   from a following `)` (both are then consumed).
+ * @returns `result` is the consumed text and `rest` is everything after it.
+ * @throws Error when a string literal does not end or when `require` is used
+ *   in an unsupported way.
+ */
+export function sliceSourceCode(
+  contents: string,
+  replace: boolean,
+  replaceRequire?: (specifier: string) => string,
+  endOnComma = false,
+): { result: string; rest: string } {
+  let bracketCount = 0;
+  let i = 0;
+  let result = "";
+  while (contents.length) {
+    // Find the next token that needs special handling.
+    //   group 1: the punctuation/keyword (plus whitespace) in front of a `/`
+    //            that starts a regular expression literal
+    //   group 2: `$assert(` or `$debug(`
+    // `,` is only a token when endOnComma is set and we are at the outer level.
+    const match = contents.match(
+      endOnComma && bracketCount <= 1
+        ? /((?:[(,=;:{]|return|\=\>)\s*)\/[^\/\*]|\/\*|\/\/|['"}`\),]|(?<!\$)\brequire\(|(\$assert\(|\$debug\()/
+        : /((?:[(,=;:{]|return|\=\>)\s*)\/[^\/\*]|\/\*|\/\/|['"}`\)]|(?<!\$)\brequire\(|(\$assert\(|\$debug\()/,
+    );
+    i = match?.index ?? contents.length;
+    if (match?.[2]) {
+      i += match[2].length - 1;
+    }
+    // Every `(` / `{` in the plain code before the token opens a bracket.
+    bracketCount += [...contents.slice(0, i).matchAll(/[({]/g)].length;
+    const chunk = replace ? applyReplacements(contents, i) : [contents.slice(0, i), contents.slice(i)];
+    result += chunk[0];
+    contents = chunk[1] as string;
+    // NOTE(review): a truthy third element presumably means applyReplacements
+    // already dealt with the token at `i` - confirm in replacements.ts.
+    if (chunk[2]) {
+      continue;
+    }
+    if (match?.[1]) {
+      // The prefix was not part of the slice counted above, so count its
+      // bracket here.
+      // NOTE(review): a `,` prefix also increments the count although nothing
+      // ever closes it, and a `{` prefix does not - confirm this is intended.
+      if (match[1].startsWith("(") || match[1].startsWith(",")) {
+        bracketCount++;
+      }
+      const { result: result2, rest } = sliceRegularExpressionSourceCode(
+        contents.slice(match[1].length + 1),
+        replace,
+      );
+      result += contents.slice(0, match[1].length + 1) + result2;
+      contents = rest;
+      continue;
+    }
+    if (!contents.length) break;
+    if (contents.startsWith("/*")) {
+      // Consume the whole comment including its `*/`. Leaving the terminator
+      // behind lets a following `/` or `*` be mistaken for a new comment.
+      const end = contents.indexOf("*/", 2);
+      i = end === -1 ? contents.length : end + 2;
+    } else if (contents.startsWith("//")) {
+      // Consume up to (not including) the newline, or to the end of the input
+      // when the comment is on the last line.
+      const end = contents.indexOf("\n", 2);
+      i = end === -1 ? contents.length : end;
+    } else if (contents.startsWith("'")) {
+      i = getEndOfBasicString(contents.slice(1), "'") + 2;
+    } else if (contents.startsWith('"')) {
+      i = getEndOfBasicString(contents.slice(1), '"') + 2;
+    } else if (contents.startsWith("`")) {
+      const { result: result2, rest } = sliceTemplateLiteralSourceCode(contents.slice(1), replace);
+      result += "`" + result2;
+      contents = rest;
+      i = 0;
+      continue;
+    } else if (contents.startsWith("}")) {
+      bracketCount--;
+      if (bracketCount <= 0) {
+        result += "}";
+        contents = contents.slice(1);
+        break;
+      }
+      i = 1;
+    } else if (contents.startsWith(")")) {
+      bracketCount--;
+      if (bracketCount <= 0) {
+        result += ")";
+        contents = contents.slice(1);
+        break;
+      }
+      i = 1;
+    } else if (endOnComma && contents.startsWith(",")) {
+      if (bracketCount <= 1) {
+        contents = contents.slice(1);
+        // if the next non-whitespace character is ), we will treat it like a )
+        const trailingParen = contents.match(/^\s*\)/);
+        if (trailingParen) {
+          contents = contents.slice(trailingParen[0].length);
+          result += ")";
+        } else {
+          result += ",";
+        }
+        break;
+      }
+      i = 1;
+    } else if (contents.startsWith("require(")) {
+      if (replaceRequire) {
+        const staticSpecifier = contents.match(/\brequire\(["']([^"']+)["']\)/);
+        if (staticSpecifier) {
+          const specifier = staticSpecifier[1];
+          result += replaceRequire(specifier);
+          contents = contents.slice(staticSpecifier[0].length);
+          continue;
+        } else {
+          throw new Error("Require with dynamic specifier not supported here.");
+        }
+      } else {
+        throw new Error("Require is not supported here.");
+      }
+    } else {
+      console.error(contents.slice(0, 100));
+      throw new Error("TODO");
+    }
+    // Comments, strings and non-closing brackets are copied through verbatim.
+    result += contents.slice(0, i);
+    contents = contents.slice(i);
+  }
+
+  return { result, rest: contents };
+}
+
+/**
+ * Consumes the remainder of a template literal.
+ *
+ * `contents` must start right after the opening backtick. Text is copied
+ * verbatim, escape sequences (`\x`) are skipped as a unit so that an escaped
+ * backtick or `\${` is not mistaken for the end of the literal or the start of
+ * an interpolation, and every `${ ... }` is handed to `sliceSourceCode`.
+ *
+ * @param contents Source text following the opening backtick.
+ * @param replace Forwarded to `sliceSourceCode` for interpolated expressions.
+ * @returns `result` ends with the closing backtick; `rest` is what follows it.
+ * @throws Error when the literal is not terminated.
+ */
+function sliceTemplateLiteralSourceCode(contents: string, replace: boolean) {
+  let result = "";
+  while (contents.length) {
+    const token = contents.match(/\\|`|\${/);
+    if (!token) {
+      throw new Error("Template literal did not end");
+    }
+    const i = token.index ?? 0;
+    result += contents.slice(0, i);
+    contents = contents.slice(i);
+    if (contents.startsWith("\\")) {
+      // Copy the backslash together with the character it escapes.
+      result += contents.slice(0, 2);
+      contents = contents.slice(2);
+    } else if (contents.startsWith("`")) {
+      result += "`";
+      contents = contents.slice(1);
+      break;
+    } else {
+      // `${`: skip the `$` and let sliceSourceCode read through the matching `}`.
+      const { result: result2, rest } = sliceSourceCode(contents.slice(1), replace);
+      result += "$" + result2;
+      contents = rest;
+    }
+  }
+
+  return { result, rest: contents };
+}
+
+/**
+ * Consumes the remainder of a regular expression literal.
+ *
+ * `contents` must start right after the opening `/`. Escape sequences and
+ * character classes (`[...]`) are copied as a unit, so a `/` inside either one
+ * does not end the literal. Flags after the closing `/` are left in `rest`.
+ *
+ * @param contents Source text following the opening `/`.
+ * @param replace Unused; kept so the signature matches the other slicers.
+ * @returns `result` ends with the closing `/`; `rest` is what follows it.
+ * @throws Error when the literal or one of its character classes is not
+ *   terminated.
+ */
+function sliceRegularExpressionSourceCode(contents: string, replace: boolean) {
+  let result = "";
+  while (contents.length) {
+    // A `/` followed by `/` or `*` is not accepted as the terminator.
+    const token = contents.match(/\/(?!\/|\*)|\\|\[/);
+    if (!token) {
+      throw new Error("Regular expression did not end");
+    }
+    const i = token.index ?? 0;
+    result += contents.slice(0, i);
+    contents = contents.slice(i);
+    if (contents.startsWith("/")) {
+      result += "/";
+      contents = contents.slice(1);
+      break;
+    } else if (contents.startsWith("\\")) {
+      // Copy the backslash together with the character it escapes.
+      result += contents.slice(0, 2);
+      contents = contents.slice(2);
+    } else {
+      // Character class: walk to the first unescaped `]`. Escapes are skipped
+      // pairwise so that `[\\]` (an escaped backslash) closes correctly.
+      let end = 1;
+      while (end < contents.length && contents[end] !== "]") {
+        end += contents[end] === "\\" ? 2 : 1;
+      }
+      if (end >= contents.length) {
+        throw new Error("Regular expression character class did not end");
+      }
+      result += contents.slice(0, end + 1);
+      contents = contents.slice(end + 1);
+    }
+  }
+
+  return { result, rest: contents };
+}
+
+/**
+ * Returns the index of the first unescaped `quote` in `str`, where `str` is
+ * the text following the opening quote of a string literal. A backslash
+ * always hides the character right after it.
+ *
+ * @throws Error when no closing quote is found.
+ */
+function getEndOfBasicString(str: string, quote: "'" | '"') {
+  for (let pos = 0; pos < str.length; pos++) {
+    const ch = str[pos];
+    if (ch === "\\") {
+      // Step over the escaped character as well.
+      pos++;
+      continue;
+    }
+    if (ch === quote) {
+      return pos;
+    }
+  }
+  throw new Error("String did not end");
+}
diff --git a/src/codegen/bundle-functions.ts b/src/codegen/bundle-functions.ts
new file mode 100644
index 000000000..ebdf0d748
--- /dev/null
+++ b/src/codegen/bundle-functions.ts
@@ -0,0 +1,627 @@
+import { existsSync, mkdirSync, readdirSync, rmSync } from "fs";
+import path from "path";
+import { sliceSourceCode } from "./builtin-parser";
+import { applyGlobalReplacements, define } from "./replacements";
+import { cap, fmtCPPString, low, writeIfNotChanged } from "./helpers";
+import { createInternalModuleRegistry } from "./internal-module-registry-scanner";
+
+// Progress banner for the build log.
+console.log("Bundling Bun builtin functions...");
+
+// PARALLEL chooses between processing every source file concurrently and one
+// at a time (see the driver loop after processFile).
+const PARALLEL = false;
+// When false, TMP_DIR is deleted again at the end of the script.
+const KEEP_TMP = true;
+
+// First CLI argument: the directory that receives the generated `js` and
+// `tmp` folders.
+const CMAKE_BUILD_ROOT = process.argv[2];
+
+if (!CMAKE_BUILD_ROOT) {
+ console.error("Usage: bun bundle-functions.ts <CMAKE_WORK_DIR>");
+ process.exit(1);
+}
+
+// SRC_DIR holds the builtin .ts sources, OUT_DIR receives the generated
+// C++/d.ts files and TMP_DIR the per-function temp files fed to Bun.build.
+const SRC_DIR = path.join(import.meta.dir, "../js/builtins");
+const OUT_DIR = path.join(CMAKE_BUILD_ROOT, "./js");
+const TMP_DIR = path.join(CMAKE_BUILD_ROOT, "./tmp");
+
+// requireTransformer is handed to sliceSourceCode to rewrite static
+// require("...") calls found in builtin function bodies.
+const {
+ //
+ requireTransformer,
+} = createInternalModuleRegistry(path.join(import.meta.dir, "../js"));
+
+// Start from an empty temp directory on every run.
+if (existsSync(TMP_DIR)) rmSync(TMP_DIR, { recursive: true });
+mkdirSync(TMP_DIR, { recursive: true });
+
+/** One exported function as extracted from a builtin source file, before bundling. */
+interface ParsedBuiltin {
+ /** Identifier from the `export function` declaration. */
+ name: string;
+ /** Parameter names with type annotations stripped; a leading `this` is dropped. */
+ params: string[];
+ /** Values of the `$name` / `$name = value` directives that preceded the declaration. */
+ directives: Record<string, any>;
+ /** Text of the function body as returned by sliceSourceCode, trimmed and without the outer braces. */
+ source: string;
+ /** True when the declaration was `export async function`. */
+ async: boolean;
+}
+/** A builtin function after bundling, together with the metadata the C++ codegen emits. */
+interface BundledBuiltin {
+ name: string;
+ /** Directives copied unchanged from the parsed function. */
+ directives: Record<string, any>;
+ /** True when the `getter` directive is set. */
+ isGetter: boolean;
+ /** Emitted after `JSC::ConstructAbility::`; defaults to "CannotConstruct". */
+ constructAbility: string;
+ /** Emitted after `JSC::ConstructorKind::`; defaults to "None". */
+ constructKind: string;
+ /** True when the `linkTimeConstant` directive is set. */
+ isLinkTimeConstant: boolean;
+ /** Value of the `intrinsic` directive; defaults to "NoIntrinsic". */
+ intrinsic: string;
+ /** C++ expression text: a `"..."_s` literal or `ASCIILiteral()`. */
+ overriddenName: string;
+ /** Bundled JS text that is embedded into the generated C++ file. */
+ source: string;
+ params: string[];
+ /** Emitted after `JSC::ImplementationVisibility::`. */
+ visibility: string;
+}
+
+/**
+ * Source .ts file --> Array<bundled js function code>
+ *
+ * Works in two passes:
+ *  1. Scan the file top to bottom. Comments, imports, types and interfaces are
+ *     skipped, `$directive` lines are collected, and every
+ *     `export [async] function` is cut out with sliceSourceCode.
+ *  2. Write each function to its own temp file wrapped in
+ *     `$$capture_start$$( ... ).$$capture_end$$`, run it through Bun.build and
+ *     take the text between the markers as the final source.
+ *
+ * @param filename Path of the builtin source file.
+ * @returns The bundled functions sorted by name, and `internal`, which is true
+ *   when any top-level comment in the file contains `@internal`.
+ * @throws SyntaxError when the file contains top-level content this scanner
+ *   does not understand (including non-exported functions).
+ * @throws AggregateError when Bun.build fails for a function.
+ */
+async function processFileSplit(filename: string): Promise<{ functions: BundledBuiltin[]; internal: boolean }> {
+  const basename = path.basename(filename, ".ts");
+  let contents = await Bun.file(filename).text();
+
+  contents = applyGlobalReplacements(contents);
+
+  // first approach doesnt work perfectly because we actually need to split each function declaration
+  // and then compile those separately
+
+  const consumeWhitespace = /^\s*/;
+  const consumeTopLevelContent = /^(\/\*|\/\/|type|import|interface|\$|export (?:async )?function|(?:async )?function)/;
+  const consumeEndOfType = /;|.(?=export|type|interface|\$|\/\/|\/\*|function)/;
+
+  // First line of the remaining input, used as the excerpt in error messages.
+  const firstLine = () => {
+    const newline = contents.indexOf("\n");
+    return newline === -1 ? contents : contents.slice(0, newline);
+  };
+
+  const functions: ParsedBuiltin[] = [];
+  let directives: Record<string, any> = {};
+  const bundledFunctions: BundledBuiltin[] = [];
+  let internal = false;
+
+  while (contents.length) {
+    contents = contents.replace(consumeWhitespace, "");
+    if (!contents.length) break;
+    const match = contents.match(consumeTopLevelContent);
+    if (!match) {
+      throw new SyntaxError("Could not process input:\n" + firstLine());
+    }
+    contents = contents.slice(match.index ?? 0);
+    if (match[1] === "import") {
+      // TODO: we may want to do stuff with these
+      const i = contents.indexOf(";");
+      if (i === -1) {
+        // Without this guard the loop would never advance past the import.
+        throw new SyntaxError("Could not find the end of import statement:\n" + firstLine());
+      }
+      contents = contents.slice(i + 1);
+    } else if (match[1] === "/*") {
+      const i = contents.indexOf("*/") + 2;
+      internal ||= contents.slice(0, i).includes("@internal");
+      contents = contents.slice(i);
+    } else if (match[1] === "//") {
+      // A comment on the last line has no newline; consume the rest of the
+      // file instead of looping forever on it.
+      const newline = contents.indexOf("\n");
+      const i = newline === -1 ? contents.length : newline + 1;
+      internal ||= contents.slice(0, i).includes("@internal");
+      contents = contents.slice(i);
+    } else if (match[1] === "type" || match[1] === "export type") {
+      // NOTE(review): consumeTopLevelContent never yields "export type", so
+      // that half of the condition is currently dead.
+      const i = contents.search(consumeEndOfType);
+      // No terminator means the type runs to the end of the file.
+      contents = i === -1 ? "" : contents.slice(i + 1);
+    } else if (match[1] === "interface") {
+      contents = sliceSourceCode(contents, false).rest;
+    } else if (match[1] === "$") {
+      const directive = contents.match(/^\$([a-zA-Z0-9]+)(?:\s*=\s*([^\n]+?))?\s*;?\n/);
+      if (!directive) {
+        throw new SyntaxError("Could not parse directive:\n" + firstLine());
+      }
+      const name = directive[1];
+      let value: unknown;
+      try {
+        value = directive[2] ? JSON.parse(directive[2]) : true;
+      } catch (error) {
+        throw new SyntaxError("Could not parse directive value " + directive[2] + " (must be JSON parsable)");
+      }
+      if (name === "constructor") {
+        directives.ConstructAbility = "CanConstruct";
+      } else if (name === "nakedConstructor") {
+        directives.ConstructAbility = "CanConstruct";
+        directives.ConstructKind = "Naked";
+      } else {
+        directives[name] = value;
+      }
+      contents = contents.slice(directive[0].length);
+    } else if (match[1] === "export function" || match[1] === "export async function") {
+      const declaration = contents.match(
+        /^export\s+(async\s+)?function\s+([a-zA-Z0-9]+)\s*\(([^)]*)\)(?:\s*:\s*([^{\n]+))?\s*{?/,
+      );
+      if (!declaration) {
+        throw new SyntaxError("Could not parse function declaration:\n" + firstLine());
+      }
+
+      const async = !!declaration[1];
+      const name = declaration[2];
+      const paramString = declaration[3];
+      const params =
+        paramString.trim().length === 0 ? [] : paramString.split(",").map(x => x.replace(/:.+$/, "").trim());
+      if (params[0] === "this") {
+        params.shift();
+      }
+
+      // Start slicing at the last character the declaration regex consumed.
+      const { result, rest } = sliceSourceCode(contents.slice(declaration[0].length - 1), true, x =>
+        requireTransformer(x, SRC_DIR + "/" + basename),
+      );
+
+      functions.push({
+        name,
+        params,
+        directives,
+        source: result.trim().slice(2, -1),
+        async,
+      });
+      contents = rest;
+      // Directives only apply to the function that directly follows them.
+      directives = {};
+    } else if (match[1] === "function" || match[1] === "async function") {
+      // Also accept the `async` prefix so this branch always reports the
+      // intended SyntaxError rather than failing on a null match.
+      const fnname = contents.match(/^(?:async\s+)?function\s+([a-zA-Z0-9]+)/)?.[1] ?? firstLine();
+      throw new SyntaxError("All top level functions must be exported: " + fnname);
+    } else {
+      throw new Error("TODO: parse " + match[1]);
+    }
+  }
+
+  for (const fn of functions) {
+    const tmpFile = path.join(TMP_DIR, `${basename}.${fn.name}.ts`);
+
+    // not sure if this optimization works properly in jsc builtins
+    // const useThis = fn.usesThis;
+    const useThis = true;
+
+    // TODO: we should use format=IIFE so we could bundle imports and extra functions.
+    await Bun.write(
+      tmpFile,
+      `// @ts-nocheck
+// GENERATED TEMP FILE - DO NOT EDIT
+// Sourced from ${path.relative(TMP_DIR, filename)}
+
+// do not allow the bundler to rename a symbol to $
+($);
+
+$$capture_start$$(${fn.async ? "async " : ""}${
+        useThis
+          ? `function(${fn.params.join(",")})`
+          : `${fn.params.length === 1 ? fn.params[0] : `(${fn.params.join(",")})`}=>`
+      } {${fn.source}}).$$capture_end$$;
+`,
+    );
+    await Bun.sleep(1);
+    const build = await Bun.build({
+      entrypoints: [tmpFile],
+      define,
+      minify: { syntax: true, whitespace: false },
+    });
+    if (!build.success) {
+      throw new AggregateError(build.logs, "Failed bundling builtin function " + fn.name + " from " + basename + ".ts");
+    }
+    if (build.outputs.length !== 1) {
+      throw new Error("expected one output");
+    }
+    const output = await build.outputs[0].text();
+    const captured = output.match(/\$\$capture_start\$\$([\s\S]+)\.\$\$capture_end\$\$/)?.[1];
+    if (captured === undefined) {
+      throw new Error("Could not find capture markers in bundled output of " + fn.name + " from " + basename + ".ts");
+    }
+    // Unless the function is marked sloppy, inject "use strict" right after
+    // the opening brace; then rewrite the `__intrinsic__` prefix to `@`.
+    const finalReplacement =
+      (fn.directives.sloppy ? captured : captured.replace(/function\s*\(.*?\)\s*{/, '$&"use strict";'))
+        .replace(/^\((async )?function\(/, "($1function (")
+        // .replace(/__intrinsic__lazy\(/g, "globalThis[globalThis.Symbol.for('Bun.lazy')](")
+        .replace(/__intrinsic__/g, "@") + "\n";
+
+    bundledFunctions.push({
+      name: fn.name,
+      directives: fn.directives,
+      source: finalReplacement,
+      params: fn.params,
+      visibility: fn.directives.visibility ?? (fn.directives.linkTimeConstant ? "Private" : "Public"),
+      isGetter: !!fn.directives.getter,
+      constructAbility: fn.directives.ConstructAbility ?? "CannotConstruct",
+      constructKind: fn.directives.ConstructKind ?? "None",
+      isLinkTimeConstant: !!fn.directives.linkTimeConstant,
+      intrinsic: fn.directives.intrinsic ?? "NoIntrinsic",
+      overriddenName: fn.directives.getter
+        ? `"get ${fn.name}"_s`
+        : fn.directives.overriddenName
+        ? `"${fn.directives.overriddenName}"_s`
+        : "ASCIILiteral()",
+    });
+  }
+
+  return {
+    functions: bundledFunctions.sort((a, b) => a.name.localeCompare(b.name)),
+    internal,
+  };
+}
+
+// Every .ts file directly inside SRC_DIR except declaration files, in sorted
+// order.
+const filesToProcess = readdirSync(SRC_DIR)
+ .filter(x => x.endsWith(".ts") && !x.endsWith(".d.ts"))
+ .sort();
+
+// Filled by processFile; one entry per source file.
+const files: Array<{ basename: string; functions: BundledBuiltin[]; internal: boolean }> = [];
+/**
+ * Bundles one builtin source file (a name relative to SRC_DIR) and appends the
+ * outcome to `files`. Any failure is reported on stderr and ends the process
+ * with exit code 1.
+ */
+async function processFile(x: string) {
+  const basename = path.basename(x, ".ts");
+  const sourcePath = path.join(SRC_DIR, x);
+  try {
+    const split = await processFileSplit(sourcePath);
+    files.push({ basename, ...split });
+  } catch (error) {
+    console.error("Failed to process file: " + basename + ".ts");
+    console.error(error);
+    process.exit(1);
+  }
+}
+
+// Bun seems to crash if this is parallelized, :(
+// With PARALLEL set, entries are pushed to `files` in completion order rather
+// than in the sorted order of filesToProcess.
+if (PARALLEL) {
+ await Promise.all(filesToProcess.map(processFile));
+} else {
+ for (const x of filesToProcess) {
+ await processFile(x);
+ }
+}
+
+// C++ codegen
+// bundledCPP accumulates the text of WebCoreJSBuiltins.cpp; it starts with the
+// fixed includes and opens namespace WebCore.
+let bundledCPP = `// Generated by \`bun src/js/builtins/codegen\`
+// Do not edit by hand.
+namespace Zig { class GlobalObject; }
+#include "root.h"
+#include "config.h"
+#include "JSDOMGlobalObject.h"
+#include "WebCoreJSClientData.h"
+#include <JavaScriptCore/JSObjectInlines.h>
+
+namespace WebCore {
+
+`;
+
+// Per source file: one group of s_<name>... constants for every function,
+// followed by the <codeName>Generator definitions produced through the file's
+// WEBCORE_FOREACH_<FILE>_BUILTIN_CODE macro.
+for (const { basename, functions } of files) {
+ bundledCPP += `/* ${basename}.ts */\n`;
+ const lowerBasename = low(basename);
+ for (const fn of functions) {
+ const name = `${lowerBasename}${cap(fn.name)}Code`;
+ // NOTE(review): the emitted intrinsic is always JSC::NoIntrinsic; fn.intrinsic
+ // is not used here - confirm that is intentional.
+ // NOTE(review): the Length constant is the JS string length, which matches
+ // the byte length of the C string only for ASCII sources - confirm sources
+ // are ASCII.
+ bundledCPP += `// ${fn.name}
+const JSC::ConstructAbility s_${name}ConstructAbility = JSC::ConstructAbility::${fn.constructAbility};
+const JSC::ConstructorKind s_${name}ConstructorKind = JSC::ConstructorKind::${fn.constructKind};
+const JSC::ImplementationVisibility s_${name}ImplementationVisibility = JSC::ImplementationVisibility::${fn.visibility};
+const int s_${name}Length = ${fn.source.length};
+static const JSC::Intrinsic s_${name}Intrinsic = JSC::NoIntrinsic;
+const char* const s_${name} = ${fmtCPPString(fn.source)};
+
+`;
+ }
+ bundledCPP += `#define DEFINE_BUILTIN_GENERATOR(codeName, functionName, overriddenName, argumentCount) \\
+JSC::FunctionExecutable* codeName##Generator(JSC::VM& vm) \\
+{\\
+ JSVMClientData* clientData = static_cast<JSVMClientData*>(vm.clientData); \\
+ return clientData->builtinFunctions().${lowerBasename}Builtins().codeName##Executable()->link(vm, nullptr, clientData->builtinFunctions().${lowerBasename}Builtins().codeName##Source(), std::nullopt, s_##codeName##Intrinsic); \\
+}
+WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_CODE(DEFINE_BUILTIN_GENERATOR)
+#undef DEFINE_BUILTIN_GENERATOR
+
+`;
+}
+
+// JSBuiltinInternalFunctions constructor: one member initializer per internal
+// file.
+bundledCPP += `
+
+JSBuiltinInternalFunctions::JSBuiltinInternalFunctions(JSC::VM& vm)
+ : m_vm(vm)
+`;
+
+for (const { basename, internal } of files) {
+ if (internal) {
+ bundledCPP += ` , m_${low(basename)}(vm)\n`;
+ }
+}
+
+// visit(): forwards the visitor to every internal file's function holder.
+bundledCPP += `
+{
+ UNUSED_PARAM(vm);
+}
+
+template<typename Visitor>
+void JSBuiltinInternalFunctions::visit(Visitor& visitor)
+{
+`;
+for (const { basename, internal } of files) {
+ if (internal) bundledCPP += ` m_${low(basename)}.visit(visitor);\n`;
+}
+
+// Explicit instantiations of visit(), then initialize(): calls init() on every
+// internal file's function holder.
+bundledCPP += `
+ UNUSED_PARAM(visitor);
+}
+
+template void JSBuiltinInternalFunctions::visit(AbstractSlotVisitor&);
+template void JSBuiltinInternalFunctions::visit(SlotVisitor&);
+
+SUPPRESS_ASAN void JSBuiltinInternalFunctions::initialize(Zig::GlobalObject& globalObject)
+{
+ UNUSED_PARAM(globalObject);
+`;
+
+for (const { basename, internal } of files) {
+ if (internal) {
+ bundledCPP += ` m_${low(basename)}.init(globalObject);\n`;
+ }
+}
+
+// staticGlobals[]: one GlobalPropertyInfo per function of every internal file,
+// keyed by the function's private name.
+bundledCPP += `
+ JSVMClientData& clientData = *static_cast<JSVMClientData*>(m_vm.clientData);
+ Zig::GlobalObject::GlobalPropertyInfo staticGlobals[] = {
+`;
+
+for (const { basename, internal } of files) {
+ if (internal) {
+ bundledCPP += `#define DECLARE_GLOBAL_STATIC(name) \\
+ Zig::GlobalObject::GlobalPropertyInfo( \\
+ clientData.builtinFunctions().${low(basename)}Builtins().name##PrivateName(), ${low(
+ basename,
+ )}().m_##name##Function.get() , JSC::PropertyAttribute::DontDelete | JSC::PropertyAttribute::ReadOnly),
+ WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_FUNCTION_NAME(DECLARE_GLOBAL_STATIC)
+ #undef DECLARE_GLOBAL_STATIC
+ `;
+ }
+}
+
+// Close the array, register it on the global object and close namespace
+// WebCore.
+bundledCPP += `
+ };
+ globalObject.addStaticGlobals(staticGlobals, std::size(staticGlobals));
+ UNUSED_PARAM(clientData);
+}
+
+} // namespace WebCore
+`;
+
+// C++ Header codegen
+// bundledHeader accumulates the text of WebCoreJSBuiltins.h; it starts with
+// the fixed includes and opens namespace WebCore.
+let bundledHeader = `// Generated by \`bun src/js/builtins/codegen\`
+// Do not edit by hand.
+#pragma once
+namespace Zig { class GlobalObject; }
+#include "root.h"
+#include <JavaScriptCore/BuiltinUtils.h>
+#include <JavaScriptCore/Identifier.h>
+#include <JavaScriptCore/JSFunction.h>
+#include <JavaScriptCore/UnlinkedFunctionExecutable.h>
+#include <JavaScriptCore/VM.h>
+#include <JavaScriptCore/WeakInlines.h>
+
+namespace JSC {
+class FunctionExecutable;
+}
+
+namespace WebCore {
+`;
+// Per source file: extern declarations for every function, the three
+// WEBCORE_FOREACH_<FILE>_BUILTIN_* list macros, the <file>BuiltinsWrapper
+// class and, for internal files, the <file>BuiltinFunctions class.
+for (const { basename, functions, internal } of files) {
+ bundledHeader += `/* ${basename}.ts */
+`;
+ const lowerBasename = low(basename);
+
+ // Declarations matching the constants defined in the generated .cpp file.
+ for (const fn of functions) {
+ const name = `${lowerBasename}${cap(fn.name)}Code`;
+ bundledHeader += `// ${fn.name}
+#define WEBCORE_BUILTIN_${basename.toUpperCase()}_${fn.name.toUpperCase()} 1
+extern const char* const s_${name};
+extern const int s_${name}Length;
+extern const JSC::ConstructAbility s_${name}ConstructAbility;
+extern const JSC::ConstructorKind s_${name}ConstructorKind;
+extern const JSC::ImplementationVisibility s_${name}ImplementationVisibility;
+
+`;
+ }
+ // _BUILTIN_DATA(macro): (name, prefixed name, parameter count) per function.
+ bundledHeader += `#define WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_DATA(macro) \\\n`;
+ for (const fn of functions) {
+ bundledHeader += ` macro(${fn.name}, ${lowerBasename}${cap(fn.name)}, ${fn.params.length}) \\\n`;
+ }
+ bundledHeader += "\n";
+ // _BUILTIN_CODE(macro): (code name, name, overridden name, length constant)
+ // per function.
+ bundledHeader += `#define WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_CODE(macro) \\\n`;
+ for (const fn of functions) {
+ const name = `${lowerBasename}${cap(fn.name)}Code`;
+ bundledHeader += ` macro(${name}, ${fn.name}, ${fn.overriddenName}, s_${name}Length) \\\n`;
+ }
+ bundledHeader += "\n";
+ // _BUILTIN_FUNCTION_NAME(macro): just the name per function.
+ bundledHeader += `#define WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_FUNCTION_NAME(macro) \\\n`;
+ for (const fn of functions) {
+ bundledHeader += ` macro(${fn.name}) \\\n`;
+ }
+ // Generator declarations and the wrapper class built from the list macros.
+ bundledHeader += `
+#define DECLARE_BUILTIN_GENERATOR(codeName, functionName, overriddenName, argumentCount) \\
+ JSC::FunctionExecutable* codeName##Generator(JSC::VM&);
+
+WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_CODE(DECLARE_BUILTIN_GENERATOR)
+#undef DECLARE_BUILTIN_GENERATOR
+
+class ${basename}BuiltinsWrapper : private JSC::WeakHandleOwner {
+public:
+ explicit ${basename}BuiltinsWrapper(JSC::VM& vm)
+ : m_vm(vm)
+ WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_FUNCTION_NAME(INITIALIZE_BUILTIN_NAMES)
+#define INITIALIZE_BUILTIN_SOURCE_MEMBERS(name, functionName, overriddenName, length) , m_##name##Source(JSC::makeSource(StringImpl::createWithoutCopying(s_##name, length), { }, JSC::SourceTaintedOrigin::Untainted))
+ WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_CODE(INITIALIZE_BUILTIN_SOURCE_MEMBERS)
+#undef INITIALIZE_BUILTIN_SOURCE_MEMBERS
+ {
+ }
+
+#define EXPOSE_BUILTIN_EXECUTABLES(name, functionName, overriddenName, length) \\
+ JSC::UnlinkedFunctionExecutable* name##Executable(); \\
+ const JSC::SourceCode& name##Source() const { return m_##name##Source; }
+ WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_CODE(EXPOSE_BUILTIN_EXECUTABLES)
+#undef EXPOSE_BUILTIN_EXECUTABLES
+
+ WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_FUNCTION_NAME(DECLARE_BUILTIN_IDENTIFIER_ACCESSOR)
+
+ void exportNames();
+
+private:
+ JSC::VM& m_vm;
+
+ WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_FUNCTION_NAME(DECLARE_BUILTIN_NAMES)
+
+#define DECLARE_BUILTIN_SOURCE_MEMBERS(name, functionName, overriddenName, length) \\
+ JSC::SourceCode m_##name##Source;\\
+ JSC::Weak<JSC::UnlinkedFunctionExecutable> m_##name##Executable;
+ WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_CODE(DECLARE_BUILTIN_SOURCE_MEMBERS)
+#undef DECLARE_BUILTIN_SOURCE_MEMBERS
+
+};
+
+#define DEFINE_BUILTIN_EXECUTABLES(name, functionName, overriddenName, length) \\
+inline JSC::UnlinkedFunctionExecutable* ${basename}BuiltinsWrapper::name##Executable() \\
+{\\
+ if (!m_##name##Executable) {\\
+ JSC::Identifier executableName = functionName##PublicName();\\
+ if (overriddenName)\\
+ executableName = JSC::Identifier::fromString(m_vm, overriddenName);\\
+ m_##name##Executable = JSC::Weak<JSC::UnlinkedFunctionExecutable>(JSC::createBuiltinExecutable(m_vm, m_##name##Source, executableName, s_##name##ImplementationVisibility, s_##name##ConstructorKind, s_##name##ConstructAbility), this, &m_##name##Executable);\\
+ }\\
+ return m_##name##Executable.get();\\
+}
+WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_CODE(DEFINE_BUILTIN_EXECUTABLES)
+#undef DEFINE_BUILTIN_EXECUTABLES
+
+inline void ${basename}BuiltinsWrapper::exportNames()
+{
+#define EXPORT_FUNCTION_NAME(name) m_vm.propertyNames->appendExternalName(name##PublicName(), name##PrivateName());
+ WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_FUNCTION_NAME(EXPORT_FUNCTION_NAME)
+#undef EXPORT_FUNCTION_NAME
+}
+`;
+
+ // Internal files additionally get a class holding one JSFunction per builtin.
+ if (internal) {
+ bundledHeader += `class ${basename}BuiltinFunctions {
+public:
+ explicit ${basename}BuiltinFunctions(JSC::VM& vm) : m_vm(vm) { }
+
+ void init(JSC::JSGlobalObject&);
+ template<typename Visitor> void visit(Visitor&);
+
+public:
+ JSC::VM& m_vm;
+
+#define DECLARE_BUILTIN_SOURCE_MEMBERS(functionName) \\
+ JSC::WriteBarrier<JSC::JSFunction> m_##functionName##Function;
+ WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_FUNCTION_NAME(DECLARE_BUILTIN_SOURCE_MEMBERS)
+#undef DECLARE_BUILTIN_SOURCE_MEMBERS
+};
+
+inline void ${basename}BuiltinFunctions::init(JSC::JSGlobalObject& globalObject)
+{
+#define EXPORT_FUNCTION(codeName, functionName, overriddenName, length) \\
+ m_##functionName##Function.set(m_vm, &globalObject, JSC::JSFunction::create(m_vm, codeName##Generator(m_vm), &globalObject));
+ WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_CODE(EXPORT_FUNCTION)
+#undef EXPORT_FUNCTION
+}
+
+template<typename Visitor>
+inline void ${basename}BuiltinFunctions::visit(Visitor& visitor)
+{
+#define VISIT_FUNCTION(name) visitor.append(m_##name##Function);
+ WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_FUNCTION_NAME(VISIT_FUNCTION)
+#undef VISIT_FUNCTION
+}
+
+template void ${basename}BuiltinFunctions::visit(JSC::AbstractSlotVisitor&);
+template void ${basename}BuiltinFunctions::visit(JSC::SlotVisitor&);
+ `;
+ }
+}
+// JSBuiltinFunctions: owns one <file>BuiltinsWrapper per source file.
+bundledHeader += `class JSBuiltinFunctions {
+public:
+ explicit JSBuiltinFunctions(JSC::VM& vm)
+ : m_vm(vm)
+`;
+
+for (const { basename } of files) {
+ bundledHeader += ` , m_${low(basename)}Builtins(m_vm)\n`;
+}
+
+bundledHeader += `
+ {
+`;
+
+// Only internal files get exportNames() called from the constructor body.
+for (const { basename, internal } of files) {
+ if (internal) {
+ bundledHeader += ` m_${low(basename)}Builtins.exportNames();\n`;
+ }
+}
+
+bundledHeader += ` }
+`;
+
+// Accessor per wrapper.
+for (const { basename } of files) {
+ bundledHeader += ` ${basename}BuiltinsWrapper& ${low(basename)}Builtins() { return m_${low(
+ basename,
+ )}Builtins; }\n`;
+}
+
+bundledHeader += `
+private:
+ JSC::VM& m_vm;
+`;
+
+// Member per wrapper.
+for (const { basename } of files) {
+ bundledHeader += ` ${basename}BuiltinsWrapper m_${low(basename)}Builtins;\n`;
+}
+
+// NOTE(review): the template below begins with a lone `;`, which ends up as an
+// empty declaration inside the generated class - confirm it is harmless.
+// JSBuiltinInternalFunctions: accessors and members for internal files only.
+bundledHeader += `;
+};
+
+class JSBuiltinInternalFunctions {
+public:
+ explicit JSBuiltinInternalFunctions(JSC::VM&);
+
+ template<typename Visitor> void visit(Visitor&);
+ void initialize(Zig::GlobalObject&);
+`;
+
+for (const { basename, internal } of files) {
+ if (internal) {
+ bundledHeader += ` ${basename}BuiltinFunctions& ${low(basename)}() { return m_${low(basename)}; }\n`;
+ }
+}
+
+bundledHeader += `
+private:
+ JSC::VM& m_vm;
+`;
+
+for (const { basename, internal } of files) {
+ if (internal) {
+ bundledHeader += ` ${basename}BuiltinFunctions m_${low(basename)};\n`;
+ }
+}
+
+bundledHeader += `
+};
+
+} // namespace WebCore
+`;
+
+// Write both generated C++ files into OUT_DIR.
+writeIfNotChanged(path.join(OUT_DIR, "WebCoreJSBuiltins.h"), bundledHeader);
+writeIfNotChanged(path.join(OUT_DIR, "WebCoreJSBuiltins.cpp"), bundledCPP);
+
+// Generate TS types
+// For every function of an internal file, declare a global `$<name>` constant
+// typed after the function exported from the builtin source, minus its `this`
+// parameter.
+let dts = `// Generated by \`bun src/js/builtins/codegen\`
+// Do not edit by hand.
+type RemoveThis<F> = F extends (this: infer T, ...args: infer A) => infer R ? (...args: A) => R : F;
+`;
+
+for (const { basename, functions, internal } of files) {
+  if (!internal) continue;
+
+  // Module specifiers must use "/" even where the platform separator is "\";
+  // JSON.stringify adds the quotes and escapes anything that needs it.
+  const specifier = JSON.stringify(
+    path.relative(OUT_DIR, path.join(SRC_DIR, basename)).split(path.sep).join("/"),
+  );
+
+  dts += `\n// ${basename}.ts\n`;
+  for (const fn of functions) {
+    dts += `declare const \$${fn.name}: RemoveThis<typeof import(${specifier})[${JSON.stringify(fn.name)}]>;\n`;
+  }
+}
+
+writeIfNotChanged(path.join(OUT_DIR, "WebCoreJSBuiltins.d.ts"), dts);
+
+// Sum of the bundled source lengths across all files (string length, i.e.
+// UTF-16 code units).
+const totalJSSize = files.reduce(
+  (acc, { functions }) => acc + functions.reduce((acc, fn) => acc + fn.source.length, 0),
+  0,
+);
+
+if (!KEEP_TMP) {
+  // rmSync is synchronous and returns nothing, so there is nothing to await.
+  rmSync(TMP_DIR, { recursive: true });
+}
+
+console.log(
+  `Embedded JS size: %s bytes (across %s functions, %s files)`,
+  totalJSSize,
+  files.reduce((acc, { functions }) => acc + functions.length, 0),
+  files.length,
+);
+console.log(`[${performance.now().toFixed(1)}ms]`);
diff --git a/src/codegen/bundle-modules.ts b/src/codegen/bundle-modules.ts
new file mode 100644
index 000000000..00865b17b
--- /dev/null
+++ b/src/codegen/bundle-modules.ts
@@ -0,0 +1,398 @@
+// This script is run when you change anything in src/js/*
+import fs from "fs";
+import path from "path";
+import { sliceSourceCode } from "./builtin-parser";
+import { cap, checkAscii, fmtCPPString, readdirRecursive, resolveSyncOrNull, writeIfNotChanged } from "./helpers";
+import { createAssertClientJS, createLogClientJS } from "./client-js";
+import { builtinModules } from "node:module";
+import { BuildConfig } from "bun";
+import { define } from "./replacements";
+import { createInternalModuleRegistry } from "./internal-module-registry-scanner";
+
+// Root of the JS module sources, resolved relative to this script.
+const BASE = path.join(import.meta.dir, "../js");
+// First CLI argument: the build directory that receives the "tmp" and "js" subdirectories below.
+const CMAKE_BUILD_ROOT = process.argv[2];
+
+if (!CMAKE_BUILD_ROOT) {
+ console.error("Usage: bun bundle-modules.ts <CMAKE_WORK_DIR>");
+ process.exit(1);
+}
+
+// Preprocessed module sources are written here before bundling.
+const TMP_DIR = path.join(CMAKE_BUILD_ROOT, "tmp");
+// Generated headers/enums (and the dev module output) are written here.
+const OUT_DIR = path.join(CMAKE_BUILD_ROOT, "js");
+
+// Only used to scan import statements during preprocessing.
+const t = new Bun.Transpiler({ loader: "tsx" });
+
+// Timestamp of the previous checkpoint; every mark() call moves it forward.
+let start = performance.now();
+
+/** Logs `log` followed by the whole milliseconds elapsed since the previous checkpoint. */
+function mark(log: string) {
+  const previous = start;
+  start = performance.now();
+  const elapsedMs = (start - previous).toFixed(0);
+  console.log(`${log} (${elapsedMs}ms)`);
+}
+
+// moduleList is the ordered list of module files relative to BASE; requireTransformer turns a
+// require() specifier into the generated lookup expression (or throws if it cannot resolve it).
+const {
+ //
+ moduleList,
+ nativeModuleIds,
+ nativeModuleEnumToId,
+ nativeModuleEnums,
+ requireTransformer,
+} = createInternalModuleRegistry(BASE);
+
+// Preprocess builtins
+// Each module is rewritten into a temp .ts file under TMP_DIR; those files become the bundler
+// entry points. Any failure is logged together with the module name and aborts the script.
+const bundledEntryPoints: string[] = [];
+for (let i = 0; i < moduleList.length; i++) {
+ try {
+ let input = fs.readFileSync(path.join(BASE, moduleList[i]), "utf8");
+
+ // Reject ESM imports of builtin modules: an import statement is only accepted when its path
+ // is not a Node builtin AND requireTransformer throws for it (i.e. it is not a known module).
+ const scannedImports = t.scanImports(input);
+ for (const imp of scannedImports) {
+ if (imp.kind === "import-statement") {
+ var isBuiltin = true;
+ try {
+ if (!builtinModules.includes(imp.path)) {
+ requireTransformer(imp.path, moduleList[i]);
+ }
+ } catch {
+ isBuiltin = false;
+ }
+ if (isBuiltin) {
+ throw new Error(`Cannot use ESM import on builtin modules. Use require("${imp.path}") instead.`);
+ }
+ }
+ }
+
+ // Import statements are cut out of the source here and re-emitted verbatim at the top of the
+ // temp file, so sliceSourceCode only sees the module body.
+ let importStatements: string[] = [];
+
+ // The body is wrapped in a leading "{" so sliceSourceCode treats the whole file as one block;
+ // that extra brace is removed again with `.slice(1)` when the temp file is assembled.
+ const processed = sliceSourceCode(
+ "{" +
+ input
+ .replace(
+ /\bimport(\s*type)?\s*(\{[^}]*\}|(\*\s*as)?\s[a-zA-Z0-9_$]+)\s*from\s*['"][^'"]+['"]/g,
+ stmt => (importStatements.push(stmt), ""),
+ )
+ .replace(/export\s*{\s*}\s*;/g, ""),
+ true,
+ x => requireTransformer(x, moduleList[i]),
+ );
+ // The trailing $$EXPORT$$(...).$$EXPORT_END$$ marker is turned into a `return` statement
+ // during postprocessing of the bundler output.
+ let fileToTranspile = `// @ts-nocheck
+// GENERATED TEMP FILE - DO NOT EDIT
+// Sourced from src/js/${moduleList[i]}
+${importStatements.join("\n")}
+
+${processed.result.slice(1).trim()}
+$$EXPORT$$(__intrinsic__exports).$$EXPORT_END$$;
+`;
+
+ // Attempt to optimize "$exports = ..." to a variableless return
+ // otherwise, declare $exports so it works.
+ // The optimization only applies when the assignment sits directly before the export marker.
+ let exportOptimization = false;
+ fileToTranspile = fileToTranspile.replace(
+ /__intrinsic__exports\s*=\s*(.*|.*\{[^\}]*}|.*\([^\)]*\))\n+\s*\$\$EXPORT\$\$\(__intrinsic__exports\).\$\$EXPORT_END\$\$;/,
+ (_, a) => {
+ exportOptimization = true;
+ return "$$EXPORT$$(" + a.replace(/;$/, "") + ").$$EXPORT_END$$;";
+ },
+ );
+ if (!exportOptimization) {
+ // Fallback: exports live in a local variable named `$`.
+ fileToTranspile = `var $;` + fileToTranspile.replaceAll("__intrinsic__exports", "$");
+ }
+ // slice(0, -3) drops the 3-character source extension before ".ts" is appended.
+ const outputPath = path.join(TMP_DIR, moduleList[i].slice(0, -3) + ".ts");
+ fs.mkdirSync(path.dirname(outputPath), { recursive: true });
+ fs.writeFileSync(outputPath, fileToTranspile);
+ bundledEntryPoints.push(outputPath);
+ } catch (error) {
+ console.error(error);
+ console.error(`While processing: ${moduleList[i]}`);
+ process.exit(1);
+ }
+}
+
+mark("Preprocess modules");
+
+/**
+ * Builds the Bun.build() options for one target platform.
+ * `debug` selects the development variant: syntax minification off and $debug logging compiled in.
+ */
+function config({ platform, debug }: { platform: string; debug?: boolean }) {
+  const isDebug = !!debug;
+  return {
+    entrypoints: bundledEntryPoints,
+    // Whitespace and identifiers are not minified to give better error messages when an error happens in our builtins
+    minify: { syntax: !isDebug, whitespace: false },
+    root: TMP_DIR,
+    target: "bun",
+    external: builtinModules,
+    define: {
+      ...define,
+      IS_BUN_DEVELOPMENT: String(isDebug),
+      __intrinsic__debug: isDebug ? "$debug_log_enabled" : "false",
+      "process.platform": JSON.stringify(platform),
+    },
+  } satisfies BuildConfig;
+}
+
+// One development bundle for the host platform plus one release bundle per supported platform.
+const bundled_dev = await Bun.build(config({ platform: process.platform, debug: true }));
+const bundled_linux = await Bun.build(config({ platform: "linux" }));
+const bundled_darwin = await Bun.build(config({ platform: "darwin" }));
+const bundled_win32 = await Bun.build(config({ platform: "win32" }));
+
+// Stop at the first bundle that failed and print its logs.
+const failedBundle = [bundled_dev, bundled_linux, bundled_darwin, bundled_win32].find(bundle => !bundle.success);
+if (failedBundle) {
+  console.error(failedBundle.logs);
+  process.exit(1);
+}
+
+mark("Bundle modules");
+
+// Postprocessed module source per platform, keyed by the bundler output path without ".js".
+const bundledOutputs = {
+  host: new Map(),
+  linux: new Map(),
+  darwin: new Map(),
+  win32: new Map(),
+};
+
+for (const [name, bundle, outputs] of [
+  ["modules_dev", bundled_dev, bundledOutputs.host],
+  ["modules_linux", bundled_linux, bundledOutputs.linux],
+  ["modules_darwin", bundled_darwin, bundledOutputs.darwin],
+  ["modules_win32", bundled_win32, bundledOutputs.win32],
+] as const) {
+  for (const file of bundle.outputs) {
+    const output = await file.text();
+    // Every module becomes a function expression so it can `return` its exports.
+    let captured = `(function (){${output.replace("// @bun\n", "").trim()}})`;
+    let usesDebug = output.includes("$debug_log");
+    let usesAssert = output.includes("$assert");
+    captured =
+      captured
+        // Strip the bundler's __require shim in each form it may be printed in.
+        .replace(
+          `var __require = (id) => {
+ return import.meta.require(id);
+};`,
+          "",
+        )
+        .replace(/var\s*__require\s*=\s*\(?id\)?\s*=>\s*{\s*return\s*import.meta.require\(id\)\s*};?/, "")
+        .replace(/var __require=\(?id\)?=>import.meta.require\(id\);?/, "")
+        // Turn the export marker written during preprocessing into a return statement.
+        .replace(/\$\$EXPORT\$\$\((.*)\).\$\$EXPORT_END\$\$;/, "return $1")
+        // This pattern covers both the debug and the assert end markers.
+        .replace(/]\s*,\s*__(debug|assert)_end__\)/g, ")")
+        // .replace(/__intrinsic__lazy\(/g, "globalThis[globalThis.Symbol.for('Bun.lazy')](")
+        .replace(/import.meta.require\((.*?)\)/g, (expr, specifier) => {
+          // Only a failed parse (or a non-string literal) means "not a string literal";
+          // errors thrown by requireTransformer itself must surface unchanged.
+          let parsed: unknown;
+          try {
+            parsed = JSON.parse(specifier);
+          } catch {
+            parsed = undefined;
+          }
+          if (typeof parsed !== "string") {
+            throw new Error(
+              `Builtin Bundler: import.meta.require() must be called with a string literal. Found ${specifier}. (in ${file.path})`,
+            );
+          }
+          // requireTransformer is a module-local binding; it is not a property of globalThis.
+          return requireTransformer(parsed, file.path);
+        })
+        .replace(/__intrinsic__/g, "@") + "\n";
+    // Inject "use strict" and, when referenced, the $debug / $assert runtime helpers at the top
+    // of the wrapper function.
+    captured = captured.replace(
+      /function\s*\(.*?\)\s*{/,
+      '$&"use strict";' +
+        (usesDebug
+          ? createLogClientJS(
+              file.path.replace(".js", ""),
+              idToPublicSpecifierOrEnumName(file.path).replace(/^node:|^bun:/, ""),
+            )
+          : "") +
+        (usesAssert ? createAssertClientJS(idToPublicSpecifierOrEnumName(file.path).replace(/^node:|^bun:/, "")) : ""),
+    );
+    const outputPath = path.join(OUT_DIR, name, file.path);
+    // Only the development bundle is written to disk; the others are kept in memory only.
+    if (name === "modules_dev") {
+      fs.mkdirSync(path.dirname(outputPath), { recursive: true });
+      fs.writeFileSync(outputPath, captured);
+    }
+    outputs.set(file.path.replace(".js", ""), captured);
+  }
+}
+
+mark("Postprocesss modules");
+
+/**
+ * Converts a module path such as "node/fs.promises.ts" into a PascalCase enum name
+ * ("NodeFSPromises"): the extension is dropped, the rest is split on runs of
+ * non-alphanumeric characters, and each piece is capitalized.
+ */
+function idToEnumName(id: string) {
+  // Pieces that are written fully upper-cased instead of merely capitalized.
+  const acronyms = ["jsc", "ffi", "vm", "tls", "os", "ws", "fs", "dns"];
+  const pieces = id
+    .replace(/\.[mc]?[tj]s$/, "")
+    .replace(/[^a-zA-Z0-9]+/g, " ")
+    .split(" ");
+  let enumName = "";
+  for (const piece of pieces) {
+    enumName += acronyms.includes(piece) ? piece.toUpperCase() : cap(piece);
+  }
+  return enumName;
+}
+
+/**
+ * Maps a module path to its public specifier: "node/x" -> "node:x", "bun/x" -> "bun:x",
+ * "internal/x" -> "internal:x", "thirdparty/x" -> "x", with "." in the remainder turned into "/".
+ * Paths outside those directories fall back to the enum name.
+ */
+function idToPublicSpecifierOrEnumName(id: string) {
+  const stem = id.replace(/\.[mc]?[tj]s$/, "");
+  // [directory prefix in the source tree, scheme used in the public specifier]
+  const schemes: [string, string][] = [
+    ["node/", "node:"],
+    ["bun/", "bun:"],
+    ["internal/", "internal:"],
+    ["thirdparty/", ""],
+  ];
+  for (const [dir, scheme] of schemes) {
+    if (stem.startsWith(dir)) {
+      return scheme + stem.slice(dir.length).replaceAll(".", "/");
+    }
+  }
+  return idToEnumName(stem);
+}
+
+// This is a file with a single macro that is used in defining InternalModuleRegistry.h
+// The count is simply the number of entries in moduleList.
+writeIfNotChanged(
+ path.join(OUT_DIR, "InternalModuleRegistry+numberOfModules.h"),
+ `#define BUN_INTERNAL_MODULE_COUNT ${moduleList.length}\n`,
+);
+
+// This code slice is used in InternalModuleRegistry.h for inlining the enum. I don't think we
+// actually use this enum but it's probably a good thing to include.
+// Each enumerator's value is the module's index in moduleList.
+writeIfNotChanged(
+ path.join(OUT_DIR, "InternalModuleRegistry+enum.h"),
+ `${
+ moduleList
+ .map((id, n) => {
+ return `${idToEnumName(id)} = ${n},`;
+ })
+ .join("\n") + "\n"
+ }
+`,
+);
+
+// This code slice is used in InternalModuleRegistry.cpp. It defines the loading function for modules.
+// One `case` is emitted per module, passing the public specifier, the ".js" file name, the
+// generated <EnumName>Code constant and a "builtin://" URL built from the module path.
+writeIfNotChanged(
+ path.join(OUT_DIR, "InternalModuleRegistry+createInternalModuleById.h"),
+ `// clang-format off
+JSValue InternalModuleRegistry::createInternalModuleById(JSGlobalObject* globalObject, VM& vm, Field id)
+{
+ switch (id) {
+ // JS internal modules
+ ${moduleList
+ .map((id, n) => {
+ return `case Field::${idToEnumName(id)}: {
+ INTERNAL_MODULE_REGISTRY_GENERATE(globalObject, vm, "${idToPublicSpecifierOrEnumName(id)}"_s, ${JSON.stringify(
+ id.replace(/\.[mc]?[tj]s$/, ".js"),
+ )}_s, InternalModuleRegistryConstants::${idToEnumName(id)}Code, "builtin://${id
+ .replace(/\.[mc]?[tj]s$/, "")
+ .replace(/[^a-zA-Z0-9]+/g, "/")}"_s);
+ }`;
+ })
+ .join("\n ")}
+ }
+}
+`,
+);
+
+// This header is used by InternalModuleRegistry.cpp, and should only be included in that file.
+// It inlines all the strings for the module IDs.
+//
+// We cannot use ASCIILiteral's `_s` operator for the module source code because for long
+// strings it fails a constexpr assert. Instead, we do that assert in JS before we format the string
+
+/**
+ * Renders one `static constexpr ASCIILiteral <EnumName>Code` definition per module, taking the
+ * module source from the given platform's postprocessed outputs. Shared by all three platform
+ * branches of the generated header so they cannot drift apart.
+ */
+function moduleCodeConstants(outputs: typeof bundledOutputs.linux) {
+  return moduleList
+    .map(
+      id =>
+        `//
+static constexpr ASCIILiteral ${idToEnumName(id)}Code = ASCIILiteral::fromLiteralUnsafe(${fmtCPPString(
+          // Outputs are keyed by module path without its 3-character extension.
+          checkAscii(outputs.get(id.slice(0, -3))),
+        )});
+//
+`,
+    )
+    .join("\n");
+}
+
+writeIfNotChanged(
+  path.join(OUT_DIR, "InternalModuleRegistryConstants.h"),
+  `// clang-format off
+#pragma once
+
+namespace Bun {
+namespace InternalModuleRegistryConstants {
+
+#if __APPLE__
+ ${moduleCodeConstants(bundledOutputs.darwin)}
+ #elif _WIN32
+ ${moduleCodeConstants(bundledOutputs.win32)}
+ #else
+ // Not 100% accurate, but basically inlining linux on non-windows non-mac platforms.
+ ${moduleCodeConstants(bundledOutputs.linux)}
+#endif
+
+}
+}`,
+);
+
+// This is a generated enum for zig code (exports.zig)
+// Builtin JS modules get `(1 << 9) | index`, native modules get `(1 << 10) | nativeId`; the
+// comment emitted into the generated file now states the OR that is actually computed.
+writeIfNotChanged(
+  path.join(OUT_DIR, "ResolvedSourceTag.zig"),
+  `// zig fmt: off
+pub const ResolvedSourceTag = enum(u32) {
+ // Predefined
+ javascript = 0,
+ package_json_type_module = 1,
+ wasm = 2,
+ object = 3,
+ file = 4,
+ esm = 5,
+ json_for_object_loader = 6,
+
+ // Built in modules are loaded through InternalModuleRegistry by numerical ID.
+ // In this enum are represented as \`(1 << 9) | id\`
+${moduleList.map((id, n) => ` @"${idToPublicSpecifierOrEnumName(id)}" = ${(1 << 9) | n},`).join("\n")}
+ // Native modules run through a different system using ESM registry.
+${Object.entries(nativeModuleIds)
+  .map(([id, n]) => ` @"${id}" = ${(1 << 10) | n},`)
+  .join("\n")}
+};
+`,
+);
+
+// This is a generated enum for c++ code (headers-handwritten.h)
+// Builtin JS modules get `(1 << 9) | index`; the comment emitted into the generated file now
+// states the OR that is actually computed.
+// NOTE(review): NativeModuleFlag is emitted with bits 9 and 10 set, but the native module entries
+// below are `(1 << 10) | n` (bit 9 not set) -- confirm against the C++ consumer which is intended.
+writeIfNotChanged(
+  path.join(OUT_DIR, "SyntheticModuleType.h"),
+  `enum SyntheticModuleType : uint32_t {
+ JavaScript = 0,
+ PackageJSONTypeModule = 1,
+ Wasm = 2,
+ ObjectModule = 3,
+ File = 4,
+ ESM = 5,
+ JSONForObjectLoader = 6,
+
+ // Built in modules are loaded through InternalModuleRegistry by numerical ID.
+ // In this enum are represented as \`(1 << 9) | id\`
+ InternalModuleRegistryFlag = 1 << 9,
+${moduleList.map((id, n) => ` ${idToEnumName(id)} = ${(1 << 9) | n},`).join("\n")}
+
+ // Native modules run through the same system, but with different underlying initializers.
+ // They also have bit 10 set to differentiate them from JS builtins.
+ NativeModuleFlag = (1 << 10) | (1 << 9),
+${Object.entries(nativeModuleEnumToId)
+  .map(([id, n]) => ` ${id} = ${(1 << 10) | n},`)
+  .join("\n")}
+};
+
+`,
+);
+
+// This is used in ModuleLoader.cpp to link to all the headers for native modules.
+// One #include line per native module enum value, newline-terminated.
+const nativeModuleIncludes = Object.values(nativeModuleEnums).map(
+  enumName => `#include "../../bun.js/modules/${enumName}Module.h"`,
+);
+writeIfNotChanged(path.join(OUT_DIR, "NativeModuleImpl.h"), nativeModuleIncludes.join("\n") + "\n");
+
+// This is used for debug builds for the base path for dynamic loading
+// fs.writeFileSync(
+// path.join(OUT_DIR, "DebugPath.h"),
+// `// Using __FILE__ does not give an absolute file path
+// // This is a workaround for that.
+// #define BUN_DYNAMIC_JS_LOAD_PATH "${path.join(OUT_DIR, "")}"
+// `,
+// );
+
+mark("Generate Code");
diff --git a/src/codegen/client-js.ts b/src/codegen/client-js.ts
new file mode 100644
index 000000000..4dfa6acf6
--- /dev/null
+++ b/src/codegen/client-js.ts
@@ -0,0 +1,47 @@
+// This is the implementation for $debug
+/**
+ * Returns the JS prelude that defines `$debug_log_enabled`, `$debug_pid_prefix` and `$debug_log`
+ * for one module. `filepath` determines the per-module environment variable names and
+ * `publicName` is the label printed in front of every debug line.
+ */
+export function createLogClientJS(filepath: string, publicName: string) {
+  // Drop everything up to the first ":" and turn "-", "_", "." and "/" into "_",
+  // e.g. "node/fs.promises" -> "NODE_FS_PROMISES".
+  const envSuffix = filepath
+    .replace(/^.*?:/, "")
+    .replace(/[-_./]/g, "_")
+    .toUpperCase();
+  return `
+let $debug_log_enabled = ((env) => (
+ // The rationale for checking all these variables is just so you don't have to exactly remember which one you set.
+ (env.BUN_DEBUG_ALL && env.BUN_DEBUG_ALL !== '0')
+ || (env.BUN_DEBUG_JS && env.BUN_DEBUG_JS !== '0')
+ || (env.BUN_DEBUG_${envSuffix})
+ || (env.DEBUG_${envSuffix})
+))(Bun.env);
+let $debug_pid_prefix = Bun.env.SHOW_PID === '1';
+let $debug_log = $debug_log_enabled ? (...args) => {
+ // warn goes to stderr without colorizing
+ console.warn(($debug_pid_prefix ? \`[\${process.pid}] \` : '') + (Bun.enableANSIColors ? '\\x1b[90m[${publicName}]\\x1b[0m' : '[${publicName}]'), ...args);
+} : () => {};
+`;
+}
+
+/**
+ * Returns the JS prelude that defines `$assert(check, sourceString, ...message)` for one module.
+ *
+ * In the generated code a failed check builds an Error named "AssertionError" whose stack omits
+ * the first frame, prints "[publicName] ASSERTION FAILED: <sourceString>" plus any extra message
+ * arguments, exits the process with code 0xAA when the ASSERT environment variable is "CRASH",
+ * and otherwise throws the error. `Error.prepareStackTrace` is overridden only while the stack
+ * is materialized and is restored afterwards.
+ *
+ * @param publicName label printed in the failure message
+ */
+export function createAssertClientJS(publicName: string) {
+ return `
+let $assert = function(check, sourceString, ...message) {
+ if (!check) {
+ const prevPrepareStackTrace = Error.prepareStackTrace;
+ Error.prepareStackTrace = (e, stack) => {
+ return e.name + ': ' + e.message + '\\n' + stack.slice(1).map(x => ' at ' + x.toString()).join('\\n');
+ };
+ const e = new Error(sourceString);
+ e.stack; // materialize stack
+ e.name = 'AssertionError';
+ Error.prepareStackTrace = prevPrepareStackTrace;
+ console.error('[${publicName}] ASSERTION FAILED: ' + sourceString);
+ if (message.length) console.warn(...message);
+ console.warn(e.stack.split('\\n')[1] + '\\n');
+ if (Bun.env.ASSERT === 'CRASH') process.exit(0xAA);
+ throw e;
+ }
+}
+`;
+}
diff --git a/src/codegen/create-hash-table.ts b/src/codegen/create-hash-table.ts
index 776abfdea..0d9b08d41 100644
--- a/src/codegen/create-hash-table.ts
+++ b/src/codegen/create-hash-table.ts
@@ -1,12 +1,12 @@
import { spawn } from "bun";
import path from "path";
+import { writeIfNotChanged } from "./helpers";
const input = process.argv[2];
-const out_dir = process.argv[3];
+const output = process.argv[3];
const create_hash_table = path.join(import.meta.dir, "./create_hash_table");
-console.time("Generate LUT");
const { stdout, exited } = spawn({
cmd: [create_hash_table, input],
stdout: "pipe",
@@ -18,6 +18,6 @@ str = str.replaceAll(/^\/\/.*$/gm, "");
str = str.replaceAll(/^#include.*$/gm, "");
str = str.replaceAll(`namespace JSC {`, "");
str = str.replaceAll(`} // namespace JSC`, "");
-str = "// File generated via `make static-hash-table` / `make cpp`\n" + str.trim() + "\n";
-await Bun.write(input.replace(/\.cpp$/, ".lut.h").replace(/(\.lut)?\.txt$/, ".lut.h"), str);
-console.log("Wrote", path.join(out_dir, path.basename(process.cwd(), input.replace(/\.cpp$/, ".lut.h"))));
+str = "// File generated via `static-hash-table.ts`\n" + str.trim() + "\n";
+
+writeIfNotChanged(output, str);
diff --git a/src/codegen/generate-jssink.ts b/src/codegen/generate-jssink.ts
index 8774105d2..9ce0e2313 100644
--- a/src/codegen/generate-jssink.ts
+++ b/src/codegen/generate-jssink.ts
@@ -480,7 +480,7 @@ JSC_DEFINE_HOST_FUNCTION(${name}__doClose, (JSC::JSGlobalObject * lexicalGlobalO
}
templ += `
-#include "JSSinkLookupTable.h"
+#include "JSSink.lut.h"
`;
for (let name of classes) {
@@ -960,5 +960,5 @@ Bun.spawnSync([
process.execPath,
join(import.meta.dir, "create-hash-table.ts"),
resolve(outDir + "/JSSink.cpp"),
- outDir,
+ join(outDir, "JSSink.lut.h"),
]);
diff --git a/src/codegen/helpers.ts b/src/codegen/helpers.ts
new file mode 100644
index 000000000..d259a526e
--- /dev/null
+++ b/src/codegen/helpers.ts
@@ -0,0 +1,64 @@
+import fs from "fs";
+import path from "path";
+import { isAscii } from "buffer";
+
+/**
+ * Formats `str` as a double-quoted C++ string literal, escaping backslashes, double quotes,
+ * newlines, carriage returns, tabs and question marks.
+ */
+export function fmtCPPString(str: string) {
+  const escapes: Record<string, string> = {
+    "\\": "\\\\",
+    '"': '\\"',
+    "\n": "\\n",
+    "\r": "\\r",
+    "\t": "\\t",
+    "?": "\\?", // https://stackoverflow.com/questions/1234582
+  };
+  // A single pass is equivalent to escaping backslashes first and the other characters afterwards.
+  const escaped = str.replace(/[\\"\n\r\t?]/g, ch => escapes[ch]);
+  return `"${escaped}"`;
+}
+
+/**
+ * Upper-cases the first character of `str`.
+ * An empty string is returned unchanged instead of throwing on `str[0]`.
+ */
+export function cap(str: string) {
+  if (str.length === 0) return str;
+  return str[0].toUpperCase() + str.slice(1);
+}
+
+/**
+ * Lower-cases the leading part of an identifier: a leading "JS" becomes "js", otherwise only
+ * the first character is lower-cased. An empty string is returned unchanged instead of throwing
+ * on `str[0]`.
+ */
+export function low(str: string) {
+  if (str.length === 0) return str;
+  if (str.startsWith("JS")) {
+    return "js" + str.slice(2);
+  }
+
+  return str[0].toLowerCase() + str.slice(1);
+}
+
+/**
+ * Returns the paths of all non-directory entries below `root`, descending into
+ * subdirectories depth-first in the order readdirSync reports them.
+ */
+export function readdirRecursive(root: string): string[] {
+  const found: string[] = [];
+  for (const entry of fs.readdirSync(root, { withFileTypes: true })) {
+    const entryPath = path.join(root, entry.name);
+    if (entry.isDirectory()) {
+      found.push(...readdirRecursive(entryPath));
+    } else {
+      found.push(entryPath);
+    }
+  }
+  return found;
+}
+
+/** Resolves `specifier` relative to `from` with Bun.resolveSync; returns null when that throws. */
+export function resolveSyncOrNull(specifier: string, from: string) {
+  let resolved: string | null = null;
+  try {
+    resolved = Bun.resolveSync(specifier, from);
+  } catch {
+    // A failed resolution is reported to the caller as null.
+  }
+  return resolved;
+}
+
+/**
+ * Returns `str` unchanged when it is pure ASCII; otherwise throws.
+ *
+ * The error names the first offending character, its index and a short excerpt. The inputs in
+ * this codebase are whole bundled modules, so embedding the full string in the message would
+ * bury the actual problem.
+ *
+ * @throws Error when `str` contains a non-ASCII character
+ */
+export function checkAscii(str: string) {
+  if (isAscii(Buffer.from(str))) {
+    return str;
+  }
+
+  // isAscii failed, so at least one UTF-16 code unit is above 0x7f.
+  const index = str.search(/[^\x00-\x7f]/);
+  const codePoint = str.codePointAt(index) ?? 0;
+  const excerpt = str.slice(Math.max(0, index - 20), index + 20);
+  throw new Error(
+    `non-ascii character U+${codePoint.toString(16).toUpperCase().padStart(4, "0")} at index ${index} in string near ${JSON.stringify(
+      excerpt,
+    )}. this will not be a valid ASCIILiteral`,
+  );
+}
+
+/**
+ * Writes `contents` to `file` unless the file already holds exactly that text, so unchanged
+ * outputs are not rewritten. Missing parent directories are created first, so the call no
+ * longer fails when the output directory does not exist yet.
+ */
+export function writeIfNotChanged(file: string, contents: string) {
+  if (fs.existsSync(file)) {
+    const oldContents = fs.readFileSync(file, "utf8");
+    if (oldContents === contents) {
+      return;
+    }
+  }
+
+  fs.mkdirSync(path.dirname(file), { recursive: true });
+  fs.writeFileSync(file, contents);
+}
diff --git a/src/codegen/internal-module-registry-scanner.ts b/src/codegen/internal-module-registry-scanner.ts
new file mode 100644
index 000000000..7bc2f9bdd
--- /dev/null
+++ b/src/codegen/internal-module-registry-scanner.ts
@@ -0,0 +1,92 @@
+import fs from "fs";
+import path from "path";
+import { readdirRecursive, resolveSyncOrNull } from "./helpers";
+
+/**
+ * Scans `basedir` for builtin JS modules and builds the lookup tables used by codegen.
+ *
+ * Returns:
+ * - `moduleList`: sorted module file paths relative to `basedir`; a module's index is its ID.
+ * - `internalRegistry`: public specifier ("node:x", "bun:x", "internal/x") -> module ID.
+ * - `nativeModuleIds` / `nativeModuleEnums` / `nativeModuleEnumToId`: tables parsed from the
+ *   BUN_FOREACH_NATIVE_MODULE macro in `../bun.js/modules/_NativeModule.h`.
+ * - `requireTransformer(specifier, from)`: the expression text that replaces a `require()` call.
+ *
+ * @throws Error when the native module macro cannot be found, or (from requireTransformer)
+ *   when a specifier cannot be resolved or resolves to a file that has no module ID.
+ */
+export function createInternalModuleRegistry(basedir: string) {
+  const moduleList = ["bun", "node", "thirdparty", "internal"]
+    .flatMap(dir => readdirRecursive(path.join(basedir, dir)))
+    .filter(file => file.endsWith(".js") || (file.endsWith(".ts") && !file.endsWith(".d.ts")))
+    .map(file => file.slice(basedir.length + 1))
+    .sort();
+
+  // Create the Internal Module Registry
+  const internalRegistry = new Map();
+  for (let i = 0; i < moduleList.length; i++) {
+    // moduleList holds file paths, so the directory is followed by "/" (never ":").
+    // Every replacement prefix has the same length as the directory prefix it replaces.
+    const prefix = moduleList[i].startsWith("node/")
+      ? "node:"
+      : moduleList[i].startsWith("bun/")
+        ? "bun:"
+        : moduleList[i].startsWith("internal/")
+          ? "internal/"
+          : undefined;
+    if (prefix) {
+      // "." becomes "/" (including the extension dot), then the 3-character "/ts" or "/js" tail is dropped.
+      const id = prefix + moduleList[i].slice(prefix.length).replaceAll(".", "/").slice(0, -3);
+      internalRegistry.set(id, i);
+    }
+  }
+
+  // Native Module registry
+  const nativeModuleH = fs.readFileSync(path.join(basedir, "../bun.js/modules/_NativeModule.h"), "utf8");
+  const nativeModuleDefine = nativeModuleH.match(/BUN_FOREACH_NATIVE_MODULE\(macro\)\s*\\\n((.*\\\n)*\n)/);
+  if (!nativeModuleDefine) {
+    throw new Error(
+      "Could not find BUN_FOREACH_NATIVE_MODULE in _NativeModule.h. Knowing native module IDs is a part of the codegen process.",
+    );
+  }
+  let nextNativeModuleId = 0;
+  const nativeModuleIds: Record<string, number> = {};
+  const nativeModuleEnums: Record<string, string> = {};
+  const nativeModuleEnumToId: Record<string, number> = {};
+  // Native module IDs are assigned in the order the macro lists them.
+  for (const [_, idString, enumValue] of nativeModuleDefine[0].matchAll(/macro\((.*?),(.*?)\)/g)) {
+    const processedIdString = JSON.parse(idString.trim().replace(/_s$/, ""));
+    const processedEnumValue = enumValue.trim();
+    const processedNumericId = nextNativeModuleId++;
+    nativeModuleIds[processedIdString] = processedNumericId;
+    nativeModuleEnums[processedIdString] = processedEnumValue;
+    nativeModuleEnumToId[processedEnumValue] = processedNumericId;
+  }
+
+  function codegenRequireId(id: string) {
+    return `(__intrinsic__getInternalField(__intrinsic__internalModuleRegistry, ${id}) || __intrinsic__createInternalModuleById(${id}))`;
+  }
+
+  function codegenRequireNativeModule(id: string) {
+    return `(__intrinsic__requireNativeModule(${id.replace(/node:/, "")}))`;
+  }
+
+  const requireTransformer = (specifier: string, from: string) => {
+    // this one is deprecated
+    if (specifier === "$shared") specifier = "./internal/shared.ts";
+
+    const directMatch = internalRegistry.get(specifier);
+    // Module ID 0 is a valid match, so compare against undefined rather than testing truthiness.
+    if (directMatch !== undefined) return codegenRequireId(`${directMatch}/*${specifier}*/`);
+
+    if (specifier in nativeModuleIds) {
+      return codegenRequireNativeModule(JSON.stringify(specifier));
+    }
+
+    // Try relative to the requiring file first, then relative to the module root.
+    const relativeMatch =
+      resolveSyncOrNull(specifier, path.join(basedir, path.dirname(from))) ?? resolveSyncOrNull(specifier, basedir);
+
+    if (relativeMatch) {
+      const found = moduleList.indexOf(path.relative(basedir, relativeMatch));
+      if (found === -1) {
+        throw new Error(
+          `Builtin Bundler: "${specifier}" cannot be imported here because it doesn't get a module ID. Only files in "src/js" besides "src/js/builtins" can be used here. Note that the 'node:' or 'bun:' prefix is required here. `,
+        );
+      }
+      return codegenRequireId(`${found}/*${path.relative(basedir, relativeMatch)}*/`);
+    }
+
+    throw new Error(`Builtin Bundler: Could not resolve "${specifier}" in ${from}.`);
+  };
+
+  return {
+    requireTransformer,
+    nativeModuleIds,
+    nativeModuleEnums,
+    nativeModuleEnumToId,
+    internalRegistry,
+    moduleList,
+  } as const;
+}
diff --git a/src/codegen/replacements.ts b/src/codegen/replacements.ts
new file mode 100644
index 000000000..fbe7478de
--- /dev/null
+++ b/src/codegen/replacements.ts
@@ -0,0 +1,179 @@
+import { LoaderKeys } from "../api/schema";
+import { sliceSourceCode } from "./builtin-parser";
+
+// This is a list of extra syntax replacements to do. Kind of like macros
+// These are only run on code itself, not string contents or comments.
+// Rules are applied in array order, so the `throw new TypeError` rule runs before the more
+// general `new TypeError` rule. A "$" in `to` is rewritten to "__intrinsic__" when applied.
+export const replacements: ReplacementRule[] = [
+ { from: /\bthrow new TypeError\b/g, to: "$throwTypeError" },
+ { from: /\bthrow new RangeError\b/g, to: "$throwRangeError" },
+ { from: /\bthrow new OutOfMemoryError\b/g, to: "$throwOutOfMemoryError" },
+ { from: /\bnew TypeError\b/g, to: "$makeTypeError" },
+ { from: /\bexport\s*default/g, to: "$exports =" },
+];
+
+// These rules are run on the entire file, including within strings.
+// Capture group 1 is the issue number and group 2 is the quoted feature name (quotes included);
+// the `to` strings refer to them as $1 and $2.
+export const globalReplacements: ReplacementRule[] = [
+ {
+ from: /\bnotImplementedIssue\(\s*([0-9]+)\s*,\s*((?:"[^"]*"|'[^']+'))\s*\)/g,
+ to: "new TypeError(`${$2} is not implemented yet. See https://github.com/oven-sh/bun/issues/$1`)",
+ },
+ {
+ from: /\bnotImplementedIssueFn\(\s*([0-9]+)\s*,\s*((?:"[^"]*"|'[^']+'))\s*\)/g,
+ to: "() => $throwTypeError(`${$2} is not implemented yet. See https://github.com/oven-sh/bun/issues/$1`)",
+ },
+];
+
+// This is a list of globals we should access using @ notation.
+// This prevents global override attacks.
+// Note that the public `Bun` global is immutable.
+// undefined -> __intrinsic__undefined -> @undefined
+// Each name appears exactly once ("Buffer" used to be listed twice).
+export const globalsToPrefix = [
+  "AbortSignal",
+  "Array",
+  "ArrayBuffer",
+  "Buffer",
+  "Infinity",
+  "Loader",
+  "Promise",
+  "ReadableByteStreamController",
+  "ReadableStream",
+  "ReadableStreamBYOBReader",
+  "ReadableStreamBYOBRequest",
+  "ReadableStreamDefaultController",
+  "ReadableStreamDefaultReader",
+  "TransformStream",
+  "TransformStreamDefaultController",
+  "Uint8Array",
+  "String",
+  "RegExp",
+  "WritableStream",
+  "WritableStreamDefaultController",
+  "WritableStreamDefaultWriter",
+  "isFinite",
+  "undefined",
+];
+
+// These enums map to $<enum>IdToLabel and $<enum>LabelToId
+// Make sure to define in ./builtins.d.ts
+// A value is either an array of labels or an object whose non-numeric keys are the labels;
+// a label's ID is its position in that list.
+export const enums = {
+ Loader: LoaderKeys,
+ ImportKind: [
+ "entry-point",
+ "import-statement",
+ "require-call",
+ "dynamic-import",
+ "require-resolve",
+ "import-rule",
+ "url-token",
+ "internal",
+ ],
+};
+
+// These identifiers have type definitions but are not present at runtime (converted with replacements).
+// If they are still present in the bundle, we warn the user.
+// TODO: implement this check.
+export const warnOnIdentifiersNotPresentAtRuntime = [
+ //
+ "OutOfMemoryError",
+ "notImplementedIssue",
+ "notImplementedIssueFn",
+];
+
+// These are passed to --define to the bundler
+// Keys starting with "$" are re-keyed to "__intrinsic__<name>" by the loop further down.
+// NOTE(review): the other define values in this codebase are source text (JSON.stringify(...) for
+// strings, "false" for booleans); "production" here is a bare word rather than a quoted string
+// literal -- confirm that this is what the bundler expects.
+export const define: Record<string, string> = {
+ "process.env.NODE_ENV": "production",
+ "IS_BUN_DEVELOPMENT": "false",
+
+ $streamClosed: "1",
+ $streamClosing: "2",
+ $streamErrored: "3",
+ $streamReadable: "4",
+ $streamWaiting: "5",
+ $streamWritable: "6",
+};
+
+// ------------------------------ //
+
+// Derive $<Enum>IdToLabel (array literal) and $<Enum>LabelToId (object literal) for every enum.
+for (const name in enums) {
+  const value = enums[name];
+  // `typeof null` is "object", so null needs its own check (`typeof value === null` is never true).
+  if (typeof value !== "object" || value === null) {
+    throw new Error("Invalid enum object " + name + " defined in " + import.meta.file);
+  }
+  // For objects, skip purely numeric keys.
+  const keys = Array.isArray(value) ? value : Object.keys(value).filter(k => !k.match(/^[0-9]+$/));
+  define[`$${name}IdToLabel`] = "[" + keys.map(k => `"${k}"`).join(", ") + "]";
+  define[`$${name}LabelToId`] = "{" + keys.map(k => `"${k}": ${keys.indexOf(k)}`).join(", ") + "}";
+}
+
+// Every prefixed global is redirected to its __intrinsic__ counterpart.
+for (const name of globalsToPrefix) {
+  define[name] = "__intrinsic__" + name;
+}
+
+// Re-key "$name" entries to "__intrinsic__name". Iterate over a snapshot of the keys because
+// the loop adds and deletes properties on `define`.
+for (const key of Object.keys(define)) {
+  if (key.startsWith("$")) {
+    define["__intrinsic__" + key.slice(1)] = define[key];
+    delete define[key];
+  }
+}
+
+/** One textual rewrite rule, as used by `replacements` and `globalReplacements`. */
+export interface ReplacementRule {
+ /** Pattern to search for in the source text. */
+ from: RegExp;
+ /** Replacement text; a "$" in it is rewritten to "__intrinsic__" before the rule is applied. */
+ to: string;
+ // NOTE(review): nothing in this file sets or reads `global` -- confirm whether it is still needed.
+ global?: boolean;
+}
+
+/**
+ * Applies source code replacements as defined in `replacements` to the first `length`
+ * characters of `src`.
+ *
+ * Returns a 3-element array `[processed, rest, special]`:
+ * - `processed`: the rewritten slice,
+ * - `rest`: the not-yet-processed remainder of `src`,
+ * - `special`: true when the slice ended in `$debug` / `$assert` directly followed by "(" and
+ *   the whole call (taken from `rest`) was consumed and wrapped in an IS_BUN_DEVELOPMENT check.
+ */
+export function applyReplacements(src: string, length: number) {
+ let slice = src.slice(0, length);
+ let rest = src.slice(length);
+ // `$name` -> `__intrinsic__name`. The pattern needs a preceding character that is not an
+ // identifier character or "$", so a `$name` at the very start of the slice is left untouched.
+ slice = slice.replace(/([^a-zA-Z0-9_\$])\$([a-zA-Z0-9_]+\b)/gm, `$1__intrinsic__$2`);
+ for (const replacement of replacements) {
+ slice = slice.replace(replacement.from, replacement.to.replaceAll("$", "__intrinsic__"));
+ }
+ let match;
+ if ((match = slice.match(/__intrinsic__(debug|assert)$/)) && rest.startsWith("(")) {
+ const name = match[1];
+ if (name === "debug") {
+ // innerSlice.result is the argument list taken from `rest`; it is appended to $debug_log as-is.
+ const innerSlice = sliceSourceCode(rest, true);
+ return [
+ slice.slice(0, match.index) + "(IS_BUN_DEVELOPMENT?$debug_log" + innerSlice.result + ":void 0)",
+ innerSlice.rest,
+ true,
+ ];
+ } else if (name === "assert") {
+ // With endOnComma=true the first slice stops at the end of the first argument.
+ const checkSlice = sliceSourceCode(rest, true, undefined, true);
+ let rest2 = checkSlice.rest;
+ let extraArgs = "";
+ // A trailing "," means more arguments follow; slice them out as a second parenthesized group.
+ if (checkSlice.result.at(-1) === ",") {
+ const sliced = sliceSourceCode("(" + rest2.slice(1), true, undefined, false);
+ extraArgs = ", " + sliced.result.slice(1, -1);
+ rest2 = sliced.rest;
+ }
+ // The check expression is passed twice: as code, and as a JSON string (with `__intrinsic__`
+ // turned back into "$") that serves as the assertion's source text.
+ return [
+ slice.slice(0, match.index) +
+ "(IS_BUN_DEVELOPMENT?$assert(" +
+ checkSlice.result.slice(1, -1) +
+ "," +
+ JSON.stringify(
+ checkSlice.result
+ .slice(1, -1)
+ .replace(/__intrinsic__/g, "$")
+ .trim(),
+ ) +
+ extraArgs +
+ "):void 0)",
+ rest2,
+ true,
+ ];
+ }
+ }
+ return [slice, rest, false];
+}
+
+/**
+ * Applies source code replacements as defined in `globalReplacements` to the whole of `src`.
+ *
+ * Only a "$" that starts an identifier (e.g. `$throwTypeError`) is rewritten to
+ * "__intrinsic__". The capture references `$1` / `$2` and the literal `${` in the rules' `to`
+ * strings are left alone so String.prototype.replace can substitute the captured issue number
+ * and name. Rewriting every "$" turned them into the literal text `__intrinsic__1` /
+ * `__intrinsic__2`.
+ */
+export function applyGlobalReplacements(src: string) {
+  let result = src;
+  for (const replacement of globalReplacements) {
+    const to = replacement.to.replace(/\$(?=[A-Za-z_])/g, "__intrinsic__");
+    result = result.replace(replacement.from, to);
+  }
+  return result;
+}