diff options
Diffstat (limited to 'src/js/_codegen')
-rw-r--r-- | src/js/_codegen/build-functions.ts | 90 | ||||
-rw-r--r-- | src/js/_codegen/build-modules.ts | 34 | ||||
-rw-r--r-- | src/js/_codegen/builtin-parser.ts | 6 | ||||
-rw-r--r-- | src/js/_codegen/client-js.ts | 17 | ||||
-rw-r--r-- | src/js/_codegen/helpers.ts | 9 | ||||
-rw-r--r-- | src/js/_codegen/replacements.ts | 55 | ||||
-rw-r--r-- | src/js/_codegen/static-hash-tables.ts | 43 |
7 files changed, 168 insertions, 86 deletions
diff --git a/src/js/_codegen/build-functions.ts b/src/js/_codegen/build-functions.ts index 761682d44..275cd2e1a 100644 --- a/src/js/_codegen/build-functions.ts +++ b/src/js/_codegen/build-functions.ts @@ -1,40 +1,9 @@ -import { existsSync, mkdirSync, readdirSync, rmSync, writeFileSync } from "fs"; +import { existsSync, mkdirSync, readdirSync, rmSync } from "fs"; import path from "path"; import { sliceSourceCode } from "./builtin-parser"; -import { applyGlobalReplacements, enums, globalsToPrefix } from "./replacements"; +import { applyGlobalReplacements, define } from "./replacements"; import { cap, fmtCPPString, low } from "./helpers"; -import { spawn } from "bun"; - -async function createStaticHashtables() { - const STATIC_HASH_TABLES = ["src/bun.js/bindings/Process.cpp", "src/bun.js/bindings/BunObject.cpp"]; - console.time("Creating static hash tables..."); - const create_hash_table = path.join(import.meta.dir, "../../../src/bun.js/scripts/create_hash_table"); - if (!create_hash_table) { - console.warn( - "Could not find create_hash_table executable. Run `bun i` or clone webkit to build static hash tables", - ); - return; - } - for (let cpp of STATIC_HASH_TABLES) { - cpp = path.join(import.meta.dir, "../../../", cpp); - const { stdout, exited } = spawn({ - cmd: [create_hash_table, cpp], - stdout: "pipe", - stderr: "inherit", - }); - await exited; - let str = await new Response(stdout).text(); - str = str.replaceAll(/^\/\/.*$/gm, ""); - str = str.replaceAll(/^#include.*$/gm, ""); - str = str.replaceAll(`namespace JSC {`, ""); - str = str.replaceAll(`} // namespace JSC`, ""); - str = "// File generated via `make generate-builtins`\n" + str.trim() + "\n"; - await Bun.write(cpp.replace(/\.cpp$/, ".lut.h"), str); - } - console.timeEnd("Creating static hash tables..."); -} -const staticHashTablePromise = createStaticHashtables(); console.log("Bundling Bun builtin functions..."); const MINIFY = process.argv.includes("--minify") || process.argv.includes("-m"); @@ -48,24 +17,6 @@ const TMP_DIR = path.join(SRC_DIR, "../out/tmp/builtins"); if (existsSync(TMP_DIR)) rmSync(TMP_DIR, { recursive: true }); mkdirSync(TMP_DIR, { recursive: true }); -const define = { - "process.env.NODE_ENV": "production", - "IS_BUN_DEVELOPMENT": "false", -}; - -for (const name in enums) { - const value = enums[name]; - if (typeof value !== "object") throw new Error("Invalid enum object " + name + " defined in " + import.meta.file); - if (typeof value === null) throw new Error("Invalid enum object " + name + " defined in " + import.meta.file); - const keys = Array.isArray(value) ? value : Object.keys(value).filter(k => !k.match(/^[0-9]+$/)); - define[`__intrinsic__${name}IdToLabel`] = "[" + keys.map(k => `"${k}"`).join(", ") + "]"; - define[`__intrinsic__${name}LabelToId`] = "{" + keys.map(k => `"${k}": ${keys.indexOf(k)}`).join(", ") + "}"; -} - -for (const name of globalsToPrefix) { - define[name] = "__intrinsic__" + name; -} - interface ParsedBuiltin { name: string; params: string[]; @@ -77,9 +28,9 @@ interface BundledBuiltin { name: string; directives: Record<string, any>; isGetter: boolean; - isConstructor: boolean; + constructAbility: string; + constructKind: string; isLinkTimeConstant: boolean; - isNakedConstructor: boolean; intrinsic: string; overriddenName: string; source: string; @@ -146,12 +97,13 @@ async function processFileSplit(filename: string): Promise<{ functions: BundledB throw new SyntaxError("Could not parse directive value " + directive[2] + " (must be JSON parsable)"); } if (name === "constructor") { - throw new SyntaxError("$constructor not implemented"); - } - if (name === "nakedConstructor") { - throw new SyntaxError("$nakedConstructor not implemented"); + directives.ConstructAbility = "CanConstruct"; + } else if (name === "nakedConstructor") { + directives.ConstructAbility = "CanConstruct"; + directives.ConstructKind = "Naked"; + } else { + directives[name] = value; } - directives[name] = value; contents = contents.slice(directive[0].length); } else if (match[1] === "export function" || match[1] === "export async function") { const declaration = contents.match( @@ -218,7 +170,7 @@ $$capture_start$$(${fn.async ? "async " : ""}${ const build = await Bun.build({ entrypoints: [tmpFile], define, - minify: { syntax: true, whitespace: true }, + minify: { syntax: true, whitespace: false }, }); if (!build.success) { throw new AggregateError(build.logs, "Failed bundling builtin function " + fn.name + " from " + basename + ".ts"); @@ -231,7 +183,7 @@ $$capture_start$$(${fn.async ? "async " : ""}${ const finalReplacement = (fn.directives.sloppy ? captured : captured.replace(/function\s*\(.*?\)\s*{/, '$&"use strict";')) .replace(/^\((async )?function\(/, "($1function (") - .replace(/__intrinsic__lazy\(/g, "globalThis[globalThis.Symbol.for('Bun.lazy')](") + // .replace(/__intrinsic__lazy\(/g, "globalThis[globalThis.Symbol.for('Bun.lazy')](") .replace(/__intrinsic__/g, "@") + "\n"; bundledFunctions.push({ @@ -241,9 +193,9 @@ $$capture_start$$(${fn.async ? "async " : ""}${ params: fn.params, visibility: fn.directives.visibility ?? (fn.directives.linkTimeConstant ? "Private" : "Public"), isGetter: !!fn.directives.getter, - isConstructor: !!fn.directives.constructor, + constructAbility: fn.directives.ConstructAbility ?? "CannotConstruct", + constructKind: fn.directives.ConstructKind ?? "None", isLinkTimeConstant: !!fn.directives.linkTimeConstant, - isNakedConstructor: !!fn.directives.nakedConstructor, intrinsic: fn.directives.intrinsic ?? "NoIntrinsic", overriddenName: fn.directives.getter ? `"get ${fn.name}"_s` @@ -254,12 +206,14 @@ $$capture_start$$(${fn.async ? "async " : ""}${ } return { - functions: bundledFunctions, + functions: bundledFunctions.sort((a, b) => a.name.localeCompare(b.name)), internal, }; } -const filesToProcess = readdirSync(SRC_DIR).filter(x => x.endsWith(".ts") && !x.endsWith(".d.ts")); +const filesToProcess = readdirSync(SRC_DIR) + .filter(x => x.endsWith(".ts") && !x.endsWith(".d.ts")) + .sort(); const files: Array<{ basename: string; functions: BundledBuiltin[]; internal: boolean }> = []; async function processFile(x: string) { @@ -305,8 +259,8 @@ for (const { basename, functions } of files) { for (const fn of functions) { const name = `${lowerBasename}${cap(fn.name)}Code`; bundledCPP += `// ${fn.name} -const JSC::ConstructAbility s_${name}ConstructAbility = JSC::ConstructAbility::CannotConstruct; -const JSC::ConstructorKind s_${name}ConstructorKind = JSC::ConstructorKind::None; +const JSC::ConstructAbility s_${name}ConstructAbility = JSC::ConstructAbility::${fn.constructAbility}; +const JSC::ConstructorKind s_${name}ConstructorKind = JSC::ConstructorKind::${fn.constructKind}; const JSC::ImplementationVisibility s_${name}ImplementationVisibility = JSC::ImplementationVisibility::${fn.visibility}; const int s_${name}Length = ${fn.source.length}; static const JSC::Intrinsic s_${name}Intrinsic = JSC::NoIntrinsic; @@ -459,7 +413,7 @@ public: explicit ${basename}BuiltinsWrapper(JSC::VM& vm) : m_vm(vm) WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_FUNCTION_NAME(INITIALIZE_BUILTIN_NAMES) -#define INITIALIZE_BUILTIN_SOURCE_MEMBERS(name, functionName, overriddenName, length) , m_##name##Source(JSC::makeSource(StringImpl::createWithoutCopying(s_##name, length), { })) +#define INITIALIZE_BUILTIN_SOURCE_MEMBERS(name, functionName, overriddenName, length) , m_##name##Source(JSC::makeSource(StringImpl::createWithoutCopying(s_##name, length), { }, JSC::SourceTaintedOrigin::Untainted)) WEBCORE_FOREACH_${basename.toUpperCase()}_BUILTIN_CODE(INITIALIZE_BUILTIN_SOURCE_MEMBERS) #undef INITIALIZE_BUILTIN_SOURCE_MEMBERS { @@ -652,8 +606,6 @@ if (!KEEP_TMP) { await rmSync(TMP_DIR, { recursive: true }); } -await staticHashTablePromise; - console.log( `Embedded JS size: %s bytes (across %s functions, %s files)`, totalJSSize, diff --git a/src/js/_codegen/build-modules.ts b/src/js/_codegen/build-modules.ts index 3443db6f6..3591d812e 100644 --- a/src/js/_codegen/build-modules.ts +++ b/src/js/_codegen/build-modules.ts @@ -1,10 +1,11 @@ import fs from "fs"; import path from "path"; import { sliceSourceCode } from "./builtin-parser"; -import { cap, fmtCPPString, readdirRecursive, resolveSyncOrNull } from "./helpers"; +import { cap, checkAscii, fmtCPPString, readdirRecursive, resolveSyncOrNull } from "./helpers"; import { createAssertClientJS, createLogClientJS } from "./client-js"; import { builtinModules } from "node:module"; import { BuildConfig } from "bun"; +import { define } from "./replacements"; const t = new Bun.Transpiler({ loader: "tsx" }); @@ -28,7 +29,13 @@ const internalRegistry = new Map(); // Build Registry for (let i = 0; i < moduleList.length; i++) { - const prefix = moduleList[i].startsWith("node/") ? "node:" : moduleList[i].startsWith("bun:") ? "bun/" : undefined; + const prefix = moduleList[i].startsWith("node/") + ? "node:" + : moduleList[i].startsWith("bun:") + ? "bun/" + : moduleList[i].startsWith("internal/") + ? "internal/" + : undefined; if (prefix) { const id = prefix + moduleList[i].slice(prefix.length).replaceAll(".", "/").slice(0, -3); internalRegistry.set(id, i); @@ -90,7 +97,7 @@ globalThis.requireTransformer = (specifier: string, from: string) => { return codegenRequireId(`${found}/*${path.relative(BASE, relativeMatch)}*/`); } - throw new Error(`Builtin Bundler: Could not resolve "${specifier}" in ${from}. These cannot be relative.`); + throw new Error(`Builtin Bundler: Could not resolve "${specifier}" in ${from}.`); }; // Preprocess builtins @@ -173,6 +180,7 @@ const config = ({ platform, debug }: { platform: string; debug?: boolean }) => target: "bun", external: builtinModules, define: { + ...define, IS_BUN_DEVELOPMENT: String(!!debug), __intrinsic__debug: debug ? "$debug_log_enabled" : "false", "process.platform": JSON.stringify(platform), @@ -222,7 +230,7 @@ for (const [name, bundle, outputs] of [ .replace(/\$\$EXPORT\$\$\((.*)\).\$\$EXPORT_END\$\$;/, "return $1") .replace(/]\s*,\s*__(debug|assert)_end__\)/g, ")") .replace(/]\s*,\s*__debug_end__\)/g, ")") - .replace(/__intrinsic__lazy\(/g, "globalThis[globalThis.Symbol.for('Bun.lazy')](") + // .replace(/__intrinsic__lazy\(/g, "globalThis[globalThis.Symbol.for('Bun.lazy')](") .replace(/import.meta.require\((.*?)\)/g, (expr, specifier) => { try { const str = JSON.parse(specifier); @@ -325,6 +333,9 @@ JSValue InternalModuleRegistry::createInternalModuleById(JSGlobalObject* globalO // This header is used by InternalModuleRegistry.cpp, and should only be included in that file. // It inlines all the strings for the module IDs. +// +// We cannot use ASCIILiteral's `_s` operator for the module source code because for long +// strings it fails a constexpr assert. Instead, we do that assert in JS before we format the string fs.writeFileSync( path.join(BASE, "out/InternalModuleRegistryConstants.h"), `// clang-format off @@ -338,7 +349,9 @@ namespace InternalModuleRegistryConstants { .map( (id, n) => `// -static constexpr ASCIILiteral ${idToEnumName(id)}Code = ${fmtCPPString(bundledOutputs.darwin.get(id.slice(0, -3)))}_s; +static constexpr ASCIILiteral ${idToEnumName(id)}Code = ASCIILiteral::fromLiteralUnsafe(${fmtCPPString( + checkAscii(bundledOutputs.darwin.get(id.slice(0, -3))), + )}); // `, ) @@ -348,7 +361,9 @@ static constexpr ASCIILiteral ${idToEnumName(id)}Code = ${fmtCPPString(bundledOu .map( (id, n) => `// -static constexpr ASCIILiteral ${idToEnumName(id)}Code = ${fmtCPPString(bundledOutputs.win32.get(id.slice(0, -3)))}_s; +static constexpr ASCIILiteral ${idToEnumName(id)}Code = ASCIILiteral::fromLiteralUnsafe(${fmtCPPString( + checkAscii(bundledOutputs.win32.get(id.slice(0, -3))), + )}); // `, ) @@ -359,7 +374,9 @@ static constexpr ASCIILiteral ${idToEnumName(id)}Code = ${fmtCPPString(bundledOu .map( (id, n) => `// -static constexpr ASCIILiteral ${idToEnumName(id)}Code = ${fmtCPPString(bundledOutputs.linux.get(id.slice(0, -3)))}_s; +static constexpr ASCIILiteral ${idToEnumName(id)}Code = ASCIILiteral::fromLiteralUnsafe(${fmtCPPString( + checkAscii(bundledOutputs.linux.get(id.slice(0, -3))), + )}); // `, ) @@ -373,7 +390,8 @@ static constexpr ASCIILiteral ${idToEnumName(id)}Code = ${fmtCPPString(bundledOu // This is a generated enum for zig code (exports.zig) fs.writeFileSync( path.join(BASE, "out/ResolvedSourceTag.zig"), - `pub const ResolvedSourceTag = enum(u32) { + `// zig fmt: off +pub const ResolvedSourceTag = enum(u32) { // Predefined javascript = 0, package_json_type_module = 1, diff --git a/src/js/_codegen/builtin-parser.ts b/src/js/_codegen/builtin-parser.ts index ffd5671c1..4e35f13dd 100644 --- a/src/js/_codegen/builtin-parser.ts +++ b/src/js/_codegen/builtin-parser.ts @@ -79,12 +79,14 @@ export function sliceSourceCode( i = 1; } else if (endOnComma && contents.startsWith(",")) { if (bracketCount <= 1) { - result += ","; contents = contents.slice(1); - // if the next non-whitespace character is ), also consume + // if the next non-whitespace character is ), we will treat it like a ) let match = contents.match(/^\s*\)/); if (match) { contents = contents.slice(match[0].length); + result += ")"; + } else { + result += ","; } break; } diff --git a/src/js/_codegen/client-js.ts b/src/js/_codegen/client-js.ts index 849240c1f..4dfa6acf6 100644 --- a/src/js/_codegen/client-js.ts +++ b/src/js/_codegen/client-js.ts @@ -15,10 +15,11 @@ let $debug_log_enabled = ((env) => ( .split(/[-_./]/g) .join("_") .toUpperCase()}) -))(@Bun.env); +))(Bun.env); +let $debug_pid_prefix = Bun.env.SHOW_PID === '1'; let $debug_log = $debug_log_enabled ? (...args) => { // warn goes to stderr without colorizing - console.warn(Bun.enableANSIColors ? '\\x1b[90m[${publicName}]\\x1b[0m' : '[${publicName}]', ...args); + console.warn(($debug_pid_prefix ? \`[\${process.pid}] \` : '') + (Bun.enableANSIColors ? '\\x1b[90m[${publicName}]\\x1b[0m' : '[${publicName}]'), ...args); } : () => {}; `; } @@ -27,10 +28,18 @@ export function createAssertClientJS(publicName: string) { return ` let $assert = function(check, sourceString, ...message) { if (!check) { - console.error('[${publicName}] ASSERTION FAILED: ' + sourceString); - if(message.length)console.warn (' ${" ".repeat(publicName.length)}', ...message); + const prevPrepareStackTrace = Error.prepareStackTrace; + Error.prepareStackTrace = (e, stack) => { + return e.name + ': ' + e.message + '\\n' + stack.slice(1).map(x => ' at ' + x.toString()).join('\\n'); + }; const e = new Error(sourceString); + e.stack; // materialize stack e.name = 'AssertionError'; + Error.prepareStackTrace = prevPrepareStackTrace; + console.error('[${publicName}] ASSERTION FAILED: ' + sourceString); + if (message.length) console.warn(...message); + console.warn(e.stack.split('\\n')[1] + '\\n'); + if (Bun.env.ASSERT === 'CRASH') process.exit(0xAA); throw e; } } diff --git a/src/js/_codegen/helpers.ts b/src/js/_codegen/helpers.ts index 7f0c29ba0..74680b956 100644 --- a/src/js/_codegen/helpers.ts +++ b/src/js/_codegen/helpers.ts @@ -1,5 +1,6 @@ import fs from "fs"; import path from "path"; +import { isAscii } from "buffer"; export function fmtCPPString(str: string) { return ( @@ -42,3 +43,11 @@ export function resolveSyncOrNull(specifier: string, from: string) { return null; } } + +export function checkAscii(str: string) { + if (!isAscii(Buffer.from(str))) { + throw new Error(`non-ascii character in string "${str}". this will not be a valid ASCIILiteral`); + } + + return str; +} diff --git a/src/js/_codegen/replacements.ts b/src/js/_codegen/replacements.ts index 4621d6134..45f2426b5 100644 --- a/src/js/_codegen/replacements.ts +++ b/src/js/_codegen/replacements.ts @@ -24,13 +24,14 @@ export const globalReplacements: ReplacementRule[] = [ ]; // This is a list of globals we should access using @ notation +// This prevents a global override attacks. +// Note that the public `Bun` global is immutable. // undefined -> __intrinsic__undefined -> @undefined export const globalsToPrefix = [ "AbortSignal", "Array", "ArrayBuffer", "Buffer", - "Bun", "Infinity", "Loader", "Promise", @@ -79,6 +80,41 @@ export const warnOnIdentifiersNotPresentAtRuntime = [ "notImplementedIssueFn", ]; +// These are passed to --define to the bundler +export const define: Record<string, string> = { + "process.env.NODE_ENV": "production", + "IS_BUN_DEVELOPMENT": "false", + + $streamClosed: "1", + $streamClosing: "2", + $streamErrored: "3", + $streamReadable: "4", + $streamWaiting: "5", + $streamWritable: "6", +}; + +// ------------------------------ // + +for (const name in enums) { + const value = enums[name]; + if (typeof value !== "object") throw new Error("Invalid enum object " + name + " defined in " + import.meta.file); + if (typeof value === null) throw new Error("Invalid enum object " + name + " defined in " + import.meta.file); + const keys = Array.isArray(value) ? value : Object.keys(value).filter(k => !k.match(/^[0-9]+$/)); + define[`$${name}IdToLabel`] = "[" + keys.map(k => `"${k}"`).join(", ") + "]"; + define[`$${name}LabelToId`] = "{" + keys.map(k => `"${k}": ${keys.indexOf(k)}`).join(", ") + "}"; +} + +for (const name of globalsToPrefix) { + define[name] = "__intrinsic__" + name; +} + +for (const key in define) { + if (key.startsWith("$")) { + define["__intrinsic__" + key.slice(1)] = define[key]; + delete define[key]; + } +} + export interface ReplacementRule { from: RegExp; to: string; @@ -105,14 +141,27 @@ export function applyReplacements(src: string, length: number) { ]; } else if (name === "assert") { const checkSlice = sliceSourceCode(rest, true, undefined, true); + let rest2 = checkSlice.rest; + let extraArgs = ""; + if (checkSlice.result.at(-1) === ",") { + const sliced = sliceSourceCode("(" + rest2.slice(1), true, undefined, false); + extraArgs = ", " + sliced.result.slice(1, -1); + rest2 = sliced.rest; + } return [ slice.slice(0, match.index) + "(IS_BUN_DEVELOPMENT?$assert(" + checkSlice.result.slice(1, -1) + "," + - JSON.stringify(checkSlice.result.slice(1, -1).replace(/__intrinsic__/g, "$")) + + JSON.stringify( + checkSlice.result + .slice(1, -1) + .replace(/__intrinsic__/g, "$") + .trim(), + ) + + extraArgs + "):void 0)", - checkSlice.rest, + rest2, true, ]; } diff --git a/src/js/_codegen/static-hash-tables.ts b/src/js/_codegen/static-hash-tables.ts new file mode 100644 index 000000000..858974aa1 --- /dev/null +++ b/src/js/_codegen/static-hash-tables.ts @@ -0,0 +1,43 @@ +// TODO: move this file somewhere else. it doesnt make sense in src/js/ +// it generates C++ code not related to javascript at all +import { spawn } from "bun"; +import path from "../node/path"; + +const STATIC_HASH_TABLES = [ + // + "src/bun.js/bindings/BunObject.cpp", + "src/bun.js/bindings/ZigGlobalObject.lut.txt", + "src/bun.js/bindings/JSBuffer.cpp", + "src/bun.js/bindings/Process.cpp", + "src/bun.js/bindings/ProcessBindingConstants.cpp", + "src/bun.js/bindings/ProcessBindingNatives.cpp", +]; + +console.time("Creating static hash tables..."); +const create_hash_table = path.join(import.meta.dir, "../../../src/bun.js/scripts/create_hash_table"); +if (!create_hash_table) { + console.warn("Could not find create_hash_table executable. Run `bun i` or clone webkit to build static hash tables"); + process.exit(1); +} + +await Promise.all( + STATIC_HASH_TABLES.map(async cpp => { + cpp = path.join(import.meta.dir, "../../../", cpp); + const { stdout, exited } = spawn({ + cmd: [create_hash_table, cpp], + stdout: "pipe", + stderr: "inherit", + }); + await exited; + let str = await new Response(stdout).text(); + str = str.replaceAll(/^\/\/.*$/gm, ""); + str = str.replaceAll(/^#include.*$/gm, ""); + str = str.replaceAll(`namespace JSC {`, ""); + str = str.replaceAll(`} // namespace JSC`, ""); + str = "// File generated via `make static-hash-table` / `make cpp`\n" + str.trim() + "\n"; + await Bun.write(cpp.replace(/\.cpp$/, ".lut.h").replace(/(\.lut)?\.txt$/, ".lut.h"), str); + console.log("Wrote", path.relative(process.cwd(), cpp.replace(/\.cpp$/, ".lut.h"))); + }), +); + +console.timeEnd("Creating static hash tables..."); |