diff options
Diffstat (limited to 'packages/markdown/remark/src')
-rw-r--r-- | packages/markdown/remark/src/frontmatter.ts | 86 | ||||
-rw-r--r-- | packages/markdown/remark/src/highlight.ts | 102 | ||||
-rw-r--r-- | packages/markdown/remark/src/import-plugin-browser.ts | 8 | ||||
-rw-r--r-- | packages/markdown/remark/src/import-plugin-default.ts | 22 | ||||
-rw-r--r-- | packages/markdown/remark/src/index.ts | 200 | ||||
-rw-r--r-- | packages/markdown/remark/src/load-plugins.ts | 34 | ||||
-rw-r--r-- | packages/markdown/remark/src/rehype-collect-headings.ts | 140 | ||||
-rw-r--r-- | packages/markdown/remark/src/rehype-images.ts | 44 | ||||
-rw-r--r-- | packages/markdown/remark/src/rehype-prism.ts | 20 | ||||
-rw-r--r-- | packages/markdown/remark/src/rehype-shiki.ts | 32 | ||||
-rw-r--r-- | packages/markdown/remark/src/remark-collect-images.ts | 48 | ||||
-rw-r--r-- | packages/markdown/remark/src/shiki.ts | 202 | ||||
-rw-r--r-- | packages/markdown/remark/src/types.ts | 103 |
13 files changed, 1041 insertions, 0 deletions
// Capture frontmatter wrapped with `---` or `+++`, including any characters and new lines within it.
// Only capture if the delimiter exists near the top of the file, including:
// 1. Start of file (including if has BOM encoding)
// 2. Start of file with any whitespace (but the delimiter must still start on a new line)
//
// Group 1 is the opening delimiter and group 2 is the raw frontmatter text. The closing
// delimiter is a backreference to group 1, so a `---` line inside a `+++` block (or a
// `+++` line inside a `---` block) does not end the frontmatter early, and a block that
// opens with one delimiter and "closes" with the other is not treated as frontmatter.
const frontmatterRE = /(?:^\uFEFF?|^\s*\n)(---|\+\+\+)([\s\S]*?\n)\1/;

/**
 * Extract the raw frontmatter text from the top of a document.
 *
 * @param code The full source of the document.
 * @returns The text between the opening and closing delimiter (including the line break
 * that follows the opening delimiter and the one that precedes the closing delimiter),
 * or `undefined` when the document does not start with a frontmatter block.
 */
export function extractFrontmatter(code: string): string | undefined {
  return frontmatterRE.exec(code)?.[2];
}
/**
 * Parse the frontmatter block at the top of a document.
 *
 * The raw text is obtained with `extractFrontmatter` and parsed with the parser that
 * `getFrontmatterParser` selects for the opening delimiter (`+++` or `---`).
 *
 * @param code The full source of the document.
 * @param options Controls how the frontmatter block appears in the returned `content`
 * (see `ParseFrontmatterOptions`). Defaults to `'remove'`.
 * @returns The parsed frontmatter (an empty object when the parser yields a non-object),
 * the raw frontmatter text, and the content with the frontmatter handled as requested.
 * When the document has no frontmatter, `content` is `code` unchanged.
 */
export function parseFrontmatter(
  code: string,
  options?: ParseFrontmatterOptions,
): ParseFrontmatterResult {
  const rawFrontmatter = extractFrontmatter(code);

  if (rawFrontmatter == null) {
    return { frontmatter: {}, rawFrontmatter: '', content: code };
  }

  const [delims, parser] = getFrontmatterParser(code);
  const parsed = parser(rawFrontmatter);
  const frontmatter = (parsed && typeof parsed === 'object' ? parsed : {}) as Record<string, any>;

  // The exact text of the frontmatter block, delimiters included.
  const block = `${delims}${rawFrontmatter}${delims}`;

  let content: string;
  switch (options?.frontmatter ?? 'remove') {
    case 'preserve':
      content = code;
      break;
    case 'remove':
      content = code.replace(block, '');
      break;
    case 'empty-with-spaces': {
      // Each delimiter is replaced by as many spaces as it has characters so that every
      // character after the block keeps its original line, column and offset.
      const delimiterPadding = ' '.repeat(delims.length);
      content = code.replace(
        block,
        `${delimiterPadding}${rawFrontmatter.replace(/[^\r\n]/g, ' ')}${delimiterPadding}`,
      );
      break;
    }
    case 'empty-with-lines':
      // Only line breaks are kept, which preserves line numbers but not offsets.
      content = code.replace(block, rawFrontmatter.replace(/[^\r\n]/g, ''));
      break;
  }

  return {
    frontmatter,
    rawFrontmatter,
    content,
  };
}
/**
 * A hast utility to syntax highlight code blocks with a given syntax highlighter.
 *
 * Only `<code>` elements that are the single child of a `<pre>` are considered. The
 * language is read from the first `language-*` class on the `<code>` element and falls
 * back to `plaintext`.
 *
 * @param tree
 *   The hast tree in which to syntax highlight code blocks. It is modified in place.
 * @param highlighter
 *   A function which receives the code and language, and returns the HTML (or a hast
 *   root) of a syntax highlighted `<pre>` element.
 * @param excludeLanguages
 *   Languages that are left untouched, in addition to `defaultExcludeLanguages`.
 */
export async function highlightCodeBlocks(
  tree: Root,
  highlighter: Highlighter,
  excludeLanguages: string[] = [],
) {
  const nodes: Array<{
    node: Element;
    language: string;
    parent: Element;
    grandParent: Parent;
  }> = [];

  // We’re looking for `<code>` elements
  visitParents(tree, { type: 'element', tagName: 'code' }, (node, ancestors) => {
    const parent = ancestors.at(-1);
    const grandParent = ancestors.at(-2);

    // Whose parent is a `<pre>`.
    if (parent?.type !== 'element' || parent.tagName !== 'pre') {
      return;
    }

    // The `<pre>` is replaced inside its own parent, so that parent has to exist.
    if (!grandParent) {
      return;
    }

    // Where the `<code>` is the only child.
    if (parent.children.length !== 1) {
      return;
    }

    // And the `<code>` has a class name that starts with `language-`.
    let languageMatch: RegExpMatchArray | null | undefined;
    const { className } = node.properties;
    if (typeof className === 'string') {
      languageMatch = languagePattern.exec(className);
    } else if (Array.isArray(className)) {
      for (const cls of className) {
        if (typeof cls !== 'string') {
          continue;
        }

        languageMatch = languagePattern.exec(cls);
        if (languageMatch) {
          break;
        }
      }
    }

    const language = languageMatch?.[1] || 'plaintext';
    if (excludeLanguages.includes(language) || defaultExcludeLanguages.includes(language)) {
      return;
    }

    nodes.push({
      node,
      language,
      parent,
      grandParent,
    });
  });

  // Highlighting is asynchronous, so it runs after the (synchronous) traversal.
  for (const { node, language, grandParent, parent } of nodes) {
    const meta = (node.data as any)?.meta ?? node.properties.metastring ?? undefined;
    const code = toText(node, { whitespace: 'pre' });
    const result = await highlighter(code, language, { meta });

    let replacement: Element | undefined;
    if (typeof result === 'string') {
      // The replacement returns a root node with 1 child, the `<pre>` element replacement.
      replacement = fromHtml(result, { fragment: true }).children[0] as Element | undefined;
      if (replacement) {
        // We just generated this node, so any positional information is invalid.
        removePosition(replacement);
      }
    } else {
      replacement = result.children[0] as Element | undefined;
    }

    // We replace the parent in its parent with the new `<pre>` element. If the
    // highlighter produced nothing, or the `<pre>` is no longer a child of its parent,
    // the original code block is left as it is rather than corrupting the tree.
    const index = grandParent.children.indexOf(parent);
    if (!replacement || index === -1) {
      continue;
    }
    grandParent.children[index] = replacement;
  }
}
try { + const importResult = await import(/* @vite-ignore */ p); + return importResult.default; + } catch {} + + // Try import from user project + cwdUrlStr ??= pathToFileURL(path.join(process.cwd(), 'package.json')).toString(); + const resolved = importMetaResolve(p, cwdUrlStr); + const importResult = await import(/* @vite-ignore */ resolved); + return importResult.default; +} diff --git a/packages/markdown/remark/src/index.ts b/packages/markdown/remark/src/index.ts new file mode 100644 index 000000000..1aa713956 --- /dev/null +++ b/packages/markdown/remark/src/index.ts @@ -0,0 +1,200 @@ +import type { + AstroMarkdownOptions, + AstroMarkdownProcessorOptions, + MarkdownProcessor, + SyntaxHighlightConfig, +} from './types.js'; + +import { loadPlugins } from './load-plugins.js'; +import { rehypeHeadingIds } from './rehype-collect-headings.js'; +import { rehypePrism } from './rehype-prism.js'; +import { rehypeShiki } from './rehype-shiki.js'; +import { remarkCollectImages } from './remark-collect-images.js'; + +import rehypeRaw from 'rehype-raw'; +import rehypeStringify from 'rehype-stringify'; +import remarkGfm from 'remark-gfm'; +import remarkParse from 'remark-parse'; +import remarkRehype from 'remark-rehype'; +import remarkSmartypants from 'remark-smartypants'; +import { unified } from 'unified'; +import { VFile } from 'vfile'; +import { defaultExcludeLanguages } from './highlight.js'; +import { rehypeImages } from './rehype-images.js'; +export { rehypeHeadingIds } from './rehype-collect-headings.js'; +export { remarkCollectImages } from './remark-collect-images.js'; +export { rehypePrism } from './rehype-prism.js'; +export { rehypeShiki } from './rehype-shiki.js'; +export { + isFrontmatterValid, + extractFrontmatter, + parseFrontmatter, + type ParseFrontmatterOptions, + type ParseFrontmatterResult, +} from './frontmatter.js'; +export { + createShikiHighlighter, + type ShikiHighlighter, + type CreateShikiHighlighterOptions, + type ShikiHighlighterHighlightOptions, 
+} from './shiki.js'; +export * from './types.js'; + +export const syntaxHighlightDefaults: Required<SyntaxHighlightConfig> = { + type: 'shiki', + excludeLangs: defaultExcludeLanguages, +}; + +export const markdownConfigDefaults: Required<AstroMarkdownOptions> = { + syntaxHighlight: syntaxHighlightDefaults, + shikiConfig: { + langs: [], + theme: 'github-dark', + themes: {}, + wrap: false, + transformers: [], + langAlias: {}, + }, + remarkPlugins: [], + rehypePlugins: [], + remarkRehype: {}, + gfm: true, + smartypants: true, +}; + +// Skip nonessential plugins during performance benchmark runs +const isPerformanceBenchmark = Boolean(process.env.ASTRO_PERFORMANCE_BENCHMARK); + +/** + * Create a markdown preprocessor to render multiple markdown files + */ +export async function createMarkdownProcessor( + opts?: AstroMarkdownProcessorOptions, +): Promise<MarkdownProcessor> { + const { + syntaxHighlight = markdownConfigDefaults.syntaxHighlight, + shikiConfig = markdownConfigDefaults.shikiConfig, + remarkPlugins = markdownConfigDefaults.remarkPlugins, + rehypePlugins = markdownConfigDefaults.rehypePlugins, + remarkRehype: remarkRehypeOptions = markdownConfigDefaults.remarkRehype, + gfm = markdownConfigDefaults.gfm, + smartypants = markdownConfigDefaults.smartypants, + experimentalHeadingIdCompat = false, + } = opts ?? 
{}; + + const loadedRemarkPlugins = await Promise.all(loadPlugins(remarkPlugins)); + const loadedRehypePlugins = await Promise.all(loadPlugins(rehypePlugins)); + + const parser = unified().use(remarkParse); + + // gfm and smartypants + if (!isPerformanceBenchmark) { + if (gfm) { + parser.use(remarkGfm); + } + if (smartypants) { + parser.use(remarkSmartypants); + } + } + + // User remark plugins + for (const [plugin, pluginOpts] of loadedRemarkPlugins) { + parser.use(plugin, pluginOpts); + } + + if (!isPerformanceBenchmark) { + // Apply later in case user plugins resolve relative image paths + parser.use(remarkCollectImages, opts?.image); + } + + // Remark -> Rehype + parser.use(remarkRehype, { + allowDangerousHtml: true, + passThrough: [], + ...remarkRehypeOptions, + }); + + if (syntaxHighlight && !isPerformanceBenchmark) { + const syntaxHighlightType = + typeof syntaxHighlight === 'string' ? syntaxHighlight : syntaxHighlight?.type; + const excludeLangs = + typeof syntaxHighlight === 'object' ? syntaxHighlight?.excludeLangs : undefined; + // Syntax highlighting + if (syntaxHighlightType === 'shiki') { + parser.use(rehypeShiki, shikiConfig, excludeLangs); + } else if (syntaxHighlightType === 'prism') { + parser.use(rehypePrism, excludeLangs); + } + } + + // User rehype plugins + for (const [plugin, pluginOpts] of loadedRehypePlugins) { + parser.use(plugin, pluginOpts); + } + + // Images / Assets support + parser.use(rehypeImages); + + // Headings + if (!isPerformanceBenchmark) { + parser.use(rehypeHeadingIds, { experimentalHeadingIdCompat }); + } + + // Stringify to HTML + parser.use(rehypeRaw).use(rehypeStringify, { allowDangerousHtml: true }); + + return { + async render(content, renderOpts) { + const vfile = new VFile({ + value: content, + path: renderOpts?.fileURL, + data: { + astro: { + frontmatter: renderOpts?.frontmatter ?? 
/**
 * Add a prefix to the message of a thrown value.
 *
 * @param err The thrown value. It may be anything, not only an `Error`.
 * @param prefix The text to put in front of the original message.
 * @returns `err` itself with its `message` rewritten to `"<prefix>:\n<message>"` when it
 * has a non-empty, writable `message`. Otherwise a new `Error` whose message is
 * `"<prefix>: <err>"` (or just `prefix` when `err` is falsy), carrying the original
 * stack when there is one and the original value as `cause`.
 */
function prefixError(err: any, prefix: string) {
  // If the error is an object with a `message` property, attempt to prefix the message
  if (err?.message) {
    try {
      err.message = `${prefix}:\n${err.message}`;
      return err;
    } catch {
      // Any errors here are ok, there's fallback code below
    }
  }

  // If that failed, create a new error with the desired message and attempt to keep the stack
  const wrappedError: Error & { cause?: unknown } = new Error(
    `${prefix}${err ? `: ${err}` : ''}`,
  );
  if (err != null) {
    try {
      // Only take over a real stack. Thrown strings and plain objects have none, and
      // copying `undefined` would throw away the stack of the wrapper itself.
      if (typeof err.stack === 'string') {
        wrappedError.stack = err.stack;
      }
      wrappedError.cause = err;
    } catch {
      // It's ok if we could not set the stack or cause - the message is the most important part
    }
  }

  return wrappedError;
}
// Matches exactly the HTML heading elements `h1`–`h6` and captures the level. The
// pattern is anchored so that other tag names that merely contain `h<digit>` (for
// example a custom element named `h1-title`) are not treated as headings.
const headingTagPattern = /^h([1-6])$/;

/**
 * Rehype plugin that adds `id` attributes to headings based on their text content.
 *
 * Every heading found is also recorded (depth, slug and text) in
 * `file.data.astro.headings`. A heading that already has a string `id` keeps it, and
 * that `id` is what is recorded as its slug.
 *
 * @param options Optional configuration object for the plugin. Unless
 * `experimentalHeadingIdCompat` is set, a trailing `-` is removed from generated slugs.
 *
 * @see https://docs.astro.build/en/guides/markdown-content/#heading-ids-and-plugins
 */
export function rehypeHeadingIds({
  experimentalHeadingIdCompat,
}: { experimentalHeadingIdCompat?: boolean } = {}): ReturnType<RehypePlugin> {
  return function (tree, file) {
    const headings: MarkdownHeading[] = [];
    const frontmatter = file.data.astro?.frontmatter;
    // One slugger per file, so repeated heading texts get distinct slugs.
    const slugger = new Slugger();
    const isMDX = isMDXFile(file);
    visit(tree, (node) => {
      if (node.type !== 'element') return;
      const { tagName } = node;
      // Cheap rejection before running the regular expression.
      if (tagName[0] !== 'h') return;
      const [, level] = headingTagPattern.exec(tagName) ?? [];
      if (!level) return;
      const depth = Number.parseInt(level, 10);

      // Collect the text of the heading from its descendant text-like nodes.
      let text = '';
      visit(node, (child, __, parent) => {
        if (child.type === 'element' || parent == null) {
          return;
        }
        if (child.type === 'raw') {
          // Skip raw nodes that consist of a single HTML tag.
          if (/^\n?<.*>\n?$/.test(child.value)) {
            return;
          }
        }
        if (rawNodeTypes.has(child.type)) {
          if (isMDX || codeTagNames.has(parent.tagName)) {
            let value = child.value;
            if (isMdxTextExpression(child) && frontmatter) {
              // `{frontmatter.x.y}` expressions are replaced by the string they refer to.
              const frontmatterPath = getMdxFrontmatterVariablePath(child);
              if (Array.isArray(frontmatterPath) && frontmatterPath.length > 0) {
                const frontmatterValue = getMdxFrontmatterVariableValue(
                  frontmatter,
                  frontmatterPath,
                );
                if (typeof frontmatterValue === 'string') {
                  value = frontmatterValue;
                }
              }
            }
            text += value;
          } else {
            text += child.value.replace(/\{/g, '${');
          }
        }
      });

      node.properties = node.properties || {};
      if (typeof node.properties.id !== 'string') {
        let slug = slugger.slug(text);

        if (!experimentalHeadingIdCompat) {
          if (slug.endsWith('-')) slug = slug.slice(0, -1);
        }

        node.properties.id = slug;
      }

      headings.push({ depth, slug: node.properties.id, text });
    });

    file.data.astro ??= {};
    file.data.astro.headings = headings;
  };
}
/**
 * Tell whether a file is an MDX file, judged by the first entry of its path history
 * ending in `.mdx`. A file without any history entry is not an MDX file.
 */
function isMDXFile(file: VFile) {
  const [firstPath] = file.history;
  return firstPath?.endsWith('.mdx') === true;
}
/**
 * Look up a nested value in the frontmatter.
 *
 * @param frontmatter The frontmatter object to read from.
 * @param path The property names to follow, outermost first.
 * @returns The value found at `path`, or `undefined` as soon as a step of the path is
 * missing or `null`. Falsy values that are actually present (`''`, `0`, `false`) are
 * returned as they are. An empty `path` returns `frontmatter` itself.
 */
function getMdxFrontmatterVariableValue(frontmatter: Record<string, any>, path: string[]) {
  let value = frontmatter;

  for (const key of path) {
    if (value == null) return undefined;

    const next = value[key];
    // Only a missing or null entry ends the lookup; a truthiness test here would
    // wrongly discard empty strings, zeros and `false`.
    if (next == null) return undefined;

    value = next;
  }

  return value;
}
/** Escape a value so it can be placed inside a double-quoted HTML attribute. */
function escapeHtmlAttribute(value: string): string {
  return value
    .replace(/&/g, '&amp;')
    .replace(/"/g, '&quot;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}

/**
 * Rehype plugin that syntax highlights code blocks with Prism.
 *
 * @param excludeLangs Languages whose code blocks are left untouched (passed on to
 * `highlightCodeBlocks`).
 */
export const rehypePrism: Plugin<[string[]?], Root> = (excludeLangs) => {
  return async (tree) => {
    await highlightCodeBlocks(
      tree,
      async (code, language) => {
        const { html, classLanguage } = runHighlighterWithAstro(language, code);

        // `language` is taken verbatim from the `language-*` class of the code block and
        // may contain characters such as `"` that would break out of the attribute.
        // NOTE(review): `classLanguage` is assumed to be an unescaped class name derived
        // from the language — confirm against `runHighlighterWithAstro`.
        const classAttribute = escapeHtmlAttribute(classLanguage);
        const languageAttribute = escapeHtmlAttribute(language);

        return `<pre class="${classAttribute}" data-language="${languageAttribute}"><code is:raw class="${classAttribute}">${html}</code></pre>`;
      },
      excludeLangs,
    );
  };
};
/**
 * Decode a URL with `decodeURI`, falling back to the URL as written when it contains a
 * malformed escape sequence (for example a lone `%`), for which `decodeURI` throws.
 */
function decodeImageUrl(url: string): string {
  try {
    return decodeURI(url);
  } catch {
    return url;
  }
}

/**
 * Remark plugin that collects the images referenced by a document.
 *
 * The decoded URLs are stored on the file as `data.astro.localImagePaths` and
 * `data.astro.remoteImagePaths`. Files without a string `path` are skipped.
 *
 * @param opts The `image` options of the processor. `domains` and `remotePatterns`
 * are handed to `isRemoteAllowed` to decide which remote URLs are collected; both
 * default to an empty list.
 */
export function remarkCollectImages(opts: AstroMarkdownProcessorOptions['image']) {
  const domains = opts?.domains ?? [];
  const remotePatterns = opts?.remotePatterns ?? [];

  return function (tree: Root, vfile: VFile) {
    if (typeof vfile?.path !== 'string') return;

    const definition = definitions(tree);
    // Sets, so that an image used several times is listed once.
    const localImagePaths = new Set<string>();
    const remoteImagePaths = new Set<string>();
    visit(tree, (node) => {
      let url: string | undefined;
      if (node.type === 'image') {
        url = decodeImageUrl(node.url);
      } else if (node.type === 'imageReference') {
        // Reference-style images get their URL from the matching definition.
        const imageDefinition = definition(node.identifier);
        if (imageDefinition) {
          url = decodeImageUrl(imageDefinition.url);
        }
      }

      if (!url) return;

      if (URL.canParse(url)) {
        if (isRemoteAllowed(url, { domains, remotePatterns })) {
          remoteImagePaths.add(url);
        }
      } else if (!url.startsWith('/')) {
        // If:
        // + not a valid URL
        // + AND not an absolute path
        // Then it's a local image.
        localImagePaths.add(url);
      }
    });

    vfile.data.astro ??= {};
    vfile.data.astro.localImagePaths = Array.from(localImagePaths);
    vfile.data.astro.remoteImagePaths = Array.from(remoteImagePaths);
  };
}
// The CSS variables theme is created on first use and then reused.
let _cssVariablesTheme: ReturnType<typeof createCssVariablesTheme>;
const cssVariablesTheme = () =>
  _cssVariablesTheme ??
  (_cssVariablesTheme = createCssVariablesTheme({
    variablePrefix: '--astro-code-',
  }));

/**
 * Create a Shiki based highlighter.
 *
 * @param options
 *   - `langs`: additional languages to load up front (`plaintext` is always loaded).
 *   - `theme`: the theme used when `themes` is empty. `'css-variables'` selects a theme
 *     whose colors are CSS variables prefixed with `--astro-code-`.
 *   - `themes`: named themes; when not empty they are used instead of `theme`.
 *   - `langAlias`: alternative names for languages.
 * @returns An object with `codeToHast` and `codeToHtml`. Both load a language that is
 * not loaded yet on first use and fall back to `plaintext` (with a console warning)
 * when loading fails.
 */
export async function createShikiHighlighter({
  langs = [],
  theme = 'github-dark',
  themes = {},
  langAlias = {},
}: CreateShikiHighlighterOptions = {}): Promise<ShikiHighlighter> {
  theme = theme === 'css-variables' ? cssVariablesTheme() : theme;

  const highlighter = await createHighlighter({
    langs: ['plaintext', ...langs],
    langAlias,
    themes: Object.values(themes).length ? Object.values(themes) : [theme],
  });

  // Shared implementation of `codeToHast` and `codeToHtml`; `to` selects the output.
  async function highlight(
    code: string,
    lang = 'plaintext',
    options: ShikiHighlighterHighlightOptions,
    to: 'hast' | 'html',
  ) {
    const resolvedLang = langAlias[lang] ?? lang;
    const loadedLanguages = highlighter.getLoadedLanguages();

    if (!isSpecialLang(lang) && !loadedLanguages.includes(resolvedLang)) {
      try {
        await highlighter.loadLanguage(resolvedLang as BundledLanguage);
      } catch (_err) {
        const langStr =
          lang === resolvedLang ? `"${lang}"` : `"${lang}" (aliased to "${resolvedLang}")`;
        console.warn(`[Shiki] The language ${langStr} doesn't exist, falling back to "plaintext".`);
        lang = 'plaintext';
      }
    }

    // Drop a single trailing line break so that no empty last line is rendered.
    code = code.replace(/(?:\r\n|\r|\n)$/, '');

    const themeOptions = Object.values(themes).length ? { themes } : { theme };
    const inline = options?.inline ?? false;

    return highlighter[to === 'html' ? 'codeToHtml' : 'codeToHast'](code, {
      ...themeOptions,
      defaultColor: options.defaultColor,
      lang,
      // NOTE: while we can spread `options.attributes` here so that Shiki can auto-serialize this as rendered
      // attributes on the top-level tag, it's not clear whether it is fine to pass all attributes as meta, as
      // they're technically not meta, nor parsed from Shiki's `parseMetaString` API.
      meta: options?.meta ? { __raw: options?.meta } : undefined,
      transformers: [
        {
          pre(node) {
            // Swap to `code` tag if inline
            if (inline) {
              node.tagName = 'code';
            }

            // `class` and `style` are merged with the generated values below; every
            // other attribute is copied onto the element as it is.
            const {
              class: attributesClass,
              style: attributesStyle,
              ...rest
            } = options?.attributes ?? {};
            Object.assign(node.properties, rest);

            const classValue =
              (normalizePropAsString(node.properties.class) ?? '') +
              (attributesClass ? ` ${attributesClass}` : '');
            const styleValue =
              (normalizePropAsString(node.properties.style) ?? '') +
              (attributesStyle ? `; ${attributesStyle}` : '');

            // Replace "shiki" class naming with "astro-code"
            node.properties.class = classValue.replace(/shiki/g, 'astro-code');

            // Add data-language attribute
            node.properties.dataLanguage = lang;

            // Handle code wrapping
            if (options.wrap === false || options.wrap === undefined) {
              node.properties.style = styleValue + '; overflow-x: auto;';
            } else if (options.wrap === true) {
              node.properties.style =
                styleValue + '; overflow-x: auto; white-space: pre-wrap; word-wrap: break-word;';
            } else if (attributesStyle) {
              // wrap=null adds no overflow styling, but a style passed through
              // `attributes` still has to reach the element.
              node.properties.style = styleValue;
            }
          },
          line(node) {
            // Add "user-select: none;" for "+"/"-" diff symbols.
            // Transform `<span class="line"><span style="...">+ something</span></span>
            // into `<span class="line"><span style="..."><span style="user-select: none;">+</span> something</span></span>`
            if (resolvedLang === 'diff') {
              const innerSpanNode = node.children[0];
              const innerSpanTextNode =
                innerSpanNode?.type === 'element' && innerSpanNode.children?.[0];

              if (innerSpanTextNode && innerSpanTextNode.type === 'text') {
                const start = innerSpanTextNode.value[0];
                if (start === '+' || start === '-') {
                  innerSpanTextNode.value = innerSpanTextNode.value.slice(1);
                  innerSpanNode.children.unshift({
                    type: 'element',
                    tagName: 'span',
                    properties: { style: 'user-select: none;' },
                    children: [{ type: 'text', value: start }],
                  });
                }
              }
            }
          },
          code(node) {
            // Inline output has no line wrappers: the first child replaces the node.
            if (inline) {
              return node.children[0] as typeof node;
            }
          },
        },
        ...(options.transformers ?? []),
      ],
    });
  }

  return {
    codeToHast(code, lang, options = {}) {
      return highlight(code, lang, options, 'hast') as Promise<Root>;
    },
    codeToHtml(code, lang, options = {}) {
      return highlight(code, lang, options, 'html') as Promise<string>;
    },
  };
}

/**
 * Turn a hast property value into a string: array values are joined with spaces,
 * `null` and `undefined` give `null`, and any other value is converted with `String`.
 */
function normalizePropAsString(value: Properties[string]): string | null {
  if (Array.isArray(value)) return value.join(' ');
  if (value == null) return null;
  return String(value);
}
SyntaxHighlightConfig | SyntaxHighlightConfigType | false; + shikiConfig?: ShikiConfig; + remarkPlugins?: RemarkPlugins; + rehypePlugins?: RehypePlugins; + remarkRehype?: RemarkRehype; + gfm?: boolean; + smartypants?: boolean; +} + +/** + * Extra configuration options from other parts of AstroConfig that get injected into this plugin + */ +export interface AstroMarkdownProcessorOptions extends AstroMarkdownOptions { + image?: { + domains?: string[]; + remotePatterns?: RemotePattern[]; + }; + experimentalHeadingIdCompat?: boolean; +} + +export interface MarkdownProcessor { + render: ( + content: string, + opts?: MarkdownProcessorRenderOptions, + ) => Promise<MarkdownProcessorRenderResult>; +} + +export interface MarkdownProcessorRenderOptions { + /** @internal */ + fileURL?: URL; + /** Used for frontmatter injection plugins */ + frontmatter?: Record<string, any>; +} + +export interface MarkdownProcessorRenderResult { + code: string; + metadata: { + headings: MarkdownHeading[]; + localImagePaths: string[]; + remoteImagePaths: string[]; + frontmatter: Record<string, any>; + }; +} + +export interface MarkdownHeading { + depth: number; + slug: string; + text: string; +} |