diff options
Diffstat (limited to 'packages/markdown/remark/src/highlight.ts')
-rw-r--r-- | packages/markdown/remark/src/highlight.ts | 102 |
1 files changed, 102 insertions, 0 deletions
diff --git a/packages/markdown/remark/src/highlight.ts b/packages/markdown/remark/src/highlight.ts new file mode 100644 index 000000000..d4457c7d2 --- /dev/null +++ b/packages/markdown/remark/src/highlight.ts @@ -0,0 +1,102 @@ +import type { Element, Parent, Root } from 'hast'; +import { fromHtml } from 'hast-util-from-html'; +import { toText } from 'hast-util-to-text'; +import { removePosition } from 'unist-util-remove-position'; +import { visitParents } from 'unist-util-visit-parents'; + +type Highlighter = ( + code: string, + language: string, + options?: { meta?: string }, +) => Promise<Root | string>; + +const languagePattern = /\blanguage-(\S+)\b/; +// Don’t highlight math code blocks by default. +export const defaultExcludeLanguages = ['math']; + +/** + * A hast utility to syntax highlight code blocks with a given syntax highlighter. + * + * @param tree + * The hast tree in which to syntax highlight code blocks. + * @param highlighter + * A function which receives the code and language, and returns the HTML of a syntax + * highlighted `<pre>` element. + */ +export async function highlightCodeBlocks( + tree: Root, + highlighter: Highlighter, + excludeLanguages: string[] = [], +) { + const nodes: Array<{ + node: Element; + language: string; + parent: Element; + grandParent: Parent; + }> = []; + + // We’re looking for `<code>` elements + visitParents(tree, { type: 'element', tagName: 'code' }, (node, ancestors) => { + const parent = ancestors.at(-1); + + // Whose parent is a `<pre>`. + if (parent?.type !== 'element' || parent.tagName !== 'pre') { + return; + } + + // Where the `<code>` is the only child. + if (parent.children.length !== 1) { + return; + } + + // And the `<code>` has a class name that starts with `language-`. + let languageMatch: RegExpMatchArray | null | undefined; + let { className } = node.properties; + if (typeof className === 'string') { + languageMatch = languagePattern.exec(className); + } else if (Array.isArray(className)) { + for (const cls of className) { + if (typeof cls !== 'string') { + continue; + } + + languageMatch = languagePattern.exec(cls); + if (languageMatch) { + break; + } + } + } + + const language = languageMatch?.[1] || 'plaintext'; + if (excludeLanguages.includes(language) || defaultExcludeLanguages.includes(language)) { + return; + } + + nodes.push({ + node, + language, + parent, + grandParent: ancestors.at(-2)!, + }); + }); + + for (const { node, language, grandParent, parent } of nodes) { + const meta = (node.data as any)?.meta ?? node.properties.metastring ?? undefined; + const code = toText(node, { whitespace: 'pre' }); + const result = await highlighter(code, language, { meta }); + + let replacement: Element; + if (typeof result === 'string') { + // The replacement returns a root node with 1 child, the `<pre>` element replacement. + replacement = fromHtml(result, { fragment: true }).children[0] as Element; + // We just generated this node, so any positional information is invalid. + removePosition(replacement); + } else { + replacement = result.children[0] as Element; + } + + // We replace the parent in its parent with the new `<pre>` element. + const index = grandParent.children.indexOf(parent); + grandParent.children[index] = replacement; + } +} |