aboutsummaryrefslogtreecommitdiff
path: root/packages/markdown/remark/src/highlight.ts
diff options
context:
space:
mode:
Diffstat (limited to 'packages/markdown/remark/src/highlight.ts')
-rw-r--r--packages/markdown/remark/src/highlight.ts102
1 files changed, 102 insertions, 0 deletions
diff --git a/packages/markdown/remark/src/highlight.ts b/packages/markdown/remark/src/highlight.ts
new file mode 100644
index 000000000..d4457c7d2
--- /dev/null
+++ b/packages/markdown/remark/src/highlight.ts
@@ -0,0 +1,102 @@
+import type { Element, Parent, Root } from 'hast';
+import { fromHtml } from 'hast-util-from-html';
+import { toText } from 'hast-util-to-text';
+import { removePosition } from 'unist-util-remove-position';
+import { visitParents } from 'unist-util-visit-parents';
+
+type Highlighter = (
+ code: string,
+ language: string,
+ options?: { meta?: string },
+) => Promise<Root | string>;
+
+const languagePattern = /\blanguage-(\S+)\b/;
+// Don’t highlight math code blocks by default.
+export const defaultExcludeLanguages = ['math'];
+
+/**
+ * A hast utility to syntax highlight code blocks with a given syntax highlighter.
+ *
+ * @param tree
+ * The hast tree in which to syntax highlight code blocks.
+ * @param highlighter
+ * A function which receives the code and language, and returns the HTML of a syntax
+ * highlighted `<pre>` element.
+ */
+export async function highlightCodeBlocks(
+ tree: Root,
+ highlighter: Highlighter,
+ excludeLanguages: string[] = [],
+) {
+ const nodes: Array<{
+ node: Element;
+ language: string;
+ parent: Element;
+ grandParent: Parent;
+ }> = [];
+
+ // We’re looking for `<code>` elements
+ visitParents(tree, { type: 'element', tagName: 'code' }, (node, ancestors) => {
+ const parent = ancestors.at(-1);
+
+ // Whose parent is a `<pre>`.
+ if (parent?.type !== 'element' || parent.tagName !== 'pre') {
+ return;
+ }
+
+ // Where the `<code>` is the only child.
+ if (parent.children.length !== 1) {
+ return;
+ }
+
+ // And the `<code>` has a class name that starts with `language-`.
+ let languageMatch: RegExpMatchArray | null | undefined;
+ let { className } = node.properties;
+ if (typeof className === 'string') {
+ languageMatch = languagePattern.exec(className);
+ } else if (Array.isArray(className)) {
+ for (const cls of className) {
+ if (typeof cls !== 'string') {
+ continue;
+ }
+
+ languageMatch = languagePattern.exec(cls);
+ if (languageMatch) {
+ break;
+ }
+ }
+ }
+
+ const language = languageMatch?.[1] || 'plaintext';
+ if (excludeLanguages.includes(language) || defaultExcludeLanguages.includes(language)) {
+ return;
+ }
+
+ nodes.push({
+ node,
+ language,
+ parent,
+ grandParent: ancestors.at(-2)!,
+ });
+ });
+
+ for (const { node, language, grandParent, parent } of nodes) {
+ const meta = (node.data as any)?.meta ?? node.properties.metastring ?? undefined;
+ const code = toText(node, { whitespace: 'pre' });
+ const result = await highlighter(code, language, { meta });
+
+ let replacement: Element;
+ if (typeof result === 'string') {
+ // The replacement returns a root node with 1 child, the `<pre>` element replacement.
+ replacement = fromHtml(result, { fragment: true }).children[0] as Element;
+ // We just generated this node, so any positional information is invalid.
+ removePosition(replacement);
+ } else {
+ replacement = result.children[0] as Element;
+ }
+
+ // We replace the parent in its parent with the new `<pre>` element.
+ const index = grandParent.children.indexOf(parent);
+ grandParent.children[index] = replacement;
+ }
+}