packages/astro-prism/src/plugin.ts


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187

/**
 * Registers an `astro` grammar on the given Prism instance.
 *
 * The grammar extends Prism's `markup` grammar with a clone of the script
 * grammar (`typescript` when it is loaded, `javascript` otherwise) and then
 * patches the `tag` rule so that tags may contain `{expression}` attribute
 * values, `{...spread}` attributes and script comments. An `after-tokenize`
 * hook is also installed that turns tokens found between an opening and a
 * closing tag (outside of `{ }`) back into `plain-text` tokens.
 *
 * Calling this again on a Prism instance that already has
 * `Prism.languages.astro` is a no-op.
 *
 * @param Prism - The Prism instance to extend. It is mutated in place.
 */
export function addAstro(Prism: typeof import('prismjs')): void {
	if (Prism.languages.astro) {
		return;
	}

	let scriptLang: string;
	if (Prism.languages.typescript) {
		scriptLang = 'typescript';
	} else {
		scriptLang = 'javascript';
		console.warn(
			'Prism TypeScript language not loaded, Astro scripts will be treated as JavaScript.',
		);
	}

	const script = Prism.util.clone(Prism.languages[scriptLang]);

	// Regex source fragments. `<S>`, `<BRACES>` and `<SPREAD>` are placeholders
	// that `re()` below substitutes with these sources.
	//
	// `space` matches one whitespace character, one `//` line comment, or one
	// `/* ... */` block comment. The block-comment body group must repeat
	// (`(?:...)*`); without the quantifier only comments with exactly one
	// character between the delimiters would match.
	// eslint-disable-next-line regexp/no-useless-assertions
	const space = /(?:\s|\/\/.*(?!.)|\/\*(?:[^*]|\*(?!\/))*\*\/)/.source;
	// A `{ ... }` group allowing up to two further levels of nested braces.
	const braces = /(?:\{(?:\{(?:\{[^{}]*\}|[^{}])*\}|[^{}])*\})/.source;
	// A `{...expr}` spread attribute; placeholders are expanded right after `re` is defined.
	let spread = /(?:\{<S>*\.{3}(?:[^{}]|<BRACES>)*\})/.source;

	/**
	 * Builds a RegExp from `source` after expanding the placeholders.
	 * Function replacers are used so that `$` sequences inside the fragments
	 * are inserted literally instead of being read as replacement patterns.
	 */
	function re(source: string, flags?: string): RegExp {
		const expanded = source
			.replace(/<S>/g, () => space)
			.replace(/<BRACES>/g, () => braces)
			.replace(/<SPREAD>/g, () => spread);
		return RegExp(expanded, flags);
	}

	spread = re(spread).source;

	Prism.languages.astro = Prism.languages.extend('markup', script);

	// The grammar is patched through dynamic property paths that the Grammar
	// type does not describe, so a single loosely-typed alias is used.
	// eslint-disable-next-line @typescript-eslint/no-explicit-any
	const astro = Prism.languages.astro as any;

	// A tag: name, then any number of attributes (plain, quoted, unquoted,
	// `={...}` valued, or spread) separated by whitespace/comments.
	astro.tag.pattern = re(
		/<\/?(?:[\w.:-]+(?:<S>+(?:[\w.:$-]+(?:=(?:"(?:\\[\s\S]|[^\\"])*"|'(?:\\[\s\S]|[^\\'])*'|[^\s{'"/>=]+|<BRACES>))?|<SPREAD>))*<S>*\/?)?>/
			.source,
	);

	astro.tag.inside['tag'].pattern = /^<\/?[^\s>/]*/;
	// Attribute values starting with `{` are excluded here; they are handled
	// by the `script` rule inserted below.
	astro.tag.inside['attr-value'].pattern =
		/=(?!\{)(?:"(?:\\[\s\S]|[^\\"])*"|'(?:\\[\s\S]|[^\\'])*'|[^\s'">]+)/;
	// Tag names made of capitalised, dot-separated segments get `class-name`.
	astro.tag.inside['tag'].inside['class-name'] = /^[A-Z]\w*(?:\.[A-Z]\w*)*$/;
	astro.tag.inside['comment'] = script['comment'];

	Prism.languages.insertBefore(
		'inside',
		'attr-name',
		{
			spread: {
				pattern: re(/<SPREAD>/.source),
				inside: astro,
			},
		},
		astro.tag,
	);

	Prism.languages.insertBefore(
		'inside',
		'special-attr',
		{
			script: {
				// Allow for two levels of nesting
				pattern: re(/=<BRACES>/.source),
				inside: {
					'script-punctuation': {
						pattern: /^=(?=\{)/,
						alias: 'punctuation',
					},
					rest: astro,
				},
				alias: `language-${scriptLang}`,
			},
		},
		astro.tag,
	);

	// The following will handle plain text inside tags

	/** Flattens a token (string, Token, or nested Token content) to its text. */
	// eslint-disable-next-line @typescript-eslint/no-explicit-any
	const stringifyToken = function (token: any): string {
		if (!token) {
			return '';
		}
		if (typeof token === 'string') {
			return token;
		}
		if (typeof token.content === 'string') {
			return token.content;
		}
		return token.content.map(stringifyToken).join('');
	};

	/**
	 * Walks a token list in place, tracking currently opened tags and the
	 * number of `{` opened inside the innermost one. Tokens that sit inside a
	 * tag but outside any `{ }` are replaced by `plain-text` tokens and merged
	 * with adjacent text. Recurses into nested token content.
	 */
	// eslint-disable-next-line @typescript-eslint/no-explicit-any
	const walkTokens = function (tokens: any): void {
		// eslint-disable-next-line @typescript-eslint/no-explicit-any
		const openedTags: any[] = [];
		for (let i = 0; i < tokens.length; i++) {
			const token = tokens[i];

			// This breaks styles, not sure why
			// NOTE(review): this stops walking the whole list, not just this
			// token — confirm that is intended before changing it.
			if (token.type === 'style') {
				return;
			}

			let notTagNorBrace = false;

			if (typeof token !== 'string') {
				if (token.type === 'tag' && token.content[0] && token.content[0].type === 'tag') {
					// We found a tag, now find its kind

					if (token.content[0].content[0].content === '</') {
						// Closing tag
						if (
							openedTags.length > 0 &&
							openedTags[openedTags.length - 1].tagName ===
								stringifyToken(token.content[0].content[1])
						) {
							// Pop matching opening tag
							openedTags.pop();
						}
					} else {
						if (token.content[token.content.length - 1].content === '/>') {
							// Autoclosed tag, ignore
						} else {
							// Opening tag
							openedTags.push({
								tagName: stringifyToken(token.content[0].content[1]),
								openedBraces: 0,
							});
						}
					}
				} else if (openedTags.length > 0 && token.type === 'punctuation' && token.content === '{') {
					// Here we might have entered an Astro context inside a tag
					openedTags[openedTags.length - 1].openedBraces++;
				} else if (
					openedTags.length > 0 &&
					openedTags[openedTags.length - 1].openedBraces > 0 &&
					token.type === 'punctuation' &&
					token.content === '}'
				) {
					// Here we might have left an Astro context inside a tag
					openedTags[openedTags.length - 1].openedBraces--;
				} else {
					notTagNorBrace = true;
				}
			}
			if (notTagNorBrace || typeof token === 'string') {
				if (openedTags.length > 0 && openedTags[openedTags.length - 1].openedBraces === 0) {
					// Here we are inside a tag, and not inside an Astro context.
					// That's plain text: drop any tokens matched.
					let plainText = stringifyToken(token);

					// And merge text with adjacent text
					if (
						i < tokens.length - 1 &&
						(typeof tokens[i + 1] === 'string' || tokens[i + 1].type === 'plain-text')
					) {
						plainText += stringifyToken(tokens[i + 1]);
						tokens.splice(i + 1, 1);
					}
					if (i > 0 && (typeof tokens[i - 1] === 'string' || tokens[i - 1].type === 'plain-text')) {
						plainText = stringifyToken(tokens[i - 1]) + plainText;
						tokens.splice(i - 1, 1);
						// The previous entry was removed, so the current slot moved down by one.
						i--;
					}

					tokens[i] = new Prism.Token('plain-text', plainText, undefined, plainText);
				}
			}

			// Recurse into the original token's children (even if the slot was
			// just replaced by a plain-text token above).
			if (token.content && typeof token.content !== 'string') {
				walkTokens(token.content);
			}
		}
	};

	// eslint-disable-next-line @typescript-eslint/no-explicit-any
	Prism.hooks.add('after-tokenize', function (env: any) {
		if (env.language !== 'astro') {
			return;
		}
		walkTokens(env.tokens);
	});
}