diff options
Diffstat (limited to 'packages/astro-parser/src/parse')
-rw-r--r-- | packages/astro-parser/src/parse/index.ts | 270 | ||||
-rw-r--r-- | packages/astro-parser/src/parse/read/context.ts | 72 | ||||
-rw-r--r-- | packages/astro-parser/src/parse/read/expression.ts | 251 | ||||
-rw-r--r-- | packages/astro-parser/src/parse/read/script.ts | 60 | ||||
-rw-r--r-- | packages/astro-parser/src/parse/read/style.ts | 40 | ||||
-rw-r--r-- | packages/astro-parser/src/parse/state/fragment.ts | 21 | ||||
-rw-r--r-- | packages/astro-parser/src/parse/state/mustache.ts | 413 | ||||
-rw-r--r-- | packages/astro-parser/src/parse/state/setup.ts | 35 | ||||
-rw-r--r-- | packages/astro-parser/src/parse/state/tag.ts | 579 | ||||
-rw-r--r-- | packages/astro-parser/src/parse/state/text.ts | 24 | ||||
-rw-r--r-- | packages/astro-parser/src/parse/utils/bracket.ts | 27 | ||||
-rw-r--r-- | packages/astro-parser/src/parse/utils/entities.ts | 2034 | ||||
-rw-r--r-- | packages/astro-parser/src/parse/utils/html.ts | 143 | ||||
-rw-r--r-- | packages/astro-parser/src/parse/utils/node.ts | 30 |
14 files changed, 3999 insertions, 0 deletions
diff --git a/packages/astro-parser/src/parse/index.ts b/packages/astro-parser/src/parse/index.ts new file mode 100644 index 000000000..124e125ef --- /dev/null +++ b/packages/astro-parser/src/parse/index.ts @@ -0,0 +1,270 @@ +// @ts-nocheck + +import { isIdentifierStart, isIdentifierChar } from 'acorn'; +import fragment from './state/fragment.js'; +import { whitespace } from '../utils/patterns.js'; +import { reserved } from '../utils/names.js'; +import full_char_code_at from '../utils/full_char_code_at.js'; +import { TemplateNode, Ast, ParserOptions, Fragment, Style, Script } from '../interfaces.js'; +import error from '../utils/error.js'; + +type ParserState = (parser: Parser) => ParserState | void; + +interface LastAutoClosedTag { + tag: string; + reason: string; + depth: number; +} + +export class Parser { + readonly template: string; + readonly filename?: string; + readonly customElement: boolean; + + index = 0; + stack: TemplateNode[] = []; + + html: Fragment; + css: Style[] = []; + js: Script[] = []; + meta_tags = {}; + last_auto_closed_tag?: LastAutoClosedTag; + + constructor(template: string, options: ParserOptions) { + if (typeof template !== 'string') { + throw new TypeError('Template must be a string'); + } + + this.template = template.replace(/\s+$/, ''); + this.filename = options.filename; + this.customElement = options.customElement; + + this.html = { + start: null, + end: null, + type: 'Fragment', + children: [], + }; + + this.stack.push(this.html); + + let state: ParserState = fragment; + + while (this.index < this.template.length) { + state = state(this) || fragment; + } + + if (this.stack.length > 1) { + const current = this.current(); + + const type = current.type === 'Element' ? `<${current.name}>` : 'Block'; + const slug = current.type === 'Element' ? 
'element' : 'block'; + + this.error( + { + code: `unclosed-${slug}`, + message: `${type} was left open`, + }, + current.start + ); + } + + if (state !== fragment) { + this.error({ + code: 'unexpected-eof', + message: 'Unexpected end of input', + }); + } + + if (this.html.children.length) { + let start = this.html.children[0].start; + while (whitespace.test(template[start])) start += 1; + + let end = this.html.children[this.html.children.length - 1].end; + while (whitespace.test(template[end - 1])) end -= 1; + + this.html.start = start; + this.html.end = end; + } else { + this.html.start = this.html.end = null; + } + } + + current() { + return this.stack[this.stack.length - 1]; + } + + acorn_error(err: any) { + this.error( + { + code: 'parse-error', + message: err.message.replace(/ \(\d+:\d+\)$/, ''), + }, + err.pos + ); + } + + error({ code, message }: { code: string; message: string }, index = this.index) { + error(message, { + name: 'ParseError', + code, + source: this.template, + start: index, + filename: this.filename, + }); + } + + eat(str: string, required?: boolean, message?: string) { + if (this.match(str)) { + this.index += str.length; + return true; + } + + if (required) { + this.error({ + code: `unexpected-${this.index === this.template.length ? 
'eof' : 'token'}`, + message: message || `Expected ${str}`, + }); + } + + return false; + } + + match(str: string) { + return this.template.slice(this.index, this.index + str.length) === str; + } + + match_regex(pattern: RegExp) { + const match = pattern.exec(this.template.slice(this.index)); + if (!match || match.index !== 0) return null; + + return match[0]; + } + + allow_whitespace() { + while (this.index < this.template.length && whitespace.test(this.template[this.index])) { + this.index++; + } + } + + read(pattern: RegExp) { + const result = this.match_regex(pattern); + if (result) this.index += result.length; + return result; + } + + read_identifier(allow_reserved = false) { + const start = this.index; + + let i = this.index; + + const code = full_char_code_at(this.template, i); + if (!isIdentifierStart(code, true)) return null; + + i += code <= 0xffff ? 1 : 2; + + while (i < this.template.length) { + const code = full_char_code_at(this.template, i); + + if (!isIdentifierChar(code, true)) break; + i += code <= 0xffff ? 
1 : 2; + } + + const identifier = this.template.slice(this.index, (this.index = i)); + + if (!allow_reserved && reserved.has(identifier)) { + this.error( + { + code: 'unexpected-reserved-word', + message: `'${identifier}' is a reserved word in JavaScript and cannot be used here`, + }, + start + ); + } + + return identifier; + } + + read_until(pattern: RegExp) { + if (this.index >= this.template.length) { + this.error({ + code: 'unexpected-eof', + message: 'Unexpected end of input', + }); + } + + const start = this.index; + const match = pattern.exec(this.template.slice(start)); + + if (match) { + this.index = start + match.index; + return this.template.slice(start, this.index); + } + + this.index = this.template.length; + return this.template.slice(start); + } + + require_whitespace() { + if (!whitespace.test(this.template[this.index])) { + this.error({ + code: 'missing-whitespace', + message: 'Expected whitespace', + }); + } + + this.allow_whitespace(); + } +} + +/** + * Parse + * Step 1/3 in Astro SSR. + * This is the first pass over .astro files and the step at which we convert a string to an AST for us to crawl. + */ +export default function parse(template: string, options: ParserOptions = {}): Ast { + const parser = new Parser(template, options); + + // TODO we may want to allow multiple <style> tags — + // one scoped, one global. 
for now, only allow one + if (parser.css.length > 1) { + parser.error( + { + code: 'duplicate-style', + message: 'You can only have one <style> tag per Astro file', + }, + parser.css[1].start + ); + } + + // const instance_scripts = parser.js.filter((script) => script.context === 'default'); + // const module_scripts = parser.js.filter((script) => script.context === 'module'); + const astro_scripts = parser.js.filter((script) => script.context === 'setup'); + + if (astro_scripts.length > 1) { + parser.error( + { + code: 'invalid-script', + message: 'A component can only have one frontmatter (---) script', + }, + astro_scripts[1].start + ); + } + + // if (module_scripts.length > 1) { + // parser.error( + // { + // code: 'invalid-script', + // message: 'A component can only have one <script context="module"> element', + // }, + // module_scripts[1].start + // ); + // } + + return { + html: parser.html, + css: parser.css[0], + // instance: instance_scripts[0], + module: astro_scripts[0], + }; +} diff --git a/packages/astro-parser/src/parse/read/context.ts b/packages/astro-parser/src/parse/read/context.ts new file mode 100644 index 000000000..565c66d18 --- /dev/null +++ b/packages/astro-parser/src/parse/read/context.ts @@ -0,0 +1,72 @@ +// @ts-nocheck + +import { Parser } from '../index.js'; +import { isIdentifierStart } from 'acorn'; +import full_char_code_at from '../../utils/full_char_code_at.js'; +import { is_bracket_open, is_bracket_close, is_bracket_pair, get_bracket_close } from '../utils/bracket.js'; +import { parse_expression_at } from './expression.js'; +import { Pattern } from 'estree'; + +export default function read_context(parser: Parser): Pattern & { start: number; end: number } { + const start = parser.index; + let i = parser.index; + + const code = full_char_code_at(parser.template, i); + if (isIdentifierStart(code, true)) { + return { + type: 'Identifier', + name: parser.read_identifier(), + start, + end: parser.index, + }; + } + + if 
(!is_bracket_open(code)) { + parser.error({ + code: 'unexpected-token', + message: 'Expected identifier or destructure pattern', + }); + } + + const bracket_stack = [code]; + i += code <= 0xffff ? 1 : 2; + + while (i < parser.template.length) { + const code = full_char_code_at(parser.template, i); + if (is_bracket_open(code)) { + bracket_stack.push(code); + } else if (is_bracket_close(code)) { + if (!is_bracket_pair(bracket_stack[bracket_stack.length - 1], code)) { + parser.error({ + code: 'unexpected-token', + message: `Expected ${String.fromCharCode(get_bracket_close(bracket_stack[bracket_stack.length - 1]))}`, + }); + } + bracket_stack.pop(); + if (bracket_stack.length === 0) { + i += code <= 0xffff ? 1 : 2; + break; + } + } + i += code <= 0xffff ? 1 : 2; + } + + parser.index = i; + + const pattern_string = parser.template.slice(start, i); + try { + // the length of the `space_with_newline` has to be start - 1 + // because we added a `(` in front of the pattern_string, + // which shifted the entire string to right by 1 + // so we offset it by removing 1 character in the `space_with_newline` + // to achieve that, we remove the 1st space encountered, + // so it will not affect the `column` of the node + let space_with_newline = parser.template.slice(0, start).replace(/[^\n]/g, ' '); + const first_space = space_with_newline.indexOf(' '); + space_with_newline = space_with_newline.slice(0, first_space) + space_with_newline.slice(first_space + 1); + + return (parse_expression_at(`${space_with_newline}(${pattern_string} = 1)`, start - 1) as any).left; + } catch (error) { + parser.acorn_error(error); + } +} diff --git a/packages/astro-parser/src/parse/read/expression.ts b/packages/astro-parser/src/parse/read/expression.ts new file mode 100644 index 000000000..9d0d09175 --- /dev/null +++ b/packages/astro-parser/src/parse/read/expression.ts @@ -0,0 +1,251 @@ +import type { BaseNode, Expression } from '../../interfaces'; +import { Parser } from '../index.js'; +import 
parseAstro from '../index.js'; + +interface ParseState { + source: string; + start: number; + index: number; + curlyCount: number; + bracketCount: number; + root: Expression; +} + +function peek_char(state: ParseState) { + return state.source[state.index]; +} + +function peek_nonwhitespace(state: ParseState) { + let index = state.index; + do { + let char = state.source[index]; + if (!/\s/.test(char)) { + return char; + } + index++; + } while (index < state.source.length); +} + +function next_char(state: ParseState) { + return state.source[state.index++]; +} + +function in_bounds(state: ParseState) { + return state.index < state.source.length; +} + +function consume_string(state: ParseState, stringChar: string) { + let inEscape; + do { + const char = next_char(state); + + if (inEscape) { + inEscape = false; + } else if (char === '\\') { + inEscape = true; + } else if (char === stringChar) { + break; + } + } while (in_bounds(state)); +} + +function consume_multiline_comment(state: ParseState) { + do { + const char = next_char(state); + + if (char === '*' && peek_char(state) === '/') { + break; + } + } while (in_bounds(state)); +} + +function consume_line_comment(state: ParseState) { + do { + const char = next_char(state); + if (char === '\n') { + break; + } + } while (in_bounds(state)); +} + +const voidElements = new Set(['area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']); + +function consume_tag(state: ParseState) { + const start = state.index - 1; + let tagName = ''; + let inTag = false; + let inStart = true; + let selfClosed = false; + let inClose = false; + + let bracketIndex = 1; + do { + const char = next_char(state); + + switch (char) { + case "'": + case '"': { + consume_string(state, char); + break; + } + case '<': { + inTag = false; + tagName = ''; + + if (peek_nonwhitespace(state) === '/') { + inClose = true; + bracketIndex--; + } else { + inStart = true; + bracketIndex++; + 
} + break; + } + case '>': { + // An arrow function, probably + if (!inStart && !inClose) { + break; + } + + bracketIndex--; + + const addExpectedBrackets = + // Void elements don't need a closing + !voidElements.has(tagName.toLowerCase()) && + // Self-closing don't need a closing + !selfClosed && + // If we're in a start tag, we expect to find 2 more brackets + !inClose; + + if (addExpectedBrackets) { + bracketIndex += 2; + } + + inTag = false; + selfClosed = false; + inStart = false; + inClose = false; + break; + } + case ' ': { + inTag = true; + break; + } + case '/': { + if (inStart) { + selfClosed = true; + } + break; + } + default: { + if (!inTag) { + tagName += char; + } + break; + } + } + + // Unclosed tags + if (state.curlyCount <= 0) { + break; + } + + if (bracketIndex === 0) { + break; + } + } while (in_bounds(state)); + + const source = state.source.substring(start, state.index); + + const ast = parseAstro(source); + const fragment = ast.html; + + return fragment; +} + +function consume_expression(source: string, start: number): Expression { + const expr: Expression = { + type: 'Expression', + start, + end: Number.NaN, + codeChunks: [], + children: [], + }; + + let codeStart: number = start; + + const state: ParseState = { + source, + start, + index: start, + curlyCount: 1, + bracketCount: 0, + root: expr, + }; + + do { + const char = next_char(state); + + switch (char) { + case '{': { + state.curlyCount++; + break; + } + case '}': { + state.curlyCount--; + break; + } + case '<': { + const chunk = source.substring(codeStart, state.index - 1); + expr.codeChunks.push(chunk); + const tag = consume_tag(state); + expr.children.push(tag); + codeStart = state.index; + break; + } + case "'": + case '"': + case '`': { + consume_string(state, char); + break; + } + case '/': { + switch (peek_char(state)) { + case '/': { + consume_line_comment(state); + break; + } + case '*': { + consume_multiline_comment(state); + break; + } + } + } + } + } while (in_bounds(state) 
&& state.curlyCount > 0); + + expr.end = state.index - 1; + + if (expr.children.length || !expr.codeChunks.length) { + expr.codeChunks.push(source.substring(codeStart, expr.end)); + } + + return expr; +} + +export const parse_expression_at = (source: string, index: number): Expression => { + const expression = consume_expression(source, index); + + return expression; +}; + +// @ts-ignore +export default function read_expression(parser: Parser) { + try { + const expression = parse_expression_at(parser.template, parser.index); + parser.index = expression.end; + return expression; + } catch (err) { + parser.acorn_error(err); + } +} diff --git a/packages/astro-parser/src/parse/read/script.ts b/packages/astro-parser/src/parse/read/script.ts new file mode 100644 index 000000000..9b8d71110 --- /dev/null +++ b/packages/astro-parser/src/parse/read/script.ts @@ -0,0 +1,60 @@ +// @ts-nocheck + +import type { Node } from 'estree'; +import { Parser } from '../index.js'; +import { Script } from '../../interfaces.js'; + +const script_closing_tag = '</script>'; + +function get_context(parser: Parser, attributes: any[], start: number): 'runtime' | 'setup' { + const context = attributes.find((attribute) => attribute.name === 'astro'); + if (!context) return 'runtime'; + if (context.value === true) return 'setup'; + + if (context.value.length !== 1 || context.value[0].type !== 'Text') { + parser.error( + { + code: 'invalid-script', + message: 'astro attribute must be static', + }, + start + ); + } + + const value = context.value[0].data; + + if (value !== 'setup') { + parser.error( + { + code: 'invalid-script', + message: 'If the "astro" attribute has a value, its value must be "setup"', + }, + context.start + ); + } + + return value; +} + +export default function read_script(parser: Parser, start: number, attributes: Node[]): Script { + const script_start = parser.index; + const script_end = parser.template.indexOf(script_closing_tag, script_start); + + if (script_end === -1) { + 
parser.error({ + code: 'unclosed-script', + message: '<script> must have a closing tag', + }); + } + + const source = parser.template.slice(0, script_start).replace(/[^\n]/g, ' ') + parser.template.slice(script_start, script_end); + parser.index = script_end + script_closing_tag.length; + + return { + type: 'Script', + start, + end: parser.index, + context: get_context(parser, attributes, start), + content: source, + }; +} diff --git a/packages/astro-parser/src/parse/read/style.ts b/packages/astro-parser/src/parse/read/style.ts new file mode 100644 index 000000000..f23d7b10e --- /dev/null +++ b/packages/astro-parser/src/parse/read/style.ts @@ -0,0 +1,40 @@ +import { Parser } from '../index.js'; +import { Style } from '../../interfaces.js'; + +interface Attribute { + start: number; + end: number; + type: 'Attribute'; + name: string; + value: { + raw: string; + data: string; + }[]; +} + +export default function read_style(parser: Parser, start: number, attributes: Attribute[]): Style { + const content_start = parser.index; + const styles = parser.read_until(/<\/style>/); + const content_end = parser.index; + parser.eat('</style>', true); + const end = parser.index; + + return { + type: 'Style', + start, + end, + attributes, + content: { + start: content_start, + end: content_end, + styles, + }, + }; +} + +function is_ref_selector(a: any, b: any) { + // TODO add CSS node types + if (!b) return false; + + return a.type === 'TypeSelector' && a.name === 'ref' && b.type === 'PseudoClassSelector'; +} diff --git a/packages/astro-parser/src/parse/state/fragment.ts b/packages/astro-parser/src/parse/state/fragment.ts new file mode 100644 index 000000000..97398b227 --- /dev/null +++ b/packages/astro-parser/src/parse/state/fragment.ts @@ -0,0 +1,21 @@ +import tag from './tag.js'; +import setup from './setup.js'; +import mustache from './mustache.js'; +import text from './text.js'; +import { Parser } from '../index.js'; + +export default function fragment(parser: Parser) { + if 
(parser.html.children.length === 0 && parser.match_regex(/^---/m)) { + return setup; + } + + if (parser.match('<')) { + return tag; + } + + if (parser.match('{')) { + return mustache; + } + + return text; +} diff --git a/packages/astro-parser/src/parse/state/mustache.ts b/packages/astro-parser/src/parse/state/mustache.ts new file mode 100644 index 000000000..79372d8d9 --- /dev/null +++ b/packages/astro-parser/src/parse/state/mustache.ts @@ -0,0 +1,413 @@ +import read_context from '../read/context.js'; +import read_expression from '../read/expression.js'; +import { closing_tag_omitted } from '../utils/html.js'; +import { whitespace } from '../../utils/patterns.js'; +import { trim_start, trim_end } from '../../utils/trim.js'; +import { to_string } from '../utils/node.js'; +import { Parser } from '../index.js'; +import { TemplateNode } from '../../interfaces.js'; + +type TODO = any; + +function trim_whitespace(block: TemplateNode, trim_before: boolean, trim_after: boolean) { + if (!block.children || block.children.length === 0) return; // AwaitBlock + + const first_child = block.children[0]; + const last_child = block.children[block.children.length - 1]; + + if (first_child.type === 'Text' && trim_before) { + first_child.data = trim_start(first_child.data); + if (!first_child.data) block.children.shift(); + } + + if (last_child.type === 'Text' && trim_after) { + last_child.data = trim_end(last_child.data); + if (!last_child.data) block.children.pop(); + } + + if (block.else) { + trim_whitespace(block.else, trim_before, trim_after); + } + + if (first_child.elseif) { + trim_whitespace(first_child, trim_before, trim_after); + } +} + +export default function mustache(parser: Parser) { + const start = parser.index; + parser.index += 1; + + parser.allow_whitespace(); + + // {/if}, {/each}, {/await} or {/key} + if (parser.eat('/')) { + let block = parser.current(); + let expected: TODO; + + if (closing_tag_omitted(block.name)) { + block.end = start; + parser.stack.pop(); + 
block = parser.current(); + } + + if (block.type === 'ElseBlock' || block.type === 'PendingBlock' || block.type === 'ThenBlock' || block.type === 'CatchBlock') { + block.end = start; + parser.stack.pop(); + block = parser.current(); + + expected = 'await'; + } + + if (block.type === 'IfBlock') { + expected = 'if'; + } else if (block.type === 'EachBlock') { + expected = 'each'; + } else if (block.type === 'AwaitBlock') { + expected = 'await'; + } else if (block.type === 'KeyBlock') { + expected = 'key'; + } else { + parser.error({ + code: 'unexpected-block-close', + message: 'Unexpected block closing tag', + }); + } + + parser.eat(expected, true); + parser.allow_whitespace(); + parser.eat('}', true); + + while (block.elseif) { + block.end = parser.index; + parser.stack.pop(); + block = parser.current(); + + if (block.else) { + block.else.end = start; + } + } + + // strip leading/trailing whitespace as necessary + const char_before = parser.template[block.start - 1]; + const char_after = parser.template[parser.index]; + const trim_before = !char_before || whitespace.test(char_before); + const trim_after = !char_after || whitespace.test(char_after); + + trim_whitespace(block, trim_before, trim_after); + + block.end = parser.index; + parser.stack.pop(); + } else if (parser.eat(':else')) { + if (parser.eat('if')) { + parser.error({ + code: 'invalid-elseif', + message: "'elseif' should be 'else if'", + }); + } + + parser.allow_whitespace(); + + // :else if + if (parser.eat('if')) { + const block = parser.current(); + if (block.type !== 'IfBlock') { + parser.error({ + code: 'invalid-elseif-placement', + message: parser.stack.some((block) => block.type === 'IfBlock') + ? 
`Expected to close ${to_string(block)} before seeing {:else if ...} block` + : 'Cannot have an {:else if ...} block outside an {#if ...} block', + }); + } + + parser.require_whitespace(); + + const expression = read_expression(parser); + + parser.allow_whitespace(); + parser.eat('}', true); + + block.else = { + start: parser.index, + end: null, + type: 'ElseBlock', + children: [ + { + start: parser.index, + end: null, + type: 'IfBlock', + elseif: true, + expression, + children: [], + }, + ], + }; + + parser.stack.push(block.else.children[0]); + } else { + // :else + const block = parser.current(); + if (block.type !== 'IfBlock' && block.type !== 'EachBlock') { + parser.error({ + code: 'invalid-else-placement', + message: parser.stack.some((block) => block.type === 'IfBlock' || block.type === 'EachBlock') + ? `Expected to close ${to_string(block)} before seeing {:else} block` + : 'Cannot have an {:else} block outside an {#if ...} or {#each ...} block', + }); + } + + parser.allow_whitespace(); + parser.eat('}', true); + + block.else = { + start: parser.index, + end: null, + type: 'ElseBlock', + children: [], + }; + + parser.stack.push(block.else); + } + } else if (parser.match(':then') || parser.match(':catch')) { + const block = parser.current(); + const is_then = parser.eat(':then') || !parser.eat(':catch'); + + if (is_then) { + if (block.type !== 'PendingBlock') { + parser.error({ + code: 'invalid-then-placement', + message: parser.stack.some((block) => block.type === 'PendingBlock') + ? `Expected to close ${to_string(block)} before seeing {:then} block` + : 'Cannot have an {:then} block outside an {#await ...} block', + }); + } + } else { + if (block.type !== 'ThenBlock' && block.type !== 'PendingBlock') { + parser.error({ + code: 'invalid-catch-placement', + message: parser.stack.some((block) => block.type === 'ThenBlock' || block.type === 'PendingBlock') + ? 
`Expected to close ${to_string(block)} before seeing {:catch} block` + : 'Cannot have an {:catch} block outside an {#await ...} block', + }); + } + } + + block.end = start; + parser.stack.pop(); + const await_block = parser.current(); + + if (!parser.eat('}')) { + parser.require_whitespace(); + await_block[is_then ? 'value' : 'error'] = read_context(parser); + parser.allow_whitespace(); + parser.eat('}', true); + } + + const new_block: TemplateNode = { + start, + // @ts-ignore + end: null, + type: is_then ? 'ThenBlock' : 'CatchBlock', + children: [], + skip: false, + }; + + await_block[is_then ? 'then' : 'catch'] = new_block; + parser.stack.push(new_block); + } else if (parser.eat('#')) { + // {#if foo}, {#each foo} or {#await foo} + let type; + + if (parser.eat('if')) { + type = 'IfBlock'; + } else if (parser.eat('each')) { + type = 'EachBlock'; + } else if (parser.eat('await')) { + type = 'AwaitBlock'; + } else if (parser.eat('key')) { + type = 'KeyBlock'; + } else { + parser.error({ + code: 'expected-block-type', + message: 'Expected if, each, await or key', + }); + } + + parser.require_whitespace(); + + const expression = read_expression(parser); + + // @ts-ignore + const block: TemplateNode = + type === 'AwaitBlock' + ? 
{ + start, + end: null, + type, + expression, + value: null, + error: null, + pending: { + start: null, + end: null, + type: 'PendingBlock', + children: [], + skip: true, + }, + then: { + start: null, + end: null, + type: 'ThenBlock', + children: [], + skip: true, + }, + catch: { + start: null, + end: null, + type: 'CatchBlock', + children: [], + skip: true, + }, + } + : { + start, + end: null, + type, + expression, + children: [], + }; + + parser.allow_whitespace(); + + // {#each} blocks must declare a context – {#each list as item} + if (type === 'EachBlock') { + parser.eat('as', true); + parser.require_whitespace(); + + block.context = read_context(parser); + + parser.allow_whitespace(); + + if (parser.eat(',')) { + parser.allow_whitespace(); + block.index = parser.read_identifier(); + if (!block.index) { + parser.error({ + code: 'expected-name', + message: 'Expected name', + }); + } + + parser.allow_whitespace(); + } + + if (parser.eat('(')) { + parser.allow_whitespace(); + + block.key = read_expression(parser); + parser.allow_whitespace(); + parser.eat(')', true); + parser.allow_whitespace(); + } + } + + const await_block_shorthand = type === 'AwaitBlock' && parser.eat('then'); + if (await_block_shorthand) { + parser.require_whitespace(); + block.value = read_context(parser); + parser.allow_whitespace(); + } + + const await_block_catch_shorthand = !await_block_shorthand && type === 'AwaitBlock' && parser.eat('catch'); + if (await_block_catch_shorthand) { + parser.require_whitespace(); + block.error = read_context(parser); + parser.allow_whitespace(); + } + + parser.eat('}', true); + + // @ts-ignore + parser.current().children.push(block); + parser.stack.push(block); + + if (type === 'AwaitBlock') { + let child_block; + if (await_block_shorthand) { + block.then.skip = false; + child_block = block.then; + } else if (await_block_catch_shorthand) { + block.catch.skip = false; + child_block = block.catch; + } else { + block.pending.skip = false; + child_block = 
block.pending; + } + + child_block.start = parser.index; + parser.stack.push(child_block); + } + } else if (parser.eat('@html')) { + // {@html content} tag + parser.require_whitespace(); + + const expression = read_expression(parser); + + parser.allow_whitespace(); + parser.eat('}', true); + + // @ts-ignore + parser.current().children.push({ + start, + end: parser.index, + type: 'RawMustacheTag', + expression, + }); + } else if (parser.eat('@debug')) { + // let identifiers; + + // // Implies {@debug} which indicates "debug all" + // if (parser.read(/\s*}/)) { + // identifiers = []; + // } else { + // const expression = read_expression(parser); + + // identifiers = expression.type === 'SequenceExpression' + // ? expression.expressions + // : [expression]; + + // identifiers.forEach(node => { + // if (node.type !== 'Identifier') { + // parser.error({ + // code: 'invalid-debug-args', + // message: '{@debug ...} arguments must be identifiers, not arbitrary expressions' + // }, node.start); + // } + // }); + + // parser.allow_whitespace(); + // parser.eat('}', true); + // } + + // parser.current().children.push({ + // start, + // end: parser.index, + // type: 'DebugTag', + // identifiers + // }); + throw new Error('@debug not yet supported'); + } else { + const expression = read_expression(parser); + + parser.allow_whitespace(); + parser.eat('}', true); + + // @ts-ignore + parser.current().children.push({ + start, + end: parser.index, + type: 'MustacheTag', + expression, + }); + } +} diff --git a/packages/astro-parser/src/parse/state/setup.ts b/packages/astro-parser/src/parse/state/setup.ts new file mode 100644 index 000000000..f64d8c52b --- /dev/null +++ b/packages/astro-parser/src/parse/state/setup.ts @@ -0,0 +1,35 @@ +// @ts-nocheck + +import { Parser } from '../index.js'; + +export default function setup(parser: Parser): void { + // TODO: Error if not at top of file? currently, we ignore / just treat as text. 
+ // if (parser.html.children.length > 0) { + // parser.error({ + // code: 'unexpected-token', + // message: 'Frontmatter scripts only supported at the top of file.', + // }); + // } + + const start = parser.index; + parser.index += 3; + const content_start = parser.index; + const setupScriptContent = parser.read_until(/^---/m); + const content_end = parser.index; + parser.eat('---', true); + const end = parser.index; + parser.js.push({ + type: 'Script', + context: 'setup', + start, + end, + content: setupScriptContent, + // attributes, + // content: { + // start: content_start, + // end: content_end, + // styles, + // }, + }); + return; +} diff --git a/packages/astro-parser/src/parse/state/tag.ts b/packages/astro-parser/src/parse/state/tag.ts new file mode 100644 index 000000000..a8b919a49 --- /dev/null +++ b/packages/astro-parser/src/parse/state/tag.ts @@ -0,0 +1,579 @@ +// @ts-nocheck + +import read_expression from '../read/expression.js'; +import read_script from '../read/script.js'; +import read_style from '../read/style.js'; +import { decode_character_references, closing_tag_omitted } from '../utils/html.js'; +import { is_void } from '../../utils/names.js'; +import { Parser } from '../index.js'; +import { Directive, DirectiveType, TemplateNode, Text } from '../../interfaces.js'; +import fuzzymatch from '../../utils/fuzzymatch.js'; +import list from '../../utils/list.js'; + +// eslint-disable-next-line no-useless-escape +const valid_tag_name = /^\!?[a-zA-Z]{1,}:?[a-zA-Z0-9\-]*/; + +const meta_tags = new Map([ + ['astro:head', 'Head'], + // ['slot:body', 'Body'], + // ['astro:options', 'Options'], + // ['astro:window', 'Window'], + // ['astro:body', 'Body'], +]); + +const valid_meta_tags = Array.from(meta_tags.keys()); //.concat('astro:self', 'astro:component', 'astro:fragment'); + +const specials = new Map([ + // Now handled as "setup" in setup.ts + // [ + // 'script', + // { + // read: read_script, + // property: 'js', + // }, + // ], + [ + 'style', + { + 
read: read_style, + property: 'css', + }, + ], +]); + +const SELF = /^astro:self(?=[\s/>])/; +const COMPONENT = /^astro:component(?=[\s/>])/; +const SLOT = /^astro:fragment(?=[\s/>])/; +const HEAD = /^head(?=[\s/>])/; + +function parent_is_head(stack) { + let i = stack.length; + while (i--) { + const { type } = stack[i]; + if (type === 'Head') return true; + if (type === 'Element' || type === 'InlineComponent') return false; + } + return false; +} + +export default function tag(parser: Parser) { + const start = parser.index++; + + let parent = parser.current(); + + if (parser.eat('!--')) { + const data = parser.read_until(/-->/); + parser.eat('-->', true, 'comment was left open, expected -->'); + + parser.current().children.push({ + start, + end: parser.index, + type: 'Comment', + data, + }); + + return; + } + + const is_closing_tag = parser.eat('/'); + + const name = read_tag_name(parser); + + if (meta_tags.has(name)) { + const slug = meta_tags.get(name).toLowerCase(); + if (is_closing_tag) { + if ((name === 'astro:window' || name === 'astro:body') && parser.current().children.length) { + parser.error( + { + code: `invalid-${slug}-content`, + message: `<${name}> cannot have children`, + }, + parser.current().children[0].start + ); + } + } else { + if (name in parser.meta_tags) { + parser.error( + { + code: `duplicate-${slug}`, + message: `A component can only have one <${name}> tag`, + }, + start + ); + } + + if (parser.stack.length > 1) { + parser.error( + { + code: `invalid-${slug}-placement`, + message: `<${name}> tags cannot be inside elements or blocks`, + }, + start + ); + } + + parser.meta_tags[name] = true; + } + } + + const type = meta_tags.has(name) + ? meta_tags.get(name) + : /[A-Z]/.test(name[0]) || name === 'astro:self' || name === 'astro:component' + ? 'InlineComponent' + : name === 'astro:fragment' + ? 'SlotTemplate' + : name === 'title' && parent_is_head(parser.stack) + ? 'Title' + : name === 'slot' && !parser.customElement + ? 
'Slot' + : 'Element'; + + const element: TemplateNode = { + start, + end: null, // filled in later + type, + name, + attributes: [], + children: [], + }; + + parser.allow_whitespace(); + + if (is_closing_tag) { + if (is_void(name)) { + parser.error( + { + code: 'invalid-void-content', + message: `<${name}> is a void element and cannot have children, or a closing tag`, + }, + start + ); + } + + parser.eat('>', true); + + // close any elements that don't have their own closing tags, e.g. <div><p></div> + while (parent.name !== name) { + if (parent.type !== 'Element') { + const message = + parser.last_auto_closed_tag && parser.last_auto_closed_tag.tag === name + ? `</${name}> attempted to close <${name}> that was already automatically closed by <${parser.last_auto_closed_tag.reason}>` + : `</${name}> attempted to close an element that was not open`; + parser.error( + { + code: 'invalid-closing-tag', + message, + }, + start + ); + } + + parent.end = start; + parser.stack.pop(); + + parent = parser.current(); + } + + parent.end = parser.index; + parser.stack.pop(); + + if (parser.last_auto_closed_tag && parser.stack.length < parser.last_auto_closed_tag.depth) { + parser.last_auto_closed_tag = null; + } + + return; + } else if (closing_tag_omitted(parent.name, name)) { + parent.end = start; + parser.stack.pop(); + parser.last_auto_closed_tag = { + tag: parent.name, + reason: name, + depth: parser.stack.length, + }; + } + + const unique_names: Set<string> = new Set(); + + let attribute; + while ((attribute = read_attribute(parser, unique_names))) { + element.attributes.push(attribute); + parser.allow_whitespace(); + } + + if (name === 'astro:component') { + const index = element.attributes.findIndex((attr) => attr.type === 'Attribute' && attr.name === 'this'); + if (!~index) { + parser.error( + { + code: 'missing-component-definition', + message: "<astro:component> must have a 'this' attribute", + }, + start + ); + } + + const definition = element.attributes.splice(index, 
1)[0]; + if (definition.value === true || definition.value.length !== 1 || definition.value[0].type === 'Text') { + parser.error( + { + code: 'invalid-component-definition', + message: 'invalid component definition', + }, + definition.start + ); + } + + element.expression = definition.value[0].expression; + } + + // special cases – top-level <script> and <style> + if (specials.has(name) && parser.stack.length === 1) { + const special = specials.get(name); + + parser.eat('>', true); + const content = special.read(parser, start, element.attributes); + if (content) parser[special.property].push(content); + return; + } + + parser.current().children.push(element); + + const self_closing = parser.eat('/') || is_void(name); + + parser.eat('>', true); + + if (self_closing) { + // don't push self-closing elements onto the stack + element.end = parser.index; + } else if (name === 'textarea') { + // special case + element.children = read_sequence(parser, () => parser.template.slice(parser.index, parser.index + 11) === '</textarea>'); + parser.read(/<\/textarea>/); + element.end = parser.index; + } else if (name === 'script' || name === 'style') { + // special case + const start = parser.index; + const data = parser.read_until(new RegExp(`</${name}>`)); + const end = parser.index; + element.children.push({ start, end, type: 'Text', data }); + parser.eat(`</${name}>`, true); + element.end = parser.index; + } else { + parser.stack.push(element); + } +} + +function read_tag_name(parser: Parser) { + const start = parser.index; + + if (parser.read(SELF)) { + // check we're inside a block, otherwise this + // will cause infinite recursion + let i = parser.stack.length; + let legal = false; + + while (i--) { + const fragment = parser.stack[i]; + if (fragment.type === 'IfBlock' || fragment.type === 'EachBlock' || fragment.type === 'InlineComponent') { + legal = true; + break; + } + } + + if (!legal) { + parser.error( + { + code: 'invalid-self-placement', + message: '<astro:self> 
components can only exist inside {#if} blocks, {#each} blocks, or slots passed to components', + }, + start + ); + } + + return 'astro:self'; + } + + if (parser.read(COMPONENT)) return 'astro:component'; + + if (parser.read(SLOT)) return 'astro:fragment'; + + if (parser.read(HEAD)) return 'head'; + + const name = parser.read_until(/(\s|\/|>)/); + + if (meta_tags.has(name)) return name; + + if (name.startsWith('astro:')) { + const match = fuzzymatch(name.slice(7), valid_meta_tags); + + let message = `Valid <astro:...> tag names are ${list(valid_meta_tags)}`; + if (match) message += ` (did you mean '${match}'?)`; + + parser.error( + { + code: 'invalid-tag-name', + message, + }, + start + ); + } + + if (!valid_tag_name.test(name)) { + parser.error( + { + code: 'invalid-tag-name', + message: 'Expected valid tag name', + }, + start + ); + } + + return name; +} + +function read_attribute(parser: Parser, unique_names: Set<string>) { + const start = parser.index; + + function check_unique(name: string) { + if (unique_names.has(name)) { + parser.error( + { + code: 'duplicate-attribute', + message: 'Attributes need to be unique', + }, + start + ); + } + unique_names.add(name); + } + + if (parser.eat('{')) { + parser.allow_whitespace(); + + if (parser.eat('...')) { + const { expression } = read_expression(parser); + + parser.allow_whitespace(); + parser.eat('}', true); + + return { + start, + end: parser.index, + type: 'Spread', + expression, + }; + } else { + const value_start = parser.index; + + const name = parser.read_identifier(); + parser.allow_whitespace(); + parser.eat('}', true); + + check_unique(name); + + return { + start, + end: parser.index, + type: 'Attribute', + name, + value: [ + { + start: value_start, + end: value_start + name.length, + type: 'AttributeShorthand', + expression: { + start: value_start, + end: value_start + name.length, + type: 'Identifier', + name, + }, + }, + ], + }; + } + } + + // eslint-disable-next-line no-useless-escape + const name = 
parser.read_until(/[\s=\/>"']/); + if (!name) return null; + + let end = parser.index; + + parser.allow_whitespace(); + + const colon_index = name.indexOf(':'); + const type = colon_index !== -1 && get_directive_type(name.slice(0, colon_index)); + + let value: any[] | true = true; + if (parser.eat('=')) { + parser.allow_whitespace(); + value = read_attribute_value(parser); + end = parser.index; + } else if (parser.match_regex(/["']/)) { + parser.error( + { + code: 'unexpected-token', + message: 'Expected =', + }, + parser.index + ); + } + + if (type) { + const [directive_name, ...modifiers] = name.slice(colon_index + 1).split('|'); + + if (type === 'Binding' && directive_name !== 'this') { + check_unique(directive_name); + } else if (type !== 'EventHandler' && type !== 'Action') { + check_unique(name); + } + + if (type === 'Ref') { + parser.error( + { + code: 'invalid-ref-directive', + message: `The ref directive is no longer supported — use \`bind:this={${directive_name}}\` instead`, + }, + start + ); + } + + if (type === 'Class' && directive_name === '') { + parser.error( + { + code: 'invalid-class-directive', + message: 'Class binding name cannot be empty', + }, + start + colon_index + 1 + ); + } + + if (value[0]) { + if ((value as any[]).length > 1 || value[0].type === 'Text') { + parser.error( + { + code: 'invalid-directive-value', + message: 'Directive value must be a JavaScript expression enclosed in curly braces', + }, + value[0].start + ); + } + } + + const directive: Directive = { + start, + end, + type, + name: directive_name, + modifiers, + expression: (value[0] && value[0].expression) || null, + }; + + if (type === 'Transition') { + const direction = name.slice(0, colon_index); + directive.intro = direction === 'in' || direction === 'transition'; + directive.outro = direction === 'out' || direction === 'transition'; + } + + if (!directive.expression && (type === 'Binding' || type === 'Class')) { + directive.expression = { + start: directive.start + 
colon_index + 1, + end: directive.end, + type: 'Identifier', + name: directive.name, + } as any; + } + + return directive; + } + + check_unique(name); + + return { + start, + end, + type: 'Attribute', + name, + value, + }; +} + +function get_directive_type(name: string): DirectiveType { + if (name === 'use') return 'Action'; + if (name === 'animate') return 'Animation'; + if (name === 'bind') return 'Binding'; + if (name === 'class') return 'Class'; + if (name === 'on') return 'EventHandler'; + if (name === 'let') return 'Let'; + if (name === 'ref') return 'Ref'; + if (name === 'in' || name === 'out' || name === 'transition') return 'Transition'; +} + +function read_attribute_value(parser: Parser) { + const quote_mark = parser.eat("'") ? "'" : parser.eat('"') ? '"' : null; + + const regex = quote_mark === "'" ? /'/ : quote_mark === '"' ? /"/ : /(\/>|[\s"'=<>`])/; + + const value = read_sequence(parser, () => !!parser.match_regex(regex)); + + if (quote_mark) parser.index += 1; + return value; +} + +function read_sequence(parser: Parser, done: () => boolean): TemplateNode[] { + let current_chunk: Text = { + start: parser.index, + end: null, + type: 'Text', + raw: '', + data: null, + }; + + function flush() { + if (current_chunk.raw) { + current_chunk.data = decode_character_references(current_chunk.raw); + current_chunk.end = parser.index; + chunks.push(current_chunk); + } + } + + const chunks: TemplateNode[] = []; + + while (parser.index < parser.template.length) { + const index = parser.index; + + if (done()) { + flush(); + return chunks; + } else if (parser.eat('{')) { + flush(); + + parser.allow_whitespace(); + const expression = read_expression(parser); + parser.allow_whitespace(); + parser.eat('}', true); + + chunks.push({ + start: index, + end: parser.index, + type: 'MustacheTag', + expression, + }); + + current_chunk = { + start: parser.index, + end: null, + type: 'Text', + raw: '', + data: null, + }; + } else { + current_chunk.raw += 
parser.template[parser.index++]; + } + } + + parser.error({ + code: 'unexpected-eof', + message: 'Unexpected end of input', + }); +} diff --git a/packages/astro-parser/src/parse/state/text.ts b/packages/astro-parser/src/parse/state/text.ts new file mode 100644 index 000000000..cca83f2d4 --- /dev/null +++ b/packages/astro-parser/src/parse/state/text.ts @@ -0,0 +1,24 @@ +// @ts-nocheck + +import { decode_character_references } from '../utils/html.js'; +import { Parser } from '../index.js'; + +export default function text(parser: Parser) { + const start = parser.index; + + let data = ''; + + while (parser.index < parser.template.length && !parser.match('---') && !parser.match('<') && !parser.match('{')) { + data += parser.template[parser.index++]; + } + + const node = { + start, + end: parser.index, + type: 'Text', + raw: data, + data: decode_character_references(data), + }; + + parser.current().children.push(node); +} diff --git a/packages/astro-parser/src/parse/utils/bracket.ts b/packages/astro-parser/src/parse/utils/bracket.ts new file mode 100644 index 000000000..7e885ad78 --- /dev/null +++ b/packages/astro-parser/src/parse/utils/bracket.ts @@ -0,0 +1,27 @@ +// @ts-nocheck + +const SQUARE_BRACKET_OPEN = '['.charCodeAt(0); +const SQUARE_BRACKET_CLOSE = ']'.charCodeAt(0); +const CURLY_BRACKET_OPEN = '{'.charCodeAt(0); +const CURLY_BRACKET_CLOSE = '}'.charCodeAt(0); + +export function is_bracket_open(code) { + return code === SQUARE_BRACKET_OPEN || code === CURLY_BRACKET_OPEN; +} + +export function is_bracket_close(code) { + return code === SQUARE_BRACKET_CLOSE || code === CURLY_BRACKET_CLOSE; +} + +export function is_bracket_pair(open, close) { + return (open === SQUARE_BRACKET_OPEN && close === SQUARE_BRACKET_CLOSE) || (open === CURLY_BRACKET_OPEN && close === CURLY_BRACKET_CLOSE); +} + +export function get_bracket_close(open) { + if (open === SQUARE_BRACKET_OPEN) { + return SQUARE_BRACKET_CLOSE; + } + if (open === CURLY_BRACKET_OPEN) { + return 
CURLY_BRACKET_CLOSE; + } +} diff --git a/packages/astro-parser/src/parse/utils/entities.ts b/packages/astro-parser/src/parse/utils/entities.ts new file mode 100644 index 000000000..e554664eb --- /dev/null +++ b/packages/astro-parser/src/parse/utils/entities.ts @@ -0,0 +1,2034 @@ +// https://dev.w3.org/html5/html-author/charref +export default { + CounterClockwiseContourIntegral: 8755, + ClockwiseContourIntegral: 8754, + DoubleLongLeftRightArrow: 10234, + DiacriticalDoubleAcute: 733, + NotSquareSupersetEqual: 8931, + CloseCurlyDoubleQuote: 8221, + DoubleContourIntegral: 8751, + FilledVerySmallSquare: 9642, + NegativeVeryThinSpace: 8203, + NotPrecedesSlantEqual: 8928, + NotRightTriangleEqual: 8941, + NotSucceedsSlantEqual: 8929, + CapitalDifferentialD: 8517, + DoubleLeftRightArrow: 8660, + DoubleLongRightArrow: 10233, + EmptyVerySmallSquare: 9643, + NestedGreaterGreater: 8811, + NotDoubleVerticalBar: 8742, + NotLeftTriangleEqual: 8940, + NotSquareSubsetEqual: 8930, + OpenCurlyDoubleQuote: 8220, + ReverseUpEquilibrium: 10607, + DoubleLongLeftArrow: 10232, + DownLeftRightVector: 10576, + LeftArrowRightArrow: 8646, + NegativeMediumSpace: 8203, + RightArrowLeftArrow: 8644, + SquareSupersetEqual: 8850, + leftrightsquigarrow: 8621, + DownRightTeeVector: 10591, + DownRightVectorBar: 10583, + LongLeftRightArrow: 10231, + Longleftrightarrow: 10234, + NegativeThickSpace: 8203, + PrecedesSlantEqual: 8828, + ReverseEquilibrium: 8651, + RightDoubleBracket: 10215, + RightDownTeeVector: 10589, + RightDownVectorBar: 10581, + RightTriangleEqual: 8885, + SquareIntersection: 8851, + SucceedsSlantEqual: 8829, + blacktriangleright: 9656, + longleftrightarrow: 10231, + DoubleUpDownArrow: 8661, + DoubleVerticalBar: 8741, + DownLeftTeeVector: 10590, + DownLeftVectorBar: 10582, + FilledSmallSquare: 9724, + GreaterSlantEqual: 10878, + LeftDoubleBracket: 10214, + LeftDownTeeVector: 10593, + LeftDownVectorBar: 10585, + LeftTriangleEqual: 8884, + NegativeThinSpace: 8203, + NotReverseElement: 
8716, + NotTildeFullEqual: 8775, + RightAngleBracket: 10217, + RightUpDownVector: 10575, + SquareSubsetEqual: 8849, + VerticalSeparator: 10072, + blacktriangledown: 9662, + blacktriangleleft: 9666, + leftrightharpoons: 8651, + rightleftharpoons: 8652, + twoheadrightarrow: 8608, + DiacriticalAcute: 180, + DiacriticalGrave: 96, + DiacriticalTilde: 732, + DoubleRightArrow: 8658, + DownArrowUpArrow: 8693, + EmptySmallSquare: 9723, + GreaterEqualLess: 8923, + GreaterFullEqual: 8807, + LeftAngleBracket: 10216, + LeftUpDownVector: 10577, + LessEqualGreater: 8922, + NonBreakingSpace: 160, + NotRightTriangle: 8939, + NotSupersetEqual: 8841, + RightTriangleBar: 10704, + RightUpTeeVector: 10588, + RightUpVectorBar: 10580, + UnderParenthesis: 9181, + UpArrowDownArrow: 8645, + circlearrowright: 8635, + downharpoonright: 8642, + ntrianglerighteq: 8941, + rightharpoondown: 8641, + rightrightarrows: 8649, + twoheadleftarrow: 8606, + vartriangleright: 8883, + CloseCurlyQuote: 8217, + ContourIntegral: 8750, + DoubleDownArrow: 8659, + DoubleLeftArrow: 8656, + DownRightVector: 8641, + LeftRightVector: 10574, + LeftTriangleBar: 10703, + LeftUpTeeVector: 10592, + LeftUpVectorBar: 10584, + LowerRightArrow: 8600, + NotGreaterEqual: 8817, + NotGreaterTilde: 8821, + NotLeftTriangle: 8938, + OverParenthesis: 9180, + RightDownVector: 8642, + ShortRightArrow: 8594, + UpperRightArrow: 8599, + bigtriangledown: 9661, + circlearrowleft: 8634, + curvearrowright: 8631, + downharpoonleft: 8643, + leftharpoondown: 8637, + leftrightarrows: 8646, + nLeftrightarrow: 8654, + nleftrightarrow: 8622, + ntrianglelefteq: 8940, + rightleftarrows: 8644, + rightsquigarrow: 8605, + rightthreetimes: 8908, + straightepsilon: 1013, + trianglerighteq: 8885, + vartriangleleft: 8882, + DiacriticalDot: 729, + DoubleRightTee: 8872, + DownLeftVector: 8637, + GreaterGreater: 10914, + HorizontalLine: 9472, + InvisibleComma: 8291, + InvisibleTimes: 8290, + LeftDownVector: 8643, + LeftRightArrow: 8596, + Leftrightarrow: 8660, 
+ LessSlantEqual: 10877, + LongRightArrow: 10230, + Longrightarrow: 10233, + LowerLeftArrow: 8601, + NestedLessLess: 8810, + NotGreaterLess: 8825, + NotLessGreater: 8824, + NotSubsetEqual: 8840, + NotVerticalBar: 8740, + OpenCurlyQuote: 8216, + ReverseElement: 8715, + RightTeeVector: 10587, + RightVectorBar: 10579, + ShortDownArrow: 8595, + ShortLeftArrow: 8592, + SquareSuperset: 8848, + TildeFullEqual: 8773, + UpperLeftArrow: 8598, + ZeroWidthSpace: 8203, + curvearrowleft: 8630, + doublebarwedge: 8966, + downdownarrows: 8650, + hookrightarrow: 8618, + leftleftarrows: 8647, + leftrightarrow: 8596, + leftthreetimes: 8907, + longrightarrow: 10230, + looparrowright: 8620, + nshortparallel: 8742, + ntriangleright: 8939, + rightarrowtail: 8611, + rightharpoonup: 8640, + trianglelefteq: 8884, + upharpoonright: 8638, + ApplyFunction: 8289, + DifferentialD: 8518, + DoubleLeftTee: 10980, + DoubleUpArrow: 8657, + LeftTeeVector: 10586, + LeftVectorBar: 10578, + LessFullEqual: 8806, + LongLeftArrow: 10229, + Longleftarrow: 10232, + NotTildeEqual: 8772, + NotTildeTilde: 8777, + Poincareplane: 8460, + PrecedesEqual: 10927, + PrecedesTilde: 8830, + RightArrowBar: 8677, + RightTeeArrow: 8614, + RightTriangle: 8883, + RightUpVector: 8638, + SucceedsEqual: 10928, + SucceedsTilde: 8831, + SupersetEqual: 8839, + UpEquilibrium: 10606, + VerticalTilde: 8768, + VeryThinSpace: 8202, + bigtriangleup: 9651, + blacktriangle: 9652, + divideontimes: 8903, + fallingdotseq: 8786, + hookleftarrow: 8617, + leftarrowtail: 8610, + leftharpoonup: 8636, + longleftarrow: 10229, + looparrowleft: 8619, + measuredangle: 8737, + ntriangleleft: 8938, + shortparallel: 8741, + smallsetminus: 8726, + triangleright: 9657, + upharpoonleft: 8639, + DownArrowBar: 10515, + DownTeeArrow: 8615, + ExponentialE: 8519, + GreaterEqual: 8805, + GreaterTilde: 8819, + HilbertSpace: 8459, + HumpDownHump: 8782, + Intersection: 8898, + LeftArrowBar: 8676, + LeftTeeArrow: 8612, + LeftTriangle: 8882, + LeftUpVector: 8639, + 
NotCongruent: 8802, + NotLessEqual: 8816, + NotLessTilde: 8820, + Proportional: 8733, + RightCeiling: 8969, + RoundImplies: 10608, + ShortUpArrow: 8593, + SquareSubset: 8847, + UnderBracket: 9141, + VerticalLine: 124, + blacklozenge: 10731, + exponentiale: 8519, + risingdotseq: 8787, + triangledown: 9663, + triangleleft: 9667, + CircleMinus: 8854, + CircleTimes: 8855, + Equilibrium: 8652, + GreaterLess: 8823, + LeftCeiling: 8968, + LessGreater: 8822, + MediumSpace: 8287, + NotPrecedes: 8832, + NotSucceeds: 8833, + OverBracket: 9140, + RightVector: 8640, + Rrightarrow: 8667, + RuleDelayed: 10740, + SmallCircle: 8728, + SquareUnion: 8852, + SubsetEqual: 8838, + UpDownArrow: 8597, + Updownarrow: 8661, + VerticalBar: 8739, + backepsilon: 1014, + blacksquare: 9642, + circledcirc: 8858, + circleddash: 8861, + curlyeqprec: 8926, + curlyeqsucc: 8927, + diamondsuit: 9830, + eqslantless: 10901, + expectation: 8496, + nRightarrow: 8655, + nrightarrow: 8603, + preccurlyeq: 8828, + precnapprox: 10937, + quaternions: 8461, + straightphi: 981, + succcurlyeq: 8829, + succnapprox: 10938, + thickapprox: 8776, + updownarrow: 8597, + Bernoullis: 8492, + CirclePlus: 8853, + EqualTilde: 8770, + Fouriertrf: 8497, + ImaginaryI: 8520, + Laplacetrf: 8466, + LeftVector: 8636, + Lleftarrow: 8666, + NotElement: 8713, + NotGreater: 8815, + Proportion: 8759, + RightArrow: 8594, + RightFloor: 8971, + Rightarrow: 8658, + TildeEqual: 8771, + TildeTilde: 8776, + UnderBrace: 9183, + UpArrowBar: 10514, + UpTeeArrow: 8613, + circledast: 8859, + complement: 8705, + curlywedge: 8911, + eqslantgtr: 10902, + gtreqqless: 10892, + lessapprox: 10885, + lesseqqgtr: 10891, + lmoustache: 9136, + longmapsto: 10236, + mapstodown: 8615, + mapstoleft: 8612, + nLeftarrow: 8653, + nleftarrow: 8602, + precapprox: 10935, + rightarrow: 8594, + rmoustache: 9137, + sqsubseteq: 8849, + sqsupseteq: 8850, + subsetneqq: 10955, + succapprox: 10936, + supsetneqq: 10956, + upuparrows: 8648, + varepsilon: 949, + varnothing: 8709, 
+ Backslash: 8726, + CenterDot: 183, + CircleDot: 8857, + Congruent: 8801, + Coproduct: 8720, + DoubleDot: 168, + DownArrow: 8595, + DownBreve: 785, + Downarrow: 8659, + HumpEqual: 8783, + LeftArrow: 8592, + LeftFloor: 8970, + Leftarrow: 8656, + LessTilde: 8818, + Mellintrf: 8499, + MinusPlus: 8723, + NotCupCap: 8813, + NotExists: 8708, + OverBrace: 9182, + PlusMinus: 177, + Therefore: 8756, + ThinSpace: 8201, + TripleDot: 8411, + UnionPlus: 8846, + backprime: 8245, + backsimeq: 8909, + bigotimes: 10754, + centerdot: 183, + checkmark: 10003, + complexes: 8450, + dotsquare: 8865, + downarrow: 8595, + gtrapprox: 10886, + gtreqless: 8923, + heartsuit: 9829, + leftarrow: 8592, + lesseqgtr: 8922, + nparallel: 8742, + nshortmid: 8740, + nsubseteq: 8840, + nsupseteq: 8841, + pitchfork: 8916, + rationals: 8474, + spadesuit: 9824, + subseteqq: 10949, + subsetneq: 8842, + supseteqq: 10950, + supsetneq: 8843, + therefore: 8756, + triangleq: 8796, + varpropto: 8733, + DDotrahd: 10513, + DotEqual: 8784, + Integral: 8747, + LessLess: 10913, + NotEqual: 8800, + NotTilde: 8769, + PartialD: 8706, + Precedes: 8826, + RightTee: 8866, + Succeeds: 8827, + SuchThat: 8715, + Superset: 8835, + Uarrocir: 10569, + UnderBar: 818, + andslope: 10840, + angmsdaa: 10664, + angmsdab: 10665, + angmsdac: 10666, + angmsdad: 10667, + angmsdae: 10668, + angmsdaf: 10669, + angmsdag: 10670, + angmsdah: 10671, + angrtvbd: 10653, + approxeq: 8778, + awconint: 8755, + backcong: 8780, + barwedge: 8965, + bbrktbrk: 9142, + bigoplus: 10753, + bigsqcup: 10758, + biguplus: 10756, + bigwedge: 8896, + boxminus: 8863, + boxtimes: 8864, + capbrcup: 10825, + circledR: 174, + circledS: 9416, + cirfnint: 10768, + clubsuit: 9827, + cupbrcap: 10824, + curlyvee: 8910, + cwconint: 8754, + doteqdot: 8785, + dotminus: 8760, + drbkarow: 10512, + dzigrarr: 10239, + elinters: 9191, + emptyset: 8709, + eqvparsl: 10725, + fpartint: 10765, + geqslant: 10878, + gesdotol: 10884, + gnapprox: 10890, + hksearow: 10533, + hkswarow: 
10534, + imagline: 8464, + imagpart: 8465, + infintie: 10717, + integers: 8484, + intercal: 8890, + intlarhk: 10775, + laemptyv: 10676, + ldrushar: 10571, + leqslant: 10877, + lesdotor: 10883, + llcorner: 8990, + lnapprox: 10889, + lrcorner: 8991, + lurdshar: 10570, + mapstoup: 8613, + multimap: 8888, + naturals: 8469, + otimesas: 10806, + parallel: 8741, + plusacir: 10787, + pointint: 10773, + precneqq: 10933, + precnsim: 8936, + profalar: 9006, + profline: 8978, + profsurf: 8979, + raemptyv: 10675, + realpart: 8476, + rppolint: 10770, + rtriltri: 10702, + scpolint: 10771, + setminus: 8726, + shortmid: 8739, + smeparsl: 10724, + sqsubset: 8847, + sqsupset: 8848, + subseteq: 8838, + succneqq: 10934, + succnsim: 8937, + supseteq: 8839, + thetasym: 977, + thicksim: 8764, + timesbar: 10801, + triangle: 9653, + triminus: 10810, + trpezium: 9186, + ulcorner: 8988, + urcorner: 8989, + varkappa: 1008, + varsigma: 962, + vartheta: 977, + Because: 8757, + Cayleys: 8493, + Cconint: 8752, + Cedilla: 184, + Diamond: 8900, + DownTee: 8868, + Element: 8712, + Epsilon: 917, + Implies: 8658, + LeftTee: 8867, + NewLine: 10, + NoBreak: 8288, + NotLess: 8814, + Omicron: 927, + OverBar: 175, + Product: 8719, + UpArrow: 8593, + Uparrow: 8657, + Upsilon: 933, + alefsym: 8501, + angrtvb: 8894, + angzarr: 9084, + asympeq: 8781, + backsim: 8765, + because: 8757, + bemptyv: 10672, + between: 8812, + bigcirc: 9711, + bigodot: 10752, + bigstar: 9733, + boxplus: 8862, + ccupssm: 10832, + cemptyv: 10674, + cirscir: 10690, + coloneq: 8788, + congdot: 10861, + cudarrl: 10552, + cudarrr: 10549, + cularrp: 10557, + curarrm: 10556, + dbkarow: 10511, + ddagger: 8225, + ddotseq: 10871, + demptyv: 10673, + diamond: 8900, + digamma: 989, + dotplus: 8724, + dwangle: 10662, + epsilon: 949, + eqcolon: 8789, + equivDD: 10872, + gesdoto: 10882, + gtquest: 10876, + gtrless: 8823, + harrcir: 10568, + intprod: 10812, + isindot: 8949, + larrbfs: 10527, + larrsim: 10611, + lbrksld: 10639, + lbrkslu: 10637, + 
ldrdhar: 10599, + lesdoto: 10881, + lessdot: 8918, + lessgtr: 8822, + lesssim: 8818, + lotimes: 10804, + lozenge: 9674, + ltquest: 10875, + luruhar: 10598, + maltese: 10016, + minusdu: 10794, + napprox: 8777, + natural: 9838, + nearrow: 8599, + nexists: 8708, + notinva: 8713, + notinvb: 8951, + notinvc: 8950, + notniva: 8716, + notnivb: 8958, + notnivc: 8957, + npolint: 10772, + nsqsube: 8930, + nsqsupe: 8931, + nvinfin: 10718, + nwarrow: 8598, + olcross: 10683, + omicron: 959, + orderof: 8500, + orslope: 10839, + pertenk: 8241, + planckh: 8462, + pluscir: 10786, + plussim: 10790, + plustwo: 10791, + precsim: 8830, + quatint: 10774, + questeq: 8799, + rarrbfs: 10528, + rarrsim: 10612, + rbrksld: 10638, + rbrkslu: 10640, + rdldhar: 10601, + realine: 8475, + rotimes: 10805, + ruluhar: 10600, + searrow: 8600, + simplus: 10788, + simrarr: 10610, + subedot: 10947, + submult: 10945, + subplus: 10943, + subrarr: 10617, + succsim: 8831, + supdsub: 10968, + supedot: 10948, + suphsub: 10967, + suplarr: 10619, + supmult: 10946, + supplus: 10944, + swarrow: 8601, + topfork: 10970, + triplus: 10809, + tritime: 10811, + uparrow: 8593, + upsilon: 965, + uwangle: 10663, + vzigzag: 10650, + zigrarr: 8669, + Aacute: 193, + Abreve: 258, + Agrave: 192, + Assign: 8788, + Atilde: 195, + Barwed: 8966, + Bumpeq: 8782, + Cacute: 262, + Ccaron: 268, + Ccedil: 199, + Colone: 10868, + Conint: 8751, + CupCap: 8781, + Dagger: 8225, + Dcaron: 270, + DotDot: 8412, + Dstrok: 272, + Eacute: 201, + Ecaron: 282, + Egrave: 200, + Exists: 8707, + ForAll: 8704, + Gammad: 988, + Gbreve: 286, + Gcedil: 290, + HARDcy: 1066, + Hstrok: 294, + Iacute: 205, + Igrave: 204, + Itilde: 296, + Jsercy: 1032, + Kcedil: 310, + Lacute: 313, + Lambda: 923, + Lcaron: 317, + Lcedil: 315, + Lmidot: 319, + Lstrok: 321, + Nacute: 323, + Ncaron: 327, + Ncedil: 325, + Ntilde: 209, + Oacute: 211, + Odblac: 336, + Ograve: 210, + Oslash: 216, + Otilde: 213, + Otimes: 10807, + Racute: 340, + Rarrtl: 10518, + Rcaron: 344, + Rcedil: 
342, + SHCHcy: 1065, + SOFTcy: 1068, + Sacute: 346, + Scaron: 352, + Scedil: 350, + Square: 9633, + Subset: 8912, + Supset: 8913, + Tcaron: 356, + Tcedil: 354, + Tstrok: 358, + Uacute: 218, + Ubreve: 364, + Udblac: 368, + Ugrave: 217, + Utilde: 360, + Vdashl: 10982, + Verbar: 8214, + Vvdash: 8874, + Yacute: 221, + Zacute: 377, + Zcaron: 381, + aacute: 225, + abreve: 259, + agrave: 224, + andand: 10837, + angmsd: 8737, + angsph: 8738, + apacir: 10863, + approx: 8776, + atilde: 227, + barvee: 8893, + barwed: 8965, + becaus: 8757, + bernou: 8492, + bigcap: 8898, + bigcup: 8899, + bigvee: 8897, + bkarow: 10509, + bottom: 8869, + bowtie: 8904, + boxbox: 10697, + bprime: 8245, + brvbar: 166, + bullet: 8226, + bumpeq: 8783, + cacute: 263, + capand: 10820, + capcap: 10827, + capcup: 10823, + capdot: 10816, + ccaron: 269, + ccedil: 231, + circeq: 8791, + cirmid: 10991, + colone: 8788, + commat: 64, + compfn: 8728, + conint: 8750, + coprod: 8720, + copysr: 8471, + cularr: 8630, + cupcap: 10822, + cupcup: 10826, + cupdot: 8845, + curarr: 8631, + curren: 164, + cylcty: 9005, + dagger: 8224, + daleth: 8504, + dcaron: 271, + dfisht: 10623, + divide: 247, + divonx: 8903, + dlcorn: 8990, + dlcrop: 8973, + dollar: 36, + drcorn: 8991, + drcrop: 8972, + dstrok: 273, + eacute: 233, + easter: 10862, + ecaron: 283, + ecolon: 8789, + egrave: 232, + egsdot: 10904, + elsdot: 10903, + emptyv: 8709, + emsp13: 8196, + emsp14: 8197, + eparsl: 10723, + eqcirc: 8790, + equals: 61, + equest: 8799, + female: 9792, + ffilig: 64259, + ffllig: 64260, + forall: 8704, + frac12: 189, + frac13: 8531, + frac14: 188, + frac15: 8533, + frac16: 8537, + frac18: 8539, + frac23: 8532, + frac25: 8534, + frac34: 190, + frac35: 8535, + frac38: 8540, + frac45: 8536, + frac56: 8538, + frac58: 8541, + frac78: 8542, + gacute: 501, + gammad: 989, + gbreve: 287, + gesdot: 10880, + gesles: 10900, + gtlPar: 10645, + gtrarr: 10616, + gtrdot: 8919, + gtrsim: 8819, + hairsp: 8202, + hamilt: 8459, + hardcy: 1098, + hearts: 
9829, + hellip: 8230, + hercon: 8889, + homtht: 8763, + horbar: 8213, + hslash: 8463, + hstrok: 295, + hybull: 8259, + hyphen: 8208, + iacute: 237, + igrave: 236, + iiiint: 10764, + iinfin: 10716, + incare: 8453, + inodot: 305, + intcal: 8890, + iquest: 191, + isinsv: 8947, + itilde: 297, + jsercy: 1112, + kappav: 1008, + kcedil: 311, + kgreen: 312, + lAtail: 10523, + lacute: 314, + lagran: 8466, + lambda: 955, + langle: 10216, + larrfs: 10525, + larrhk: 8617, + larrlp: 8619, + larrpl: 10553, + larrtl: 8610, + latail: 10521, + lbrace: 123, + lbrack: 91, + lcaron: 318, + lcedil: 316, + ldquor: 8222, + lesdot: 10879, + lesges: 10899, + lfisht: 10620, + lfloor: 8970, + lharul: 10602, + llhard: 10603, + lmidot: 320, + lmoust: 9136, + loplus: 10797, + lowast: 8727, + lowbar: 95, + lparlt: 10643, + lrhard: 10605, + lsaquo: 8249, + lsquor: 8218, + lstrok: 322, + lthree: 8907, + ltimes: 8905, + ltlarr: 10614, + ltrPar: 10646, + mapsto: 8614, + marker: 9646, + mcomma: 10793, + midast: 42, + midcir: 10992, + middot: 183, + minusb: 8863, + minusd: 8760, + mnplus: 8723, + models: 8871, + mstpos: 8766, + nVDash: 8879, + nVdash: 8878, + nacute: 324, + ncaron: 328, + ncedil: 326, + nearhk: 10532, + nequiv: 8802, + nesear: 10536, + nexist: 8708, + nltrie: 8940, + nprcue: 8928, + nrtrie: 8941, + nsccue: 8929, + nsimeq: 8772, + ntilde: 241, + numero: 8470, + nvDash: 8877, + nvHarr: 10500, + nvdash: 8876, + nvlArr: 10498, + nvrArr: 10499, + nwarhk: 10531, + nwnear: 10535, + oacute: 243, + odblac: 337, + odsold: 10684, + ograve: 242, + ominus: 8854, + origof: 8886, + oslash: 248, + otilde: 245, + otimes: 8855, + parsim: 10995, + percnt: 37, + period: 46, + permil: 8240, + phmmat: 8499, + planck: 8463, + plankv: 8463, + plusdo: 8724, + plusdu: 10789, + plusmn: 177, + preceq: 10927, + primes: 8473, + prnsim: 8936, + propto: 8733, + prurel: 8880, + puncsp: 8200, + qprime: 8279, + rAtail: 10524, + racute: 341, + rangle: 10217, + rarrap: 10613, + rarrfs: 10526, + rarrhk: 8618, + rarrlp: 
8620, + rarrpl: 10565, + rarrtl: 8611, + ratail: 10522, + rbrace: 125, + rbrack: 93, + rcaron: 345, + rcedil: 343, + rdquor: 8221, + rfisht: 10621, + rfloor: 8971, + rharul: 10604, + rmoust: 9137, + roplus: 10798, + rpargt: 10644, + rsaquo: 8250, + rsquor: 8217, + rthree: 8908, + rtimes: 8906, + sacute: 347, + scaron: 353, + scedil: 351, + scnsim: 8937, + searhk: 10533, + seswar: 10537, + sfrown: 8994, + shchcy: 1097, + sigmaf: 962, + sigmav: 962, + simdot: 10858, + smashp: 10803, + softcy: 1100, + solbar: 9023, + spades: 9824, + sqsube: 8849, + sqsupe: 8850, + square: 9633, + squarf: 9642, + ssetmn: 8726, + ssmile: 8995, + sstarf: 8902, + subdot: 10941, + subset: 8834, + subsim: 10951, + subsub: 10965, + subsup: 10963, + succeq: 10928, + supdot: 10942, + supset: 8835, + supsim: 10952, + supsub: 10964, + supsup: 10966, + swarhk: 10534, + swnwar: 10538, + target: 8982, + tcaron: 357, + tcedil: 355, + telrec: 8981, + there4: 8756, + thetav: 977, + thinsp: 8201, + thksim: 8764, + timesb: 8864, + timesd: 10800, + topbot: 9014, + topcir: 10993, + tprime: 8244, + tridot: 9708, + tstrok: 359, + uacute: 250, + ubreve: 365, + udblac: 369, + ufisht: 10622, + ugrave: 249, + ulcorn: 8988, + ulcrop: 8975, + urcorn: 8989, + urcrop: 8974, + utilde: 361, + vangrt: 10652, + varphi: 966, + varrho: 1009, + veebar: 8891, + vellip: 8942, + verbar: 124, + wedbar: 10847, + wedgeq: 8793, + weierp: 8472, + wreath: 8768, + xoplus: 10753, + xotime: 10754, + xsqcup: 10758, + xuplus: 10756, + xwedge: 8896, + yacute: 253, + zacute: 378, + zcaron: 382, + zeetrf: 8488, + AElig: 198, + Acirc: 194, + Alpha: 913, + Amacr: 256, + Aogon: 260, + Aring: 197, + Breve: 728, + Ccirc: 264, + Colon: 8759, + Cross: 10799, + Dashv: 10980, + Delta: 916, + Ecirc: 202, + Emacr: 274, + Eogon: 280, + Equal: 10869, + Gamma: 915, + Gcirc: 284, + Hacek: 711, + Hcirc: 292, + IJlig: 306, + Icirc: 206, + Imacr: 298, + Iogon: 302, + Iukcy: 1030, + Jcirc: 308, + Jukcy: 1028, + Kappa: 922, + OElig: 338, + Ocirc: 212, + 
Omacr: 332, + Omega: 937, + Prime: 8243, + RBarr: 10512, + Scirc: 348, + Sigma: 931, + THORN: 222, + TRADE: 8482, + TSHcy: 1035, + Theta: 920, + Tilde: 8764, + Ubrcy: 1038, + Ucirc: 219, + Umacr: 362, + Union: 8899, + Uogon: 370, + UpTee: 8869, + Uring: 366, + VDash: 8875, + Vdash: 8873, + Wcirc: 372, + Wedge: 8896, + Ycirc: 374, + acirc: 226, + acute: 180, + aelig: 230, + aleph: 8501, + alpha: 945, + amacr: 257, + amalg: 10815, + angle: 8736, + angrt: 8735, + angst: 8491, + aogon: 261, + aring: 229, + asymp: 8776, + awint: 10769, + bcong: 8780, + bdquo: 8222, + bepsi: 1014, + blank: 9251, + blk12: 9618, + blk14: 9617, + blk34: 9619, + block: 9608, + boxDL: 9559, + boxDR: 9556, + boxDl: 9558, + boxDr: 9555, + boxHD: 9574, + boxHU: 9577, + boxHd: 9572, + boxHu: 9575, + boxUL: 9565, + boxUR: 9562, + boxUl: 9564, + boxUr: 9561, + boxVH: 9580, + boxVL: 9571, + boxVR: 9568, + boxVh: 9579, + boxVl: 9570, + boxVr: 9567, + boxdL: 9557, + boxdR: 9554, + boxdl: 9488, + boxdr: 9484, + boxhD: 9573, + boxhU: 9576, + boxhd: 9516, + boxhu: 9524, + boxuL: 9563, + boxuR: 9560, + boxul: 9496, + boxur: 9492, + boxvH: 9578, + boxvL: 9569, + boxvR: 9566, + boxvh: 9532, + boxvl: 9508, + boxvr: 9500, + breve: 728, + bsemi: 8271, + bsime: 8909, + bsolb: 10693, + bumpE: 10926, + bumpe: 8783, + caret: 8257, + caron: 711, + ccaps: 10829, + ccirc: 265, + ccups: 10828, + cedil: 184, + check: 10003, + clubs: 9827, + colon: 58, + comma: 44, + crarr: 8629, + cross: 10007, + csube: 10961, + csupe: 10962, + ctdot: 8943, + cuepr: 8926, + cuesc: 8927, + cupor: 10821, + cuvee: 8910, + cuwed: 8911, + cwint: 8753, + dashv: 8867, + dblac: 733, + ddarr: 8650, + delta: 948, + dharl: 8643, + dharr: 8642, + diams: 9830, + disin: 8946, + doteq: 8784, + dtdot: 8945, + dtrif: 9662, + duarr: 8693, + duhar: 10607, + eDDot: 10871, + ecirc: 234, + efDot: 8786, + emacr: 275, + empty: 8709, + eogon: 281, + eplus: 10865, + epsiv: 949, + eqsim: 8770, + equiv: 8801, + erDot: 8787, + erarr: 10609, + esdot: 8784, + exist: 
8707, + fflig: 64256, + filig: 64257, + fllig: 64258, + fltns: 9649, + forkv: 10969, + frasl: 8260, + frown: 8994, + gamma: 947, + gcirc: 285, + gescc: 10921, + gimel: 8503, + gneqq: 8809, + gnsim: 8935, + grave: 96, + gsime: 10894, + gsiml: 10896, + gtcir: 10874, + gtdot: 8919, + harrw: 8621, + hcirc: 293, + hoarr: 8703, + icirc: 238, + iexcl: 161, + iiint: 8749, + iiota: 8489, + ijlig: 307, + imacr: 299, + image: 8465, + imath: 305, + imped: 437, + infin: 8734, + iogon: 303, + iprod: 10812, + isinE: 8953, + isins: 8948, + isinv: 8712, + iukcy: 1110, + jcirc: 309, + jmath: 567, + jukcy: 1108, + kappa: 954, + lAarr: 8666, + lBarr: 10510, + langd: 10641, + laquo: 171, + larrb: 8676, + lbarr: 10508, + lbbrk: 10098, + lbrke: 10635, + lceil: 8968, + ldquo: 8220, + lescc: 10920, + lhard: 8637, + lharu: 8636, + lhblk: 9604, + llarr: 8647, + lltri: 9722, + lneqq: 8808, + lnsim: 8934, + loang: 10220, + loarr: 8701, + lobrk: 10214, + lopar: 10629, + lrarr: 8646, + lrhar: 8651, + lrtri: 8895, + lsime: 10893, + lsimg: 10895, + lsquo: 8216, + ltcir: 10873, + ltdot: 8918, + ltrie: 8884, + ltrif: 9666, + mDDot: 8762, + mdash: 8212, + micro: 181, + minus: 8722, + mumap: 8888, + nabla: 8711, + napos: 329, + natur: 9838, + ncong: 8775, + ndash: 8211, + neArr: 8663, + nearr: 8599, + ngsim: 8821, + nhArr: 8654, + nharr: 8622, + nhpar: 10994, + nlArr: 8653, + nlarr: 8602, + nless: 8814, + nlsim: 8820, + nltri: 8938, + notin: 8713, + notni: 8716, + nprec: 8832, + nrArr: 8655, + nrarr: 8603, + nrtri: 8939, + nsime: 8772, + nsmid: 8740, + nspar: 8742, + nsube: 8840, + nsucc: 8833, + nsupe: 8841, + numsp: 8199, + nwArr: 8662, + nwarr: 8598, + ocirc: 244, + odash: 8861, + oelig: 339, + ofcir: 10687, + ohbar: 10677, + olarr: 8634, + olcir: 10686, + oline: 8254, + omacr: 333, + omega: 969, + operp: 10681, + oplus: 8853, + orarr: 8635, + order: 8500, + ovbar: 9021, + parsl: 11005, + phone: 9742, + plusb: 8862, + pluse: 10866, + pound: 163, + prcue: 8828, + prime: 8242, + prnap: 10937, + 
prsim: 8830, + quest: 63, + rAarr: 8667, + rBarr: 10511, + radic: 8730, + rangd: 10642, + range: 10661, + raquo: 187, + rarrb: 8677, + rarrc: 10547, + rarrw: 8605, + ratio: 8758, + rbarr: 10509, + rbbrk: 10099, + rbrke: 10636, + rceil: 8969, + rdquo: 8221, + reals: 8477, + rhard: 8641, + rharu: 8640, + rlarr: 8644, + rlhar: 8652, + rnmid: 10990, + roang: 10221, + roarr: 8702, + robrk: 10215, + ropar: 10630, + rrarr: 8649, + rsquo: 8217, + rtrie: 8885, + rtrif: 9656, + sbquo: 8218, + sccue: 8829, + scirc: 349, + scnap: 10938, + scsim: 8831, + sdotb: 8865, + sdote: 10854, + seArr: 8664, + searr: 8600, + setmn: 8726, + sharp: 9839, + sigma: 963, + simeq: 8771, + simgE: 10912, + simlE: 10911, + simne: 8774, + slarr: 8592, + smile: 8995, + sqcap: 8851, + sqcup: 8852, + sqsub: 8847, + sqsup: 8848, + srarr: 8594, + starf: 9733, + strns: 175, + subnE: 10955, + subne: 8842, + supnE: 10956, + supne: 8843, + swArr: 8665, + swarr: 8601, + szlig: 223, + theta: 952, + thkap: 8776, + thorn: 254, + tilde: 732, + times: 215, + trade: 8482, + trisb: 10701, + tshcy: 1115, + twixt: 8812, + ubrcy: 1118, + ucirc: 251, + udarr: 8645, + udhar: 10606, + uharl: 8639, + uharr: 8638, + uhblk: 9600, + ultri: 9720, + umacr: 363, + uogon: 371, + uplus: 8846, + upsih: 978, + uring: 367, + urtri: 9721, + utdot: 8944, + utrif: 9652, + uuarr: 8648, + vBarv: 10985, + vDash: 8872, + varpi: 982, + vdash: 8866, + veeeq: 8794, + vltri: 8882, + vprop: 8733, + vrtri: 8883, + wcirc: 373, + wedge: 8743, + xcirc: 9711, + xdtri: 9661, + xhArr: 10234, + xharr: 10231, + xlArr: 10232, + xlarr: 10229, + xodot: 10752, + xrArr: 10233, + xrarr: 10230, + xutri: 9651, + ycirc: 375, + Aopf: 120120, + Ascr: 119964, + Auml: 196, + Barv: 10983, + Beta: 914, + Bopf: 120121, + Bscr: 8492, + CHcy: 1063, + COPY: 169, + Cdot: 266, + Copf: 8450, + Cscr: 119966, + DJcy: 1026, + DScy: 1029, + DZcy: 1039, + Darr: 8609, + Dopf: 120123, + Dscr: 119967, + Edot: 278, + Eopf: 120124, + Escr: 8496, + Esim: 10867, + Euml: 203, + Fopf: 
120125, + Fscr: 8497, + GJcy: 1027, + Gdot: 288, + Gopf: 120126, + Gscr: 119970, + Hopf: 8461, + Hscr: 8459, + IEcy: 1045, + IOcy: 1025, + Idot: 304, + Iopf: 120128, + Iota: 921, + Iscr: 8464, + Iuml: 207, + Jopf: 120129, + Jscr: 119973, + KHcy: 1061, + KJcy: 1036, + Kopf: 120130, + Kscr: 119974, + LJcy: 1033, + Lang: 10218, + Larr: 8606, + Lopf: 120131, + Lscr: 8466, + Mopf: 120132, + Mscr: 8499, + NJcy: 1034, + Nopf: 8469, + Nscr: 119977, + Oopf: 120134, + Oscr: 119978, + Ouml: 214, + Popf: 8473, + Pscr: 119979, + QUOT: 34, + Qopf: 8474, + Qscr: 119980, + Rang: 10219, + Rarr: 8608, + Ropf: 8477, + Rscr: 8475, + SHcy: 1064, + Sopf: 120138, + Sqrt: 8730, + Sscr: 119982, + Star: 8902, + TScy: 1062, + Topf: 120139, + Tscr: 119983, + Uarr: 8607, + Uopf: 120140, + Upsi: 978, + Uscr: 119984, + Uuml: 220, + Vbar: 10987, + Vert: 8214, + Vopf: 120141, + Vscr: 119985, + Wopf: 120142, + Wscr: 119986, + Xopf: 120143, + Xscr: 119987, + YAcy: 1071, + YIcy: 1031, + YUcy: 1070, + Yopf: 120144, + Yscr: 119988, + Yuml: 376, + ZHcy: 1046, + Zdot: 379, + Zeta: 918, + Zopf: 8484, + Zscr: 119989, + andd: 10844, + andv: 10842, + ange: 10660, + aopf: 120146, + apid: 8779, + apos: 39, + ascr: 119990, + auml: 228, + bNot: 10989, + bbrk: 9141, + beta: 946, + beth: 8502, + bnot: 8976, + bopf: 120147, + boxH: 9552, + boxV: 9553, + boxh: 9472, + boxv: 9474, + bscr: 119991, + bsim: 8765, + bsol: 92, + bull: 8226, + bump: 8782, + cdot: 267, + cent: 162, + chcy: 1095, + cirE: 10691, + circ: 710, + cire: 8791, + comp: 8705, + cong: 8773, + copf: 120148, + copy: 169, + cscr: 119992, + csub: 10959, + csup: 10960, + dArr: 8659, + dHar: 10597, + darr: 8595, + dash: 8208, + diam: 8900, + djcy: 1106, + dopf: 120149, + dscr: 119993, + dscy: 1109, + dsol: 10742, + dtri: 9663, + dzcy: 1119, + eDot: 8785, + ecir: 8790, + edot: 279, + emsp: 8195, + ensp: 8194, + eopf: 120150, + epar: 8917, + epsi: 1013, + escr: 8495, + esim: 8770, + euml: 235, + euro: 8364, + excl: 33, + flat: 9837, + fnof: 402, + fopf: 
120151, + fork: 8916, + fscr: 119995, + gdot: 289, + geqq: 8807, + gjcy: 1107, + gnap: 10890, + gneq: 10888, + gopf: 120152, + gscr: 8458, + gsim: 8819, + gtcc: 10919, + hArr: 8660, + half: 189, + harr: 8596, + hbar: 8463, + hopf: 120153, + hscr: 119997, + iecy: 1077, + imof: 8887, + iocy: 1105, + iopf: 120154, + iota: 953, + iscr: 119998, + isin: 8712, + iuml: 239, + jopf: 120155, + jscr: 119999, + khcy: 1093, + kjcy: 1116, + kopf: 120156, + kscr: 120000, + lArr: 8656, + lHar: 10594, + lang: 10216, + larr: 8592, + late: 10925, + lcub: 123, + ldca: 10550, + ldsh: 8626, + leqq: 8806, + ljcy: 1113, + lnap: 10889, + lneq: 10887, + lopf: 120157, + lozf: 10731, + lpar: 40, + lscr: 120001, + lsim: 8818, + lsqb: 91, + ltcc: 10918, + ltri: 9667, + macr: 175, + male: 9794, + malt: 10016, + mlcp: 10971, + mldr: 8230, + mopf: 120158, + mscr: 120002, + nbsp: 160, + ncap: 10819, + ncup: 10818, + ngeq: 8817, + ngtr: 8815, + nisd: 8954, + njcy: 1114, + nldr: 8229, + nleq: 8816, + nmid: 8740, + nopf: 120159, + npar: 8742, + nscr: 120003, + nsim: 8769, + nsub: 8836, + nsup: 8837, + ntgl: 8825, + ntlg: 8824, + oast: 8859, + ocir: 8858, + odiv: 10808, + odot: 8857, + ogon: 731, + oint: 8750, + omid: 10678, + oopf: 120160, + opar: 10679, + ordf: 170, + ordm: 186, + oror: 10838, + oscr: 8500, + osol: 8856, + ouml: 246, + para: 182, + part: 8706, + perp: 8869, + phiv: 966, + plus: 43, + popf: 120161, + prap: 10935, + prec: 8826, + prnE: 10933, + prod: 8719, + prop: 8733, + pscr: 120005, + qint: 10764, + qopf: 120162, + qscr: 120006, + quot: 34, + rArr: 8658, + rHar: 10596, + race: 10714, + rang: 10217, + rarr: 8594, + rcub: 125, + rdca: 10551, + rdsh: 8627, + real: 8476, + rect: 9645, + rhov: 1009, + ring: 730, + ropf: 120163, + rpar: 41, + rscr: 120007, + rsqb: 93, + rtri: 9657, + scap: 10936, + scnE: 10934, + sdot: 8901, + sect: 167, + semi: 59, + sext: 10038, + shcy: 1096, + sime: 8771, + simg: 10910, + siml: 10909, + smid: 8739, + smte: 10924, + solb: 10692, + sopf: 120164, + spar: 
8741, + squf: 9642, + sscr: 120008, + star: 9734, + subE: 10949, + sube: 8838, + succ: 8827, + sung: 9834, + sup1: 185, + sup2: 178, + sup3: 179, + supE: 10950, + supe: 8839, + tbrk: 9140, + tdot: 8411, + tint: 8749, + toea: 10536, + topf: 120165, + tosa: 10537, + trie: 8796, + tscr: 120009, + tscy: 1094, + uArr: 8657, + uHar: 10595, + uarr: 8593, + uopf: 120166, + upsi: 965, + uscr: 120010, + utri: 9653, + uuml: 252, + vArr: 8661, + vBar: 10984, + varr: 8597, + vert: 124, + vopf: 120167, + vscr: 120011, + wopf: 120168, + wscr: 120012, + xcap: 8898, + xcup: 8899, + xmap: 10236, + xnis: 8955, + xopf: 120169, + xscr: 120013, + xvee: 8897, + yacy: 1103, + yicy: 1111, + yopf: 120170, + yscr: 120014, + yucy: 1102, + yuml: 255, + zdot: 380, + zeta: 950, + zhcy: 1078, + zopf: 120171, + zscr: 120015, + zwnj: 8204, + AMP: 38, + Acy: 1040, + Afr: 120068, + And: 10835, + Bcy: 1041, + Bfr: 120069, + Cap: 8914, + Cfr: 8493, + Chi: 935, + Cup: 8915, + Dcy: 1044, + Del: 8711, + Dfr: 120071, + Dot: 168, + ENG: 330, + ETH: 208, + Ecy: 1069, + Efr: 120072, + Eta: 919, + Fcy: 1060, + Ffr: 120073, + Gcy: 1043, + Gfr: 120074, + Hat: 94, + Hfr: 8460, + Icy: 1048, + Ifr: 8465, + Int: 8748, + Jcy: 1049, + Jfr: 120077, + Kcy: 1050, + Kfr: 120078, + Lcy: 1051, + Lfr: 120079, + Lsh: 8624, + Map: 10501, + Mcy: 1052, + Mfr: 120080, + Ncy: 1053, + Nfr: 120081, + Not: 10988, + Ocy: 1054, + Ofr: 120082, + Pcy: 1055, + Pfr: 120083, + Phi: 934, + Psi: 936, + Qfr: 120084, + REG: 174, + Rcy: 1056, + Rfr: 8476, + Rho: 929, + Rsh: 8625, + Scy: 1057, + Sfr: 120086, + Sub: 8912, + Sum: 8721, + Sup: 8913, + Tab: 9, + Tau: 932, + Tcy: 1058, + Tfr: 120087, + Ucy: 1059, + Ufr: 120088, + Vcy: 1042, + Vee: 8897, + Vfr: 120089, + Wfr: 120090, + Xfr: 120091, + Ycy: 1067, + Yfr: 120092, + Zcy: 1047, + Zfr: 8488, + acd: 8767, + acy: 1072, + afr: 120094, + amp: 38, + and: 8743, + ang: 8736, + apE: 10864, + ape: 8778, + ast: 42, + bcy: 1073, + bfr: 120095, + bot: 8869, + cap: 8745, + cfr: 120096, + chi: 967, + cir: 
9675, + cup: 8746, + dcy: 1076, + deg: 176, + dfr: 120097, + die: 168, + div: 247, + dot: 729, + ecy: 1101, + efr: 120098, + egs: 10902, + ell: 8467, + els: 10901, + eng: 331, + eta: 951, + eth: 240, + fcy: 1092, + ffr: 120099, + gEl: 10892, + gap: 10886, + gcy: 1075, + gel: 8923, + geq: 8805, + ges: 10878, + gfr: 120100, + ggg: 8921, + glE: 10898, + gla: 10917, + glj: 10916, + gnE: 8809, + gne: 10888, + hfr: 120101, + icy: 1080, + iff: 8660, + ifr: 120102, + int: 8747, + jcy: 1081, + jfr: 120103, + kcy: 1082, + kfr: 120104, + lEg: 10891, + lap: 10885, + lat: 10923, + lcy: 1083, + leg: 8922, + leq: 8804, + les: 10877, + lfr: 120105, + lgE: 10897, + lnE: 8808, + lne: 10887, + loz: 9674, + lrm: 8206, + lsh: 8624, + map: 8614, + mcy: 1084, + mfr: 120106, + mho: 8487, + mid: 8739, + nap: 8777, + ncy: 1085, + nfr: 120107, + nge: 8817, + ngt: 8815, + nis: 8956, + niv: 8715, + nle: 8816, + nlt: 8814, + not: 172, + npr: 8832, + nsc: 8833, + num: 35, + ocy: 1086, + ofr: 120108, + ogt: 10689, + ohm: 8486, + olt: 10688, + ord: 10845, + orv: 10843, + par: 8741, + pcy: 1087, + pfr: 120109, + phi: 966, + piv: 982, + prE: 10931, + pre: 10927, + psi: 968, + qfr: 120110, + rcy: 1088, + reg: 174, + rfr: 120111, + rho: 961, + rlm: 8207, + rsh: 8625, + scE: 10932, + sce: 10928, + scy: 1089, + sfr: 120112, + shy: 173, + sim: 8764, + smt: 10922, + sol: 47, + squ: 9633, + sub: 8834, + sum: 8721, + sup: 8835, + tau: 964, + tcy: 1090, + tfr: 120113, + top: 8868, + ucy: 1091, + ufr: 120114, + uml: 168, + vcy: 1074, + vee: 8744, + vfr: 120115, + wfr: 120116, + xfr: 120117, + ycy: 1099, + yen: 165, + yfr: 120118, + zcy: 1079, + zfr: 120119, + zwj: 8205, + DD: 8517, + GT: 62, + Gg: 8921, + Gt: 8811, + Im: 8465, + LT: 60, + Ll: 8920, + Lt: 8810, + Mu: 924, + Nu: 925, + Or: 10836, + Pi: 928, + Pr: 10939, + Re: 8476, + Sc: 10940, + Xi: 926, + ac: 8766, + af: 8289, + ap: 8776, + dd: 8518, + ee: 8519, + eg: 10906, + el: 10905, + gE: 8807, + ge: 8805, + gg: 8811, + gl: 8823, + gt: 62, + ic: 8291, + 
ii: 8520, + in: 8712, + it: 8290, + lE: 8806, + le: 8804, + lg: 8822, + ll: 8810, + lt: 60, + mp: 8723, + mu: 956, + ne: 8800, + ni: 8715, + nu: 957, + oS: 9416, + or: 8744, + pi: 960, + pm: 177, + pr: 8826, + rx: 8478, + sc: 8827, + wp: 8472, + wr: 8768, + xi: 958, +}; diff --git a/packages/astro-parser/src/parse/utils/html.ts b/packages/astro-parser/src/parse/utils/html.ts new file mode 100644 index 000000000..3b406c9cc --- /dev/null +++ b/packages/astro-parser/src/parse/utils/html.ts @@ -0,0 +1,143 @@ +// @ts-nocheck + +import entities from './entities.js'; + +// Replacement code points for numeric character references in the range 128-159. +// validate_code() indexes this table with `code - 128`, so entry 0 stands for 128. +const windows_1252 = [ + 8364, + 129, + 8218, + 402, + 8222, + 8230, + 8224, + 8225, + 710, + 8240, + 352, + 8249, + 338, + 141, + 381, + 143, + 144, + 8216, + 8217, + 8220, + 8221, + 8226, + 8211, + 8212, + 732, + 8482, + 353, + 8250, + 339, + 157, + 382, + 376, +]; + +// Matches `&`, then an optional `#` followed by either `x` plus word characters, a run of +// digits, or one of the keys of `entities`; the reference ends at `;` or at a word boundary. +// The capture group holds everything between `&` and the terminator. +const entity_pattern = new RegExp(`&(#?(?:x[\\w\\d]+|\\d+|${Object.keys(entities).join('|')}))(?:;|\\b)`, 'g'); + +// Replaces every character reference in `html` with the character it stands for. +// References without a leading `#` are looked up in `entities`, `#x...` references are parsed +// as base 16 and the remaining `#...` references as base 10. The resulting code is passed +// through validate_code() before it is turned into a string. +export function decode_character_references(html: string) { + return html.replace(entity_pattern, (match, entity) => { + let code; + + // Handle named entities + if (entity[0] !== '#') { + code = entities[entity]; + } else if (entity[1] === 'x') { + code = parseInt(entity.substring(2), 16); + } else { + code = parseInt(entity.substring(1), 10); + } + + // a falsy code (name missing from `entities`, NaN from parseInt, or 0) leaves the + // reference in the output exactly as it was written + if (!code) { + return match; + } + + return String.fromCodePoint(validate_code(code)); + }); +} + +// Code point that validate_code() returns for every input it rejects. +const NUL = 0; + +// some code points are verboten. If we were inserting HTML, the browser would replace the illegal +// code points with alternatives in some cases - since we're bypassing that mechanism, we need +// to replace them ourselves +// +// Source: http://en.wikipedia.org/wiki/Character_encodings_in_HTML#Illegal_characters +function validate_code(code: number) { + // line feed becomes generic whitespace + if (code === 10) { + return 32; + } + + // ASCII range. (Why someone would use HTML entities for ASCII characters I don't know, but...) 
+ if (code < 128) { + return code; + } + + // code points 128-159 are dealt with leniently by browsers, but they're incorrect. We need + // to correct the mistake or we'll end up with missing € signs and so on + if (code <= 159) { + return windows_1252[code - 128]; + } + + // basic multilingual plane + if (code < 55296) { + return code; + } + + // UTF-16 surrogate halves + if (code <= 57343) { + return NUL; + } + + // rest of the basic multilingual plane + if (code <= 65535) { + return code; + } + + // supplementary multilingual plane 0x10000 - 0x1ffff + if (code >= 65536 && code <= 131071) { + return code; + } + + // supplementary ideographic plane 0x20000 - 0x2ffff + if (code >= 131072 && code <= 196607) { + return code; + } + + // NOTE(review): every code above 196607 (0x2ffff) falls through to NUL here, including + // values up to 0x10ffff that String.fromCodePoint() would accept - confirm this is intended + return NUL; +} + +// based on http://developers.whatwg.org/syntax.html#syntax-tag-omission +// Maps a tag name to the set of tag names that closing_tag_omitted() treats as implicitly +// closing it when they come next. +const disallowed_contents = new Map([ + ['li', new Set(['li'])], + ['dt', new Set(['dt', 'dd'])], + ['dd', new Set(['dt', 'dd'])], + ['p', new Set('address article aside blockquote div dl fieldset footer form h1 h2 h3 h4 h5 h6 header hgroup hr main menu nav ol p pre section table ul'.split(' '))], + ['rt', new Set(['rt', 'rp'])], + ['rp', new Set(['rt', 'rp'])], + ['optgroup', new Set(['optgroup'])], + ['option', new Set(['option', 'optgroup'])], + ['thead', new Set(['tbody', 'tfoot'])], + ['tbody', new Set(['tbody', 'tfoot'])], + ['tfoot', new Set(['tbody'])], + ['tr', new Set(['tr', 'tbody'])], + ['td', new Set(['td', 'th', 'tr'])], + ['th', new Set(['td', 'th', 'tr'])], +]); + +// can this be a child of the parent element, or does it implicitly +// close it, like `<li>one<li>two`? 
/**
 * Decides whether the open element `current` is implicitly closed at this
 * point, as happens in markup such as `<li>one<li>two`.
 *
 * The decision is driven by `disallowed_contents`: a tag that has no entry
 * there is never implicitly closed.
 *
 * @param current - Tag name of the element that is currently open.
 * @param next - Tag name that is about to be opened. When it is omitted (or
 *   empty), every `current` that has an entry counts as implicitly closed.
 * @returns `true` if the closing tag of `current` may be treated as omitted.
 */
export function closing_tag_omitted(current: string, next?: string): boolean {
	// A single `get` replaces the former `has` + `get` pair, so `.has(next)` is
	// never invoked on a possibly-undefined lookup result.
	const disallowed = disallowed_contents.get(current);
	if (disallowed === undefined) {
		return false;
	}

	return !next || disallowed.has(next);
}

// ---------------------------------------------------------------------------
// The diff starts a new file here: packages/astro-parser/src/parse/utils/node.ts
// Everything below belongs to node.ts.
// ---------------------------------------------------------------------------

// Only used in a type position, so a type-only import is enough.
import type { TemplateNode } from '../../interfaces.js';

/**
 * Returns a short human-readable label for a template node.
 *
 * Block-like nodes get a label built from their mustache syntax (for example
 * `IfBlock` becomes `{#if} block`), element-like nodes are rendered as
 * `<name> tag`, and any other node is labelled with its raw `type`.
 *
 * @param node - Node to describe; only `type` and, for element-like nodes,
 *   `name` are read.
 * @returns The label for `node`.
 */
export function to_string(node: TemplateNode): string {
	switch (node.type) {
		case 'IfBlock':
			return '{#if} block';
		case 'ThenBlock':
			return '{:then} block';
		case 'ElseBlock':
			return '{:else} block';
		case 'PendingBlock':
		case 'AwaitBlock':
			// the pending part shares the label of the await block itself
			return '{#await} block';
		case 'CatchBlock':
			return '{:catch} block';
		case 'EachBlock':
			return '{#each} block';
		case 'RawMustacheTag':
			return '{@html} block';
		case 'DebugTag':
			return '{@debug} block';
		case 'Element':
		case 'InlineComponent':
		case 'Slot':
		case 'Title':
			return `<${node.name}> tag`;
		default:
			return node.type;
	}
}