diff options
Diffstat (limited to 'src/parser.ts')
-rw-r--r-- | src/parser.ts | 304 |
1 files changed, 304 insertions, 0 deletions
diff --git a/src/parser.ts b/src/parser.ts new file mode 100644 index 000000000..f47486f65 --- /dev/null +++ b/src/parser.ts @@ -0,0 +1,304 @@ +const [ + CHARS, + TAG_START, + TAG_END, + END_TAG_START, + EQ, + EOF, + UNKNOWN +] = Array.from(new Array(20), (x, i) => i + 1); + +const voidTags = new Set(['area', 'base', 'br', 'col', 'command', 'embed', 'hr', + 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']); + +type Visitor = (tag: Tag) => Tag; + +interface State { + code: string; + index: number; + visitor: Visitor; + tagName?: string; +} + +interface Attribute { + name: string; + value?: string; + boolean: boolean; + start: number; + end: number; +} + +interface Text { + type: 0; + data: string; + start: number; + end: number; +} + +export interface Tag { + type: 1; + tagName: string; + attributes: Array<Attribute>; + children: Array<Tag | Text>; + void: boolean; + start: number; + end: number; +} + +interface Document { + children: Array<Tag | Text>; +} + +function stateChar(state: State) { + return state.code[state.index]; +} + +function stateNext(state: State) { + state.index++; + return stateChar(state); +} + +function stateRewind(state: State) { + state.index--; + return stateChar(state); +} + +function stateInBounds(state: State) { + return state.index < state.code.length; +} + +function createState(code: string, visitor: Visitor): State { + return { + code, + index: 0, + visitor + }; +} + +function* _stringify(tag: Tag): Generator<string, void, unknown> { + yield '<'; + yield tag.tagName; + for(let attr of tag.attributes) { + yield ' '; + yield `"${attr.name}"`; + if(!attr.boolean) { + yield '='; + yield `"${attr.value}"`; + } + } + if(!tag.void) { + for(let child of tag.children) { + if(child.type === 0) { + yield child.data; + } else { + yield * _stringify(child); + } + } + } +} + +function stringify(tag: Tag) { + let out = ''; + for(let chunk of _stringify(tag)) { + out += chunk; + } + return out; +} + +function spliceSlice(str: string, index: number, count: number, add: string) { + // We cannot pass negative indexes directly to the 2nd slicing operation. + if (index < 0) { + index = str.length + index; + if (index < 0) { + index = 0; + } + } + + return str.slice(0, index) + (add || "") + str.slice(index + count); +} + +function replaceTag(state: State, tag: Tag) { + const origLen = tag.end - tag.start; + const html = stringify(tag); + const newLen = html.length; + const newCurIndex = tag.start + newLen; + + state.code = spliceSlice(state.code, tag.start, origLen, html); + state.index = newCurIndex; +} + +function consumeToken(state: State) { + do { + const c = stateNext(state); + + if(/\s/.test(c)) { + continue; + } + + if(c === '<') { + return TAG_START; + } + + if(c === '>') { + return TAG_END; + } + + if(c === '/') { + return END_TAG_START; + } + + if(/[a-zA-Z]/.test(c)) { + return CHARS; + } + + return UNKNOWN; + } while(stateInBounds(state)); + + return EOF; +} + +function consumeText(state: State): Text { + let start = state.index; + let data = ''; + let c = stateNext(state); + while(stateInBounds(state) && c !== '<') { + data += c; + c = stateNext(state); + } + + return { + type: 0, + data, + start, + end: state.index - 1 + }; +} + +function consumeTagName(state: State): string { + let name = ''; + let token = consumeToken(state); + while(token === CHARS) { + name += stateChar(state); + token = consumeToken(state); + } + return name.toLowerCase(); +} + +function consumeAttribute(state: State): Attribute { + let start = state.index; + let name = '', token; + do { + name += stateChar(state).toLowerCase(); + token = consumeToken(state); + } while(token === CHARS); + + if(token !== EQ) { + stateRewind(state); + return { + name, + boolean: true, + start, + end: state.index - 1 + }; + } + + let value = ''; + do { + value += stateChar(state).toLowerCase(); + token = consumeToken(state); + } while(token === CHARS); + + return { + name, + value, + boolean: false, + start, + end: state.index - 1 + }; +} + +function consumeChildren(state: State): Array<Tag | Text> { + const children: Array<Tag | Text> = []; + + childLoop: while(stateInBounds(state)) { + const token = consumeToken(state); + switch(token) { + case TAG_START: { + const next = consumeToken(state); + if(next === END_TAG_START) { + consumeTagName(state); + consumeToken(state); // > + break childLoop; + } else { + stateRewind(state); + consumeTag(state); + } + break; + } + case CHARS: { + children.push(consumeText(state)); + break; + } + default: { + break; + } + } + } + + return children; +} + +function consumeTag(state: State): Tag { + const start = state.index - 1; + const tagName = consumeTagName(state); + const attributes: Array<Attribute> = []; + + let token = consumeToken(state); + + // Collect attributes + attrLoop: while(token !== TAG_END) { + switch(token) { + case CHARS: { + attributes.push(consumeAttribute(state)); + break; + } + default: { + break attrLoop; + } + } + + token = consumeToken(state); + } + + const children: Array<Tag | Text> = consumeChildren(state); + + const node: Tag = { + type: 1, + tagName, + attributes, + children, + void: voidTags.has(tagName), + start, + end: state.index - 1 + }; + + const replacement = state.visitor(node); + if(replacement !== node) { + replaceTag(state, node); + } + + return node; +} + +function consumeDocument(state: State): Document { + const children: Array<Tag | Text> = consumeChildren(state); + + return { + children + }; +} + +export function preparse(code: string, visitor: Visitor) { + const state = createState(code, visitor); + consumeDocument(state); +}
\ No newline at end of file |