summaryrefslogtreecommitdiff
path: root/src/parser/parse/utils/html.ts
diff options
context:
space:
mode:
authorGravatar Nate Moore <natemoo-re@users.noreply.github.com> 2021-04-30 16:33:35 -0500
committerGravatar GitHub <noreply@github.com> 2021-04-30 16:33:35 -0500
commit4df1347156cf2632ea2f3475d3a5f8f08d197cc3 (patch)
tree9d50de89dfe62827c32a8a4046120af4ab61dc0c /src/parser/parse/utils/html.ts
parent1d498facc8f78a3ffbfecd05cc6ecd45e8a4a1ae (diff)
downloadastro-4df1347156cf2632ea2f3475d3a5f8f08d197cc3.tar.gz
astro-4df1347156cf2632ea2f3475d3a5f8f08d197cc3.tar.zst
astro-4df1347156cf2632ea2f3475d3a5f8f08d197cc3.zip
Migrate to `yarn` monorepo (#157)
* chore: use monorepo * chore: scaffold astro-scripts * chore: move tests inside packages/astro * chore: refactor tests, add scripts * chore: move parser to own module * chore: move runtime to packages/astro * fix: move parser to own package * test: fix prettier-plugin-astro tests * fix: tests * chore: update package-lock * chore: add changesets * fix: cleanup examples * fix: starter example * chore: update changeset config * chore: update changeset config * chore: setup changeset release workflow * chore: bump lockfiles * chore: prism => astro-prism * fix: tsc --emitDeclarationOnly * chore: final cleanup, switch to yarn * chore: add lerna * chore: update workflows to yarn * chore: update workflows * chore: remove lint workflow * chore: add astro-dev script * chore: add symlinked README
Diffstat (limited to 'src/parser/parse/utils/html.ts')
-rw-r--r--src/parser/parse/utils/html.ts143
1 files changed, 0 insertions, 143 deletions
diff --git a/src/parser/parse/utils/html.ts b/src/parser/parse/utils/html.ts
deleted file mode 100644
index 3b406c9cc..000000000
--- a/src/parser/parse/utils/html.ts
+++ /dev/null
@@ -1,143 +0,0 @@
-// @ts-nocheck
-
-import entities from './entities.js';
-
-const windows_1252 = [
- 8364,
- 129,
- 8218,
- 402,
- 8222,
- 8230,
- 8224,
- 8225,
- 710,
- 8240,
- 352,
- 8249,
- 338,
- 141,
- 381,
- 143,
- 144,
- 8216,
- 8217,
- 8220,
- 8221,
- 8226,
- 8211,
- 8212,
- 732,
- 8482,
- 353,
- 8250,
- 339,
- 157,
- 382,
- 376,
-];
-
-const entity_pattern = new RegExp(`&(#?(?:x[\\w\\d]+|\\d+|${Object.keys(entities).join('|')}))(?:;|\\b)`, 'g');
-
-export function decode_character_references(html: string) {
- return html.replace(entity_pattern, (match, entity) => {
- let code;
-
- // Handle named entities
- if (entity[0] !== '#') {
- code = entities[entity];
- } else if (entity[1] === 'x') {
- code = parseInt(entity.substring(2), 16);
- } else {
- code = parseInt(entity.substring(1), 10);
- }
-
- if (!code) {
- return match;
- }
-
- return String.fromCodePoint(validate_code(code));
- });
-}
-
-const NUL = 0;
-
-// some code points are verboten. If we were inserting HTML, the browser would replace the illegal
-// code points with alternatives in some cases - since we're bypassing that mechanism, we need
-// to replace them ourselves
-//
-// Source: http://en.wikipedia.org/wiki/Character_encodings_in_HTML#Illegal_characters
-function validate_code(code: number) {
- // line feed becomes generic whitespace
- if (code === 10) {
- return 32;
- }
-
- // ASCII range. (Why someone would use HTML entities for ASCII characters I don't know, but...)
- if (code < 128) {
- return code;
- }
-
- // code points 128-159 are dealt with leniently by browsers, but they're incorrect. We need
- // to correct the mistake or we'll end up with missing € signs and so on
- if (code <= 159) {
- return windows_1252[code - 128];
- }
-
- // basic multilingual plane
- if (code < 55296) {
- return code;
- }
-
- // UTF-16 surrogate halves
- if (code <= 57343) {
- return NUL;
- }
-
- // rest of the basic multilingual plane
- if (code <= 65535) {
- return code;
- }
-
- // supplementary multilingual plane 0x10000 - 0x1ffff
- if (code >= 65536 && code <= 131071) {
- return code;
- }
-
- // supplementary ideographic plane 0x20000 - 0x2ffff
- if (code >= 131072 && code <= 196607) {
- return code;
- }
-
- return NUL;
-}
-
-// based on http://developers.whatwg.org/syntax.html#syntax-tag-omission
-const disallowed_contents = new Map([
- ['li', new Set(['li'])],
- ['dt', new Set(['dt', 'dd'])],
- ['dd', new Set(['dt', 'dd'])],
- ['p', new Set('address article aside blockquote div dl fieldset footer form h1 h2 h3 h4 h5 h6 header hgroup hr main menu nav ol p pre section table ul'.split(' '))],
- ['rt', new Set(['rt', 'rp'])],
- ['rp', new Set(['rt', 'rp'])],
- ['optgroup', new Set(['optgroup'])],
- ['option', new Set(['option', 'optgroup'])],
- ['thead', new Set(['tbody', 'tfoot'])],
- ['tbody', new Set(['tbody', 'tfoot'])],
- ['tfoot', new Set(['tbody'])],
- ['tr', new Set(['tr', 'tbody'])],
- ['td', new Set(['td', 'th', 'tr'])],
- ['th', new Set(['td', 'th', 'tr'])],
-]);
-
-// can this be a child of the parent element, or does it implicitly
-// close it, like `<li>one<li>two`?
-export function closing_tag_omitted(current: string, next?: string) {
- if (disallowed_contents.has(current)) {
- if (!next || disallowed_contents.get(current).has(next)) {
- return true;
- }
- }
-
- return false;
-}