diff options
Diffstat (limited to 'src/compiler/parse/utils/html.ts')
| -rw-r--r-- | src/compiler/parse/utils/html.ts | 143 | 
1 files changed, 0 insertions, 143 deletions
// @ts-nocheck

import entities from './entities.js';

/**
 * Replacement code points for numeric character references in the range
 * 128–159, indexed by `code - 128` (see `validate_code`). Entries that equal
 * their own code (129, 141, 143, 144, 157) are passed through unchanged.
 */
const windows_1252 = [
  8364, 129, 8218, 402, 8222, 8230, 8224, 8225,
  710, 8240, 352, 8249, 338, 141, 381, 143,
  144, 8216, 8217, 8220, 8221, 8226, 8211, 8212,
  732, 8482, 353, 8250, 339, 157, 382, 376,
];

/**
 * Matches `&name`, `&#123` and `&#x7b` style references, terminated either by
 * `;` or by a word boundary. Capture group 1 is the reference without the
 * leading `&` and without the terminator. The list of names is taken from the
 * keys of the imported `entities` table.
 */
const entity_pattern = new RegExp(
  `&(#?(?:x[\\w\\d]+|\\d+|${Object.keys(entities).join('|')}))(?:;|\\b)`,
  'g'
);

/**
 * Replaces HTML character references in `html` with the characters they
 * stand for.
 *
 * Named references are looked up in `entities`; `&#x…` is parsed as
 * hexadecimal and `&#…` as decimal. A reference whose code resolves to a
 * falsy value (unknown name, unparsable number, or 0) is left exactly as
 * written. Every other code is passed through `validate_code` before being
 * converted to a string.
 *
 * @param html - text that may contain character references
 * @returns the text with recognised references decoded
 */
export function decode_character_references(html: string): string {
  return html.replace(entity_pattern, (match: string, entity: string) => {
    let code: number | undefined;

    if (entity[0] !== '#') {
      // named entity
      code = entities[entity];
    } else if (entity[1] === 'x') {
      // hexadecimal: skip the leading "#x"
      code = parseInt(entity.substring(2), 16);
    } else {
      // decimal: skip the leading "#"
      code = parseInt(entity.substring(1), 10);
    }

    // undefined, NaN and 0 all mean "nothing to decode" - keep the source text
    if (!code) {
      return match;
    }

    return String.fromCodePoint(validate_code(code));
  });
}

const NUL = 0;

const LINE_FEED = 0x0a;
const SPACE = 0x20;
const FIRST_SURROGATE = 0xd800;
const LAST_SURROGATE = 0xdfff;
const MAX_CODE_POINT = 0x10ffff;

// some code points are verboten. If we were inserting HTML, the browser would replace the illegal
// code points with alternatives in some cases - since we're bypassing that mechanism, we need
// to replace them ourselves
//
// Source: http://en.wikipedia.org/wiki/Character_encodings_in_HTML#Illegal_characters
/**
 * Maps a numeric code from a character reference to the code point that
 * should actually be emitted.
 *
 * @param code - the code taken from the reference
 * @returns the code point to emit, or `NUL` (0) when `code` is a UTF-16
 *   surrogate half or lies above U+10FFFF
 */
function validate_code(code: number): number {
  // line feed becomes generic whitespace
  if (code === LINE_FEED) {
    return SPACE;
  }

  // ASCII range. (Why someone would use HTML entities for ASCII characters I don't know, but...)
  if (code < 128) {
    return code;
  }

  // code points 128-159 are dealt with leniently by browsers, but they're incorrect. We need
  // to correct the mistake or we'll end up with missing € signs and so on
  if (code <= 159) {
    return windows_1252[code - 128];
  }

  // UTF-16 surrogate halves are not characters in their own right
  if (code >= FIRST_SURROGATE && code <= LAST_SURROGATE) {
    return NUL;
  }

  // anything past the last Unicode plane cannot be represented
  if (code > MAX_CODE_POINT) {
    return NUL;
  }

  // everything else - the basic multilingual plane and all supplementary
  // planes (0x10000 - 0x10ffff) - is a valid code point and is kept as is.
  // Previously only planes 1 and 2 were accepted, so references into
  // planes 3-16 were turned into NUL.
  return code;
}

// based on http://developers.whatwg.org/syntax.html#syntax-tag-omission
/**
 * For each element whose closing tag may be omitted: the set of following
 * tag names that implicitly close it.
 */
const disallowed_contents = new Map<string, Set<string>>([
  ['li', new Set(['li'])],
  ['dt', new Set(['dt', 'dd'])],
  ['dd', new Set(['dt', 'dd'])],
  ['p', new Set('address article aside blockquote div dl fieldset footer form h1 h2 h3 h4 h5 h6 header hgroup hr main menu nav ol p pre section table ul'.split(' '))],
  ['rt', new Set(['rt', 'rp'])],
  ['rp', new Set(['rt', 'rp'])],
  ['optgroup', new Set(['optgroup'])],
  ['option', new Set(['option', 'optgroup'])],
  ['thead', new Set(['tbody', 'tfoot'])],
  ['tbody', new Set(['tbody', 'tfoot'])],
  ['tfoot', new Set(['tbody'])],
  ['tr', new Set(['tr', 'tbody'])],
  ['td', new Set(['td', 'th', 'tr'])],
  ['th', new Set(['td', 'th', 'tr'])],
]);

/**
 * Can `next` be a child of the `current` element, or does it implicitly
 * close it, like `<li>one<li>two`?
 *
 * @param current - tag name of the element that is currently open
 * @param next - tag name about to be opened; when omitted (or empty), any
 *   element listed in `disallowed_contents` is treated as closed
 * @returns `true` when the closing tag of `current` was omitted, i.e.
 *   `current` has an entry in `disallowed_contents` and `next` is either
 *   absent or in that entry's set; `false` otherwise
 */
export function closing_tag_omitted(current: string, next?: string): boolean {
  // a single lookup replaces the former has() + get() pair
  const closed_by = disallowed_contents.get(current);
  if (!closed_by) {
    return false;
  }

  return !next || closed_by.has(next);
}
