1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
|
// Thank you @evanw for this code!!!
const fs = require("fs");
const path = require("path");
// ES5 reference: https://es5.github.io/
//
// A conforming implementation of this International standard shall interpret
// characters in conformance with the Unicode Standard, Version 3.0 or later
// and ISO/IEC 10646-1 with either UCS-2 or UTF-16 as the adopted encoding
// form, implementation level 3. If the adopted ISO/IEC 10646-1 subset is not
// otherwise specified, it is presumed to be the BMP subset, collection 300.
//
// UnicodeLetter: any character in the Unicode categories “Uppercase letter (Lu)”,
// “Lowercase letter (Ll)”, “Titlecase letter (Lt)”, “Modifier letter (Lm)”,
// “Other letter (Lo)”, or “Letter number (Nl)”.
const idStartES5 = []
.concat(
require("@unicode/unicode-3.0.0/General_Category/Uppercase_Letter/code-points"),
require("@unicode/unicode-3.0.0/General_Category/Lowercase_Letter/code-points"),
require("@unicode/unicode-3.0.0/General_Category/Titlecase_Letter/code-points"),
require("@unicode/unicode-3.0.0/General_Category/Modifier_Letter/code-points"),
require("@unicode/unicode-3.0.0/General_Category/Other_Letter/code-points")
// The "letter number" category is not included because old versions of Safari
// had a bug where they didn't include it. This means it does not match ES5.
// We need to make sure we escape these characters so Safari can read them.
// See https://github.com/evanw/esbuild/issues/1349 for more information.
// require('@unicode/unicode-3.0.0/General_Category/Letter_Number/code-points'),
)
.sort((a, b) => a - b);
// UnicodeCombiningMark: any character in the Unicode categories “Non-spacing mark (Mn)”
// or “Combining spacing mark (Mc)”
// UnicodeDigit: any character in the Unicode category “Decimal number (Nd)”
// UnicodeConnectorPunctuation: any character in the Unicode category “Connector punctuation (Pc)”
const idContinueES5 = idStartES5
.concat(
require("@unicode/unicode-3.0.0/General_Category/Nonspacing_Mark/code-points"),
require("@unicode/unicode-3.0.0/General_Category/Spacing_Mark/code-points"),
require("@unicode/unicode-3.0.0/General_Category/Decimal_Number/code-points"),
require("@unicode/unicode-3.0.0/General_Category/Connector_Punctuation/code-points")
)
.sort((a, b) => a - b);
// ESNext reference: https://tc39.es/ecma262/
//
// A conforming implementation of ECMAScript must interpret source text input
// in conformance with the Unicode Standard, Version 5.1.0 or later and ISO/IEC
// 10646. If the adopted ISO/IEC 10646-1 subset is not otherwise specified, it
// is presumed to be the Unicode set, collection 10646.
//
// UnicodeIDStart: any Unicode code point with the Unicode property “ID_Start”
const idStartESNext = require("@unicode/unicode-13.0.0/Binary_Property/ID_Start/code-points");
const idStartESNextSet = new Set(idStartESNext);
// UnicodeIDContinue: any Unicode code point with the Unicode property “ID_Continue”
const idContinueESNext = require("@unicode/unicode-13.0.0/Binary_Property/ID_Continue/code-points");
const idContinueESNextSet = new Set(idContinueESNext);
// These identifiers are valid in both ES5 and ES6+ (i.e. an intersection of both)
const idStartES5AndESNext = idStartES5.filter((n) => idStartESNextSet.has(n));
const idContinueES5AndESNext = idContinueES5.filter((n) =>
idContinueESNextSet.has(n)
);
// These identifiers are valid in either ES5 or ES6+ (i.e. a union of both)
const idStartES5OrESNext = [...new Set(idStartES5.concat(idStartESNext))].sort(
(a, b) => a - b
);
const idContinueES5OrESNext = [
...new Set(idContinueES5.concat(idContinueESNext)),
].sort((a, b) => a - b);
function generateRangeTable(codePoints) {
let lines = [];
let index = 0;
let latinOffset = 0;
while (latinOffset < codePoints.length && codePoints[latinOffset] <= 0xff) {
latinOffset++;
}
lines.push(`RangeTable.init(`, ` ${latinOffset},`, ` &[_]R16Range{`);
// 16-bit code points
while (index < codePoints.length && codePoints[index] < 0x1000) {
let start = codePoints[index];
index++;
while (
index < codePoints.length &&
codePoints[index] < 0x1000 &&
codePoints[index] === codePoints[index - 1] + 1
) {
index++;
}
let end = codePoints[index - 1];
lines.push(` .{0x${start.toString(16)}, 0x${end.toString(16)}},`);
}
lines.push(` },`, `&[_]R32Range{`);
// 32-bit code points
while (index < codePoints.length) {
let start = codePoints[index];
index++;
while (
index < codePoints.length &&
codePoints[index] === codePoints[index - 1] + 1
) {
index++;
}
let end = codePoints[index - 1];
lines.push(` .{0x${start.toString(16)}, 0x${end.toString(16)}},`);
}
lines.push(` },`, `);`);
return lines.join("\n");
}
fs.writeFileSync(
path.join(__dirname, "..", "src", "js_lexer", "unicode.zig"),
`// This file was automatically generated by ${path.basename(
__filename
)}. Do not edit.
const RangeTable = @import("./range_table.zig");
// ES5 || ESNext
pub const id_start = ${generateRangeTable(idStartES5OrESNext)}
// ES5 || ESNext
pub const id_continue = ${generateRangeTable(idContinueES5OrESNext)}
pub const printable_id_start = ${generateRangeTable(idStartESNext)}
pub const printable_id_continue = ${generateRangeTable(idContinueESNext)}
`
);
|