mirror of
https://github.com/oven-sh/bun
synced 2026-02-02 15:08:46 +00:00
* Add a zig fmt action * add failing file * Setup prettier better * Update prettier-fmt.yml * Fail on error * Update prettier-fmt.yml * boop * boop2 * tar.gz * Update zig-fmt.yml * Update zig-fmt.yml * Update zig-fmt.yml * Update zig-fmt.yml * Update zig-fmt.yml * boop * Update prettier-fmt.yml * tag * newlines * multiline * fixup * Update zig-fmt.yml * update it * fixup * both * w * Update prettier-fmt.yml * prettier all the things * Update package.json * zig fmt * ❌ ✅ * bump * . * quotes * fix prettier ignore * once more * Update prettier-fmt.yml * Update fallback.ts * consistentcy --------- Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>
173 lines
6.6 KiB
JavaScript
173 lines
6.6 KiB
JavaScript
// Thank you @evanw for this code!!!
|
|
const fs = require("fs");
|
|
const path = require("path");
|
|
|
|
// ES5 identifier-start characters. Spec text: https://es5.github.io/
//
// ES5 requires a conforming implementation to interpret characters in
// conformance with the Unicode Standard, Version 3.0 or later, and ISO/IEC
// 10646-1 with either UCS-2 or UTF-16 as the adopted encoding form,
// implementation level 3. When the adopted ISO/IEC 10646-1 subset is not
// otherwise specified, it is presumed to be the BMP subset, collection 300.
// The tables below are therefore taken from the Unicode 3.0.0 data.
//
// ES5 defines UnicodeLetter as any character in the categories
// "Uppercase letter (Lu)", "Lowercase letter (Ll)", "Titlecase letter (Lt)",
// "Modifier letter (Lm)", "Other letter (Lo)", or "Letter number (Nl)".
const idStartES5 = [
  ...require("@unicode/unicode-3.0.0/General_Category/Uppercase_Letter/code-points"),
  ...require("@unicode/unicode-3.0.0/General_Category/Lowercase_Letter/code-points"),
  ...require("@unicode/unicode-3.0.0/General_Category/Titlecase_Letter/code-points"),
  ...require("@unicode/unicode-3.0.0/General_Category/Modifier_Letter/code-points"),
  ...require("@unicode/unicode-3.0.0/General_Category/Other_Letter/code-points"),

  // "Letter number" (General_Category/Letter_Number) is deliberately left out,
  // which means this set does not match ES5 exactly. Old versions of Safari
  // had a bug where they didn't include that category, so these characters
  // must be escaped for Safari to be able to read them.
  // See https://github.com/evanw/esbuild/issues/1349 for more information.
].sort((a, b) => a - b); // numeric ascending; the default sort is lexicographic
|
|
|
|
// ES5 identifier-continue characters: everything in idStartES5 plus
//   - UnicodeCombiningMark: categories "Non-spacing mark (Mn)" and
//     "Combining spacing mark (Mc)"
//   - UnicodeDigit: category "Decimal number (Nd)"
//   - UnicodeConnectorPunctuation: category "Connector punctuation (Pc)"
const idContinueES5 = [
  ...idStartES5,
  ...require("@unicode/unicode-3.0.0/General_Category/Nonspacing_Mark/code-points"),
  ...require("@unicode/unicode-3.0.0/General_Category/Spacing_Mark/code-points"),
  ...require("@unicode/unicode-3.0.0/General_Category/Decimal_Number/code-points"),
  ...require("@unicode/unicode-3.0.0/General_Category/Connector_Punctuation/code-points"),
].sort((a, b) => a - b); // re-sort numerically after merging the categories
|
|
|
|
// ESNext identifier characters. Spec text: https://tc39.es/ecma262/
//
// A conforming implementation of ECMAScript must interpret source text input
// in conformance with the Unicode Standard, Version 5.1.0 or later and ISO/IEC
// 10646. If the adopted ISO/IEC 10646-1 subset is not otherwise specified, it
// is presumed to be the Unicode set, collection 10646.
//
// The data used here comes from the Unicode 13.0.0 tables.

// UnicodeIDStart: any Unicode code point with the Unicode property "ID_Start".
const idStartESNext = require("@unicode/unicode-13.0.0/Binary_Property/ID_Start/code-points");
// Set view of the same code points, used for membership tests below.
const idStartESNextSet = new Set(idStartESNext);

// UnicodeIDContinue: any Unicode code point with the Unicode property "ID_Continue".
const idContinueESNext = require("@unicode/unicode-13.0.0/Binary_Property/ID_Continue/code-points");
// Set view of the same code points, used for membership tests below.
const idContinueESNextSet = new Set(idContinueESNext);

// Intersection: code points valid in both ES5 and ES6+. Filtering the sorted
// ES5 arrays keeps the results in ascending order.
const idStartES5AndESNext = idStartES5.filter(codePoint => idStartESNextSet.has(codePoint));
const idContinueES5AndESNext = idContinueES5.filter(codePoint => idContinueESNextSet.has(codePoint));

// Union: code points valid in either ES5 or ES6+. The Set removes code points
// present in both inputs; the result is then sorted numerically.
const idStartES5OrESNext = Array.from(new Set([...idStartES5, ...idStartESNext])).sort((a, b) => a - b);
const idContinueES5OrESNext = Array.from(new Set([...idContinueES5, ...idContinueESNext])).sort((a, b) => a - b);
|
|
|
|
/**
 * Renders a list of code points as the Zig source text of a
 * `RangeTable.init(...)` expression.
 *
 * Runs of consecutive code points are collapsed into inclusive
 * `.{0x<start>, 0x<end>},` pairs. The output has three arguments:
 *   1. the number of leading entries of `codePoints` that are <= 0xff
 *      (a count of code points, not of ranges),
 *   2. an `R16Range` list holding the ranges made of values below 0x1000,
 *   3. an `R32Range` list holding the ranges made of everything after that.
 *
 * @param {number[]} codePoints Code points to encode. The run detection only
 *   merges neighbours, so the list is expected to be sorted ascending and
 *   free of duplicates (every caller in this script passes such a list).
 * @returns {string} The generated Zig source, lines joined with "\n".
 */
function generateRangeTable(codePoints) {
  // Values below this bound go into the R16Range list.
  // NOTE(review): the list is labelled "16-bit", which would suggest 0x10000;
  // 0x1000 is kept so the generated file stays the same - confirm the intent
  // against range_table.zig before changing it.
  const r16Limit = 0x1000;

  // Collapses runs of consecutive code points, starting at index `from` and
  // stopping at the first value that is not below `limit`. Returns the
  // [start, end] pairs found and the index at which scanning stopped.
  const collectRanges = (from, limit) => {
    const ranges = [];
    let index = from;
    while (index < codePoints.length && codePoints[index] < limit) {
      const start = codePoints[index];
      index++;
      while (
        index < codePoints.length &&
        codePoints[index] < limit &&
        codePoints[index] === codePoints[index - 1] + 1
      ) {
        index++;
      }
      ranges.push([start, codePoints[index - 1]]);
    }
    return { ranges, next: index };
  };

  const formatRange = ([start, end]) => ` .{0x${start.toString(16)}, 0x${end.toString(16)}},`;

  // Count the leading entries that are <= 0xff.
  const firstAboveLatin = codePoints.findIndex(codePoint => !(codePoint <= 0xff));
  const latinOffset = firstAboveLatin === -1 ? codePoints.length : firstAboveLatin;

  const small = collectRanges(0, r16Limit);
  // Everything left over, with no upper bound.
  const large = collectRanges(small.next, Infinity);

  return [
    `RangeTable.init(`,
    ` ${latinOffset},`,
    ` &[_]R16Range{`,
    ...small.ranges.map(formatRange),
    ` },`,
    `&[_]R32Range{`,
    ...large.ranges.map(formatRange),
    ` },`,
    `);`,
  ].join("\n");
}
|
|
|
|
/**
 * Renders a list of code points as the Zig source text of a
 * `return switch(codepoint) { ... => true, else => false };` statement.
 *
 * Runs of consecutive code points are collapsed into inclusive
 * `0x<start>...0x<end>,` switch items. Values below 0x1000 are scanned first
 * and the rest afterwards, so a run never crosses the 0x1000 boundary.
 *
 * @param {number[]} codePoints Code points to encode. The run detection only
 *   merges neighbours, so the list is expected to be sorted ascending and
 *   free of duplicates (every caller in this script passes such a list).
 * @returns {string} The generated Zig statement, lines joined with "\n".
 *   For an empty list this is `return false;`, because a switch prong with no
 *   items would not be valid Zig.
 */
function generateBigSwitchStatement(codePoints) {
  if (codePoints.length === 0) {
    return `return false;`;
  }

  const lines = [`return switch(codepoint) {`];

  // Appends one switch item per run of consecutive code points, starting at
  // index `from` and stopping at the first value that is not below `limit`.
  // `prefix` is prepended to every emitted line. Returns the index at which
  // scanning stopped.
  const emitRanges = (from, limit, prefix) => {
    let index = from;
    while (index < codePoints.length && codePoints[index] < limit) {
      const start = codePoints[index];
      index++;
      while (
        index < codePoints.length &&
        codePoints[index] < limit &&
        codePoints[index] === codePoints[index - 1] + 1
      ) {
        index++;
      }
      const end = codePoints[index - 1];
      lines.push(`${prefix}0x${start.toString(16)}...0x${end.toString(16)},`);
    }
    return index;
  };

  // Values below 0x1000 first, then everything that is left.
  const next = emitRanges(0, 0x1000, "");
  emitRanges(next, Infinity, " ");

  lines.push(` => true,`, `else => false`, `};`);
  return lines.join("\n");
}
|
|
|
|
// Destination of the generated Zig module, relative to this script's directory.
const unicodeZigPath = path.join(__dirname, "..", "src", "js_lexer", "unicode.zig");

// Full text of the generated module: the four range tables followed by the two
// switch-based predicate functions. The header names this script as the
// generator so readers know not to edit the output by hand.
const unicodeZigSource = `// This file was automatically generated by ${path.basename(__filename)}. Do not edit.

const RangeTable = @import("./range_table.zig");


// ES5 || ESNext
pub const id_start = ${generateRangeTable(idStartES5OrESNext)}

// ES5 || ESNext
pub const id_continue = ${generateRangeTable(idContinueES5OrESNext)}

pub const printable_id_start = ${generateRangeTable(idStartESNext)}
pub const printable_id_continue = ${generateRangeTable(idContinueESNext)}

pub fn isIdentifierStart(comptime Codepoint: type, codepoint: Codepoint) bool{
${generateBigSwitchStatement(idStartES5OrESNext)}
}

pub fn isIdentifierContinue(comptime Codepoint: type, codepoint: Codepoint) bool{
${generateBigSwitchStatement(idContinueES5OrESNext)}
}


`;

// Replace the checked-in file with the freshly generated text.
fs.writeFileSync(unicodeZigPath, unicodeZigSource);
|