// Mirror of https://github.com/oven-sh/bun
// Synced 2026-02-02 15:08:46 +00:00
// 139 lines, 3.7 KiB, TypeScript
import crypto from "crypto";

// Types mirroring the corresponding Zig structures.
/**
 * Source of per-codepoint values for {@link Generator}.
 *
 * `Generator.generate()` calls `get` once for every codepoint and uses `eql`
 * to decide whether a value is already present in the deduplicated element
 * table.
 */
interface Context<Elem> {
  /**
   * Returns the element associated with `codepoint`.
   * May answer synchronously or with a promise; the generator awaits the result.
   */
  get(codepoint: number): Promise<Elem> | Elem;

  /** Returns `true` when `a` and `b` should share a single table entry. */
  eql(a: Elem, b: Elem): boolean;
}

/**
 * The three arrays produced by `Generator.generate()`.
 *
 * Codepoints are grouped into fixed-size blocks of 256. Given how the arrays
 * are built, the element for a codepoint `cp` is
 * `stage3[stage2[stage1[cp >> 8] + (cp & 0xff)]]`.
 */
interface Tables<Elem> {
  /** One entry per block: the offset in `stage2` at which that block's data starts. */
  stage1: number[];

  /** Concatenated, deduplicated blocks; every entry is an index into `stage3`. */
  stage2: number[];

  /** The distinct elements, in order of first appearance. */
  stage3: Elem[];
}

/**
 * Builds a three-stage lookup table covering every Unicode codepoint
 * (0 through 0x10FFFF) and can render the result as Zig source.
 *
 * Codepoints are processed in blocks of {@link Generator.BLOCK_SIZE}. Distinct
 * elements are stored once in `stage3`, identical blocks are stored once in
 * `stage2`, and `stage1` records where each block lives inside `stage2`.
 */
class Generator<Elem> {
  /** Number of consecutive codepoints that share one `stage1` entry. */
  private static readonly BLOCK_SIZE = 256;

  /** Highest valid Unicode codepoint. */
  private static readonly MAX_CODEPOINT = 0x10ffff;

  /** Largest index the tables may hold (they are emitted as at most `u16`). */
  private static readonly MAX_INDEX = 0xffff;

  private readonly ctx: Context<Elem>;

  /**
   * @param ctx Supplies the element for each codepoint and the equality used
   *            to deduplicate elements.
   */
  constructor(ctx: Context<Elem>) {
    this.ctx = ctx;
  }

  /**
   * Produces a content key for a block so identical blocks can be detected.
   * Entries are hashed as 16-bit values, which is lossless because
   * `generate()` rejects indices above 0xFFFF before a block is hashed.
   */
  private hashBlock(block: number[]): string {
    const hash = crypto.createHash("sha256");
    hash.update(Buffer.from(new Uint16Array(block).buffer));
    return hash.digest("hex");
  }

  /**
   * Walks every codepoint and builds the three lookup stages.
   *
   * Each call starts from a clean state, so the same generator may be used
   * repeatedly and always yields self-contained tables.
   *
   * @returns The populated `stage1`, `stage2` and `stage3` arrays.
   * @throws Error if more than 0x10000 distinct elements are encountered, or
   *         if a block would start at a `stage2` offset above 0xFFFF.
   */
  async generate(): Promise<Tables<Elem>> {
    const stage1: number[] = [];
    const stage2: number[] = [];
    const stage3: Elem[] = [];

    // Block hash -> offset of that block in *this call's* stage2. It must be
    // local: offsets remembered from an earlier run would point into an array
    // that no longer exists.
    const blockOffsets = new Map<string, number>();

    let block = new Array<number>(Generator.BLOCK_SIZE).fill(0);
    let blockLen = 0;

    for (let cp = 0; cp <= Generator.MAX_CODEPOINT; cp++) {
      const elem = await this.ctx.get(cp);

      // Find the element in stage3, appending it on first sight.
      let elemIdx = stage3.findIndex(item => this.ctx.eql(item, elem));
      if (elemIdx === -1) {
        elemIdx = stage3.length;
        stage3.push(elem);
      }

      if (elemIdx > Generator.MAX_INDEX) {
        throw new Error("Block index too large");
      }

      block[blockLen] = elemIdx;
      blockLen++;

      // Keep filling until the block is complete or the last codepoint is reached.
      if (blockLen < Generator.BLOCK_SIZE && cp !== Generator.MAX_CODEPOINT) {
        continue;
      }

      // Zero-pad a short final block so its hash covers a full-size block.
      if (blockLen < Generator.BLOCK_SIZE) {
        block.fill(0, blockLen);
      }

      // Reuse an identical earlier block, or append this one to stage2.
      const blockHash = this.hashBlock(block);
      let stage2Idx = blockOffsets.get(blockHash);

      if (stage2Idx === undefined) {
        stage2Idx = stage2.length;
        blockOffsets.set(blockHash, stage2Idx);
        stage2.push(...block.slice(0, blockLen));
      }

      if (stage2Idx > Generator.MAX_INDEX) {
        throw new Error("Stage2 index too large");
      }

      stage1.push(stage2Idx);

      // Start a fresh block.
      block = new Array<number>(Generator.BLOCK_SIZE).fill(0);
      blockLen = 0;
    }

    return { stage1, stage2, stage3 };
  }

  /**
   * Renders lookup tables as Zig source: a generic function named `tableName`
   * that returns a struct exposing `stage1`, `stage2` and `stage3`.
   *
   * `stage2` is declared as `u8` when every entry fits in a byte and as `u16`
   * otherwise, so tables with more than 256 distinct elements still compile.
   *
   * @param tableName    Name of the generated Zig function.
   * @param tables       Tables to render, e.g. the result of `generate()`.
   * @param elemToString Converts one element to the Zig expression text
   *                     placed in the `stage3` initializer.
   * @returns The generated Zig source text.
   */
  static writeZig<Elem>(tableName: string, tables: Tables<Elem>, elemToString: (elem: Elem) => string): string {
    const stage2Type = tables.stage2.some(value => value > 0xff) ? "u16" : "u8";

    let output = `/// Auto-generated. Do not edit.\n`;
    output += `fn ${tableName}(comptime Elem: type) type {\n`;
    output += " return struct {\n";

    // Stage 1
    output += `pub const stage1: [${tables.stage1.length}]u16 = .{`;
    output += tables.stage1.join(",");
    output += "};\n\n";

    // Stage 2
    output += `pub const stage2: [${tables.stage2.length}]${stage2Type} = .{`;
    output += tables.stage2.join(",");
    output += "};\n\n";

    // Stage 3
    output += `pub const stage3: [${tables.stage3.length}]Elem = .{`;
    output += tables.stage3.map(elemToString).join(",");
    output += "};\n";

    output += " };\n}\n";
    return output;
  }
}

// Example usage:
/**
 * Demonstrates the full pipeline: build a context, generate the tables, and
 * print the resulting Zig source to the console.
 */
async function example(): Promise<void> {
  // Example context that maps codepoints to their category.
  const ctx: Context<string> = {
    get: async (_cp: number) => {
      // This would normally look up the actual Unicode category.
      return "Lu";
    },
    eql: (a: string, b: string) => a === b,
  };

  const generator = new Generator(ctx);
  const tables = await generator.generate();

  // writeZig expects the name of the generated Zig function as its first argument.
  const zigCode = Generator.writeZig("CategoryTables", tables, (elem: string) => `"${elem}"`);
  console.log(zigCode);
}

// Public surface: the generator class plus the two type-only interfaces.
export { Generator, type Context, type Tables };