Files
bun.sh/src/codegen/create_hash_table
Jarred Sumner 7a4e0158d6 Lots of stuff (#7027)
* Use debug mode by default

* Enable build with assertions enabled

* Update cli.zig

* Update bun-linux-build.yml

* Fixes

* Fix `ASSERT_ENABLED`

* try this

* Update Dockerfile

* mimalloc debug

* Update CMakeLists.txt

* `Bun.deepMatch` - fix assertion failures

cc @dylan-conway, looks like we need to use `putDirectMayBeIndex` and check for `isCell` more carefully.

* Object.create support in code generator and callbacks wrapper

* Remove unused file

* zig upgrade

* zls

* Fix various errors

* Support `BuiltinAccessor` in create_hash_table script

* Fix assertion failure in `process.mainModule`

* Fix assertion failure in `onerror`

* Fix assertion failure when creating a Worker

* Fix assertion failure when loading lots of files in bun test

* Fix assertion failure when terminating a `Worker`

* Add helper for converting BunString to a WTFString

* Fix assertion failure in notifyNeedTermination

* Add more debug logs in `bun test`

* Fix compiler warning in usockets

* Fix assertion failure with `Worker` termination (another)

* Fix assertion failure in `coerceToInt64`

* Fix assertion failure in `BroadcastChannel`

* Fix assertion failure in `Headers.prototype.getAll`

* Fixes #7067

* Add heap analyzer label for CommonJS modules

* Fix assertion failure in module.require && module.require.resolve

* Remove unused code

* Fix assertion failure in debugger

* Fix crash in debugger

* Fix assertion failures in bun:sqlite

* Bump zig

* Bump WebKit

* Fix assertion failure in JSPromise::reject && JSInternalPromise::reject

* Fix assertion failure in ReadableStream::cancel

* Fix assertion failure in AsyncContextFrame::create

* Fix assertion failure in bun:sqlite

* Fix assertion failure in mocks

* Fix assertion failure in ServerWebSocket.close

* Fix assertion failure in N-API with subclasses

* [napi] Make promises cheaper

* undo

* Don't check for exceptions in ObjectInitializationScope

* Add separate entry point for test runner that doesn't generate code

* Don't deref builtin code

* Fix preload test

* Fix assertion failure in memoryUsage()

* Fix preload test, part 2

* Ensure that the env map for a Worker is empty after it is used

* The pointer for the Arena allocator used in parsing should not change

* Terminate thread on exit

* Start to implement scriptExecutionStatus

* Update worker.test.ts

* Fix Dirent.name setter

* Update settings.json

* Fix assertion failure in node:http

* Use correct value for `JSFinalObject::maxInlineCapacity`

* JSFinalObject::maxInlineCapacity x2

* Don't strip when assertions are enabled

* Make `m_wasTerminated` atomic

* Preserve directives in the transpiler

cc @ctjlewis

* Workaround assertion failure in ServerWebSocket.sendBinary and ServerWebSocket.sendText

* windows

* Buffer lockfile serialization in-memory

* PR feedback

* PR feedback

* PR feedback

* Windows

* quotes

* Update CMakeLists.txt

* Update bun-linux-build.yml

* Update bun-linux-build.yml

* Move this code to BunString.cpp

* Update BunString.cpp

---------

Co-authored-by: Jarred Sumner <709451+Jarred-Sumner@users.noreply.github.com>
2023-11-13 22:10:09 -08:00

545 lines
17 KiB
Perl
Executable File

#! /usr/bin/env perl
#
# Static Hashtable Generator
#
# (c) 2000-2002 by Harri Porten <porten@kde.org> and
# David Faure <faure@kde.org>
# Modified (c) 2004 by Nikolas Zimmermann <wildfox@kde.org>
# Copyright (C) 2007-2023 Apple Inc. All rights reserved.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
use strict;
use warnings;
use Math::BigInt;
use Getopt::Long qw(:config pass_through);
# ---------------------------------------------------------------------------
# Script body: read the table description file named on the command line and
# print one generated table (via output()) for every @begin ... @end section.
# ---------------------------------------------------------------------------

# The description file is taken from the front of the remaining arguments.
my $file = shift @ARGV or die("Must provide source file as final argument.");

# Three-argument open with a lexical handle: the path is always treated as a
# plain file name opened for reading (never as a mode or pipe specification),
# and the system's reason for a failure is reported through $!.
open(my $in, '<', $file) or die "Cannot open $file: $!";

# State shared with the subroutines defined later in this file.
my @keys = ();              # key column of every row of the current table
my @attrs = ();             # attribute column of every row ("None" when absent)
my @values = ();            # one hash ref per row describing how to emit its value
my @table = ();             # filled by calcCompactHashSize, cleared by output
my @links = ();             # filled by calcCompactHashSize, cleared by output
my $hasSetter = "false";    # printed into the HashTable initializer; never reset once "true"
my $includeBuiltin = 0;     # set when a row's value is the literal JSBuiltin
my $inside = 0;             # true while between @begin and @end
my $name;                   # table name captured from the @begin line
my $pefectHashSize;         # written by calcPerfectHashSize
my $compactSize;            # written by calcCompactHashSize
my $compactHashSizeMask;    # written by calcCompactHashSize
my $banner = 0;             # true once the "Automatically generated" line was printed
my $mask64 = 2**64 - 1;
my $mask32 = 2**32 - 1;

# Forward declarations so the prototyped subs can be called before their
# definitions are compiled.
sub calcPerfectHashSize($);
sub calcCompactHashSize($);
sub output();
sub jsc_ucfirst($);
sub hashValue($$);

while (my $line = <$in>) {
    chomp $line;
    $line =~ s/^\s+//;
    next if $line =~ /^\#|^$/; # Comment or blank line. Do nothing.

    if ($line =~ /^\@begin/ && !$inside) {
        if ($line =~ /^\@begin\s*([:_\w]+)\s*\d*\s*$/) {
            $inside = 1;
            $name = $1;
        } else {
            print STDERR "WARNING: \@begin without table name, skipping $line\n";
        }
    } elsif ($line =~ /^\@end\s*$/ && $inside) {
        # End of a table: emit it, then reset the per-table state.
        output();
        @keys = ();
        @attrs = ();
        @values = ();
        $includeBuiltin = 0;
        $inside = 0;
    } elsif ($line =~ /^(\S+)\s*(\S+)\s*([\w\|]*)\s*(\w*)\s*(\w*)\s*$/ && $inside) {
        # A data row: key, value, then up to three optional columns
        # (attributes joined by '|', a parameter field, an intrinsic name).
        my $key = $1;
        my $val = $2;
        my $att = $3;
        my $param = $4;
        my $intrinsic = $5;

        push(@keys, $key);
        push(@attrs, length($att) > 0 ? $att : "None");

        if ($val eq "JSBuiltin") {
            $includeBuiltin = 1;
        }

        # The attribute text selects the value kind; the first matching
        # pattern wins, in this order.
        if ($att =~ m/Function/) {
            push(@values, { "type" => "PropertyAttribute::Function", "function" => $val, "params" => (length($param) ? $param : ""), "intrinsic" => (length($intrinsic) ? $intrinsic : "NoIntrinsic") });
            #printf STDERR "WARNING: Number of arguments missing for $key/$val\n" if (length($param) == 0);
        } elsif ($att =~ m/CellProperty/) {
            push(@values, { "type" => "PropertyAttribute::CellProperty", "property" => $val });
        } elsif ($att =~ m/ClassStructure/) {
            push(@values, { "type" => "PropertyAttribute::ClassStructure", "property" => $val });
        } elsif ($att =~ m/PropertyCallback/) {
            push(@values, { "type" => "PropertyAttribute::PropertyCallback", "cback" => $val });
        } elsif (length($att)) {
            # Any other non-empty attribute list is a getter/setter property.
            my $type = "PropertyAttribute::Property";
            if ($att =~ m/Builtin/) {
                $type = "PropertyAttribute::BuiltinAccessor";
            }
            # Read-only properties get "0" as the setter; otherwise the
            # setter name is derived from the getter name.
            my $put = "0";
            if (!($att =~ m/ReadOnly/)) {
                $put = "set" . jsc_ucfirst($val);
            }
            $hasSetter = "true";
            push(@values, { "type" => $type, "get" => $val, "put" => $put });
        } else {
            # No attributes at all: a plain lexer value.
            push(@values, { "type" => "Lexer", "value" => $val });
        }
    } elsif ($inside) {
        die "invalid data {" . $line . "}";
    }
}
close($in);

die "missing closing \@end" if ($inside);
# Capitalizes a name for use after a prefix (the parse loop builds setter
# names as "set" . jsc_ucfirst($getter)).
#
# If the name contains "js", its first occurrence becomes "JS" and nothing
# else is changed; otherwise the first character is upper-cased.
sub jsc_ucfirst($)
{
    my ($value) = @_;
    # s/// is true only when it actually replaced something.
    return $value if $value =~ s/js/JS/;
    return ucfirst($value);
}
# Returns the smallest power of two that is greater than or equal to the
# argument. The result is never below 1, so 0 and 1 both yield 1.
sub ceilingToPowerOf2
{
    my ($minimum) = @_;
    my $candidate = 1;
    $candidate <<= 1 until $candidate >= $minimum;
    return $candidate;
}
# Finds the smallest power-of-two table size at which every key in @keys
# lands in its own slot (hash modulo size) and stores it in $pefectHashSize.
#
# $isMac is passed through to hashValue to select the hash variant.
#
# Dies if two keys have exactly the same hash value: no table size can ever
# separate them, so the doubling search below would never terminate.
sub calcPerfectHashSize($)
{
    my ($isMac) = @_;

    # Hash every key once up front; the values do not depend on the table
    # size being tried.
    my %keyForHash;
    my @hashes;
    foreach my $key (@keys) {
        my $hash = hashValue($key, $isMac);
        if (exists $keyForHash{$hash}) {
            die "Keys \"$keyForHash{$hash}\" and \"$key\" have the same hash value ($hash); cannot compute a perfect hash size";
        }
        $keyForHash{$hash} = $key;
        push(@hashes, $hash);
    }

tableSizeLoop:
    for ($pefectHashSize = ceilingToPowerOf2(scalar @hashes); ; $pefectHashSize += $pefectHashSize) {
        my %used;
        foreach my $hash (@hashes) {
            # A slot that is already taken means this size is too small:
            # double it and start over.
            next tableSizeLoop if $used{$hash % $pefectHashSize}++;
        }
        last;
    }
}
# Shifts $value left by $distance bits and keeps only the low 32 bits of the
# result.
sub leftShift($$) {
    my ($value, $distance) = @_;
    my $shifted = $value << $distance;
    return $shifted & 0xFFFFFFFF;
}
# Builds the compact (chained) index for the keys in @keys.
#
# The primary area has ceilingToPowerOf2(2 * number of keys) slots. A key
# whose slot is already occupied follows the slot's link; when the chain
# ends, a new slot is appended after the ones allocated so far and linked in.
#
# Writes: @table (slot -> index of the key in @keys), @links (slot -> next
# slot of the chain), $compactSize (total slots, primary plus appended) and
# $compactHashSizeMask (primary size minus one).
#
# $isMac is passed through to hashValue to select the hash variant.
sub calcCompactHashSize($)
{
    my ($isMac) = @_;

    my $primarySize = ceilingToPowerOf2(2 * @keys);
    $compactHashSizeMask = $primarySize - 1;
    $compactSize = $primarySize;

    # Statistics; they are maintained here but not read inside this sub.
    my ($collisions, $maxdepth) = (0, 0);

    for my $keyIndex (0 .. $#keys) {
        my $slot = hashValue($keys[$keyIndex], $isMac) % $primarySize;
        my $depth = 0;

        while (defined($table[$slot])) {
            if (defined($links[$slot])) {
                # Occupied and already chained: walk to the next slot.
                $slot = $links[$slot];
                $depth++;
                next;
            }
            # Occupied with no successor: append a fresh slot and link it.
            $collisions++;
            $links[$slot] = $compactSize;
            $slot = $compactSize++;
        }

        $table[$slot] = $keyIndex;
        $maxdepth = $depth if ($depth > $maxdepth);
    }
}
# Final mixing step of the 32-bit hash: forces "avalanching" of the lower
# 32 bits through alternating xor-with-left-shift and add-right-shift rounds.
# The argument is reduced to 32 bits first and the result fits in 32 bits.
sub avalancheBits($) {
    my ($bits) = @_;
    $bits &= $mask32;

    # Each round: xor with a left shift (leftShift already truncates to
    # 32 bits), then add a right shift and truncate the sum again.
    for my $round ([3, 5], [2, 15]) {
        my ($up, $down) = @$round;
        $bits ^= leftShift($bits, $up);
        $bits = ($bits + ($bits >> $down)) & $mask32;
    }

    return $bits ^ leftShift($bits, 10);
}
# Reduces a hash to its low 24 bits and never returns 0.
#
# The top 8 bits of the 32-bit value are cleared because StringImpl uses them
# as flags. A result of 0 is replaced by 0x80000000 >> 8, since 0 is used to
# signal "hash not computed yet".
sub maskTop8BitsAndAvoidZero($) {
    my ($value) = @_;
    my $low24 = ($value & $mask32) & 0xffffff;
    return $low24 == 0 ? (0x80000000 >> 8) : $low24;
}
# Paul Hsieh's SuperFastHash
# http://www.azillionmonkeys.com/qed/hash.html
#
# Takes a list of single-character strings and returns the hash after
# avalancheBits and maskTop8BitsAndAvoidZero have been applied.
#
# The accumulator is truncated to 32 bits after every addition. Without that,
# a carry out of bit 31 would survive into the following right shift and the
# result would differ from the same computation done in 32-bit arithmetic.
sub superFastHash {
    my @chars = @_;

    # This hash is designed to work on 16-bit chunks at a time. But since the normal case
    # (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
    # were 16-bit chunks, which should give matching results

    my $hash = 0x9e3779b9;
    my $pairCount = scalar(@chars) >> 1;
    my $hasOddChar = scalar(@chars) & 1;
    my $s = 0;

    # Main loop: two characters per iteration.
    for (; $pairCount > 0; $pairCount--) {
        $hash = ($hash + ord($chars[$s])) & $mask32;
        my $tmp = leftShift(ord($chars[$s+1]), 11) ^ $hash;
        $hash = leftShift($hash, 16) ^ $tmp;
        $s += 2;
        $hash = ($hash + ($hash >> 11)) & $mask32;
    }

    # Handle end case: one trailing character when the count is odd.
    if ($hasOddChar) {
        $hash = ($hash + ord($chars[$s])) & $mask32;
        $hash ^= leftShift($hash, 11);
        $hash = ($hash + ($hash >> 17)) & $mask32;
    }

    $hash = avalancheBits($hash);
    return maskTop8BitsAndAvoidZero($hash);
}
# Adds two unsigned 64-bit integers and returns the sum modulo 2**64.
#
# The addition is done on 32-bit halves so that no intermediate value
# exceeds the native integer range. Adding the operands directly would turn
# an overflowing sum into a floating-point number, which does not wrap
# around when it is masked afterwards.
#
# Requires a Perl built with 64-bit integers.
sub uint64_add($$) {
    my ($a, $b) = @_;
    my $low = ($a & 0xFFFFFFFF) + ($b & 0xFFFFFFFF);
    # Carry from the low half goes into the high half; anything carried out
    # of the high half is discarded.
    my $high = (($a >> 32) + ($b >> 32) + ($low >> 32)) & 0xFFFFFFFF;
    return ($high << 32) | ($low & 0xFFFFFFFF);
}
# Multiplies two values and masks the product with $mask64.
#
# NOTE(review): the only caller visible in this file (wymum) passes operands
# of at most 32 bits each; behaviour for products above 2**64 should be
# confirmed before using this with larger operands.
sub uint64_multi($$) {
    my ($lhs, $rhs) = @_;
    return ($lhs * $rhs) & $mask64;
}
# Multiplies $A by $B using 32-bit halves and returns the product as the
# list (low, high).
#
# The four partial products are combined with uint64_add. A carry is counted
# whenever a sum compares smaller than the value it was added to, which
# assumes uint64_add wraps around on overflow.
sub wymum($$) {
    my ($A, $B) = @_;

    my ($highA, $lowA) = ($A >> 32, $A & $mask32);
    my ($highB, $lowB) = ($B >> 32, $B & $mask32);

    my $highProduct = uint64_multi($highA, $highB);
    my $crossAB     = uint64_multi($highA, $lowB);
    my $crossBA     = uint64_multi($highB, $lowA);
    my $lowProduct  = uint64_multi($lowA, $lowB);

    # Low word: low product plus the low halves of both cross terms.
    my $partial = uint64_add($lowProduct, ($crossAB << 32));
    my $carry = int($partial < $lowProduct);
    my $lo = uint64_add($partial, ($crossBA << 32));
    $carry += int($lo < $partial);

    # High word: high product plus the high halves of the cross terms plus
    # the carries out of the low word.
    my $upper = uint64_add(($crossBA >> 32), $carry);
    $upper = uint64_add(($crossAB >> 32), $upper);
    my $hi = uint64_add($highProduct, $upper);

    return ($lo, $hi);
}
# Folds the two words returned by wymum($A, $B) into one by xor-ing them.
sub wymix($$) {
    my ($A, $B) = @_;
    my ($low, $high) = wymum($A, $B);
    return $low ^ $high;
}
# Spreads the four bytes of a 32-bit value over a 64-bit value so that each
# byte ends up in the low byte of its own 16-bit lane:
# 0xAABBCCDD becomes 0x00AA00BB00CC00DD.
sub convert32BitTo64Bit($) {
    my ($v) = @_;
    my $halfwordLanes = 281470681808895; # 0x0000_ffff_0000_ffff
    my $byteLanes = 71777214294589695; # 0x00ff_00ff_00ff_00ff

    # First separate the two 16-bit halves, then the bytes inside each half.
    my $halves = ($v | ($v << 16)) & $halfwordLanes;
    return ($halves | ($halves << 8)) & $byteLanes;
}
# Spreads the two bytes of a 16-bit value over a 32-bit value so that each
# byte ends up in the low byte of its own 16-bit lane:
# 0xAABB becomes 0x00AA00BB.
sub convert16BitTo32Bit($) {
    my ($v) = @_;
    my $overlapped = $v | ($v << 8);
    return $overlapped & 0x00ff_00ff;
}
# Hashes a list of single-character strings with a Perl transcription of
# wyhash and returns the low 32 bits of the result after
# maskTop8BitsAndAvoidZero has been applied.
#
# Lengths are tracked in two units: $charCount / $charIndex count characters,
# while $byteCount (and $i further down) count bytes with every character
# taken as two bytes.
sub wyhash {
# https://github.com/wangyi-fudan/wyhash
my @chars = @_;
my $charCount = scalar @chars;
my $byteCount = $charCount << 1;
my $charIndex = 0;
my $seed = 0;
my @secret = ( 11562461410679940143, 16646288086500911323, 10285213230658275043, 6384245875588680899 );
# Offset used by the short-input branch below; presumably the trailing >> 1
# converts a byte offset into a character offset - TODO confirm against the
# reference implementation.
my $move1 = (($byteCount >> 3) << 2) >> 1;
$seed ^= wymix($seed ^ $secret[0], $secret[1]);
my $a = 0;
my $b = 0;
# The helpers below are installed through localized typeglobs, so they are
# callable by bare name for the duration of this call and can read @chars.
# c2i: code point of the character at index $i.
local *c2i = sub {
my ($i) = @_;
return ord($chars[$i]);
};
# wyr8: combines the code points of four consecutive characters (shifted by
# 0, 8, 16 and 24 bits) and widens the result with convert32BitTo64Bit.
local *wyr8 = sub {
my ($i) = @_;
my $v = c2i($i) | (c2i($i + 1) << 8) | (c2i($i + 2) << 16) | (c2i($i + 3) << 24);
return convert32BitTo64Bit($v);
};
# wyr4: combines the code points of two consecutive characters (shifted by
# 0 and 8 bits) and widens the result with convert16BitTo32Bit.
local *wyr4 = sub {
my ($i) = @_;
my $v = c2i($i) | (c2i($i + 1) << 8);
return convert16BitTo32Bit($v);
};
# wyr2: code point of a single character, shifted left by 16 bits.
local *wyr2 = sub {
my ($i) = @_;
return c2i($i) << 16;
};
if ($byteCount <= 16) {
# Short input (at most 8 characters): $a and $b are read directly from the
# start and the end of the input.
if ($byteCount >= 4) {
$a = (wyr4($charIndex) << 32) | wyr4($charIndex + $move1);
$charIndex = $charIndex + $charCount - 2;
$b = (wyr4($charIndex) << 32) | wyr4($charIndex - $move1);
} elsif ($byteCount > 0) {
# Exactly one character.
$a = wyr2($charIndex);
$b = 0;
} else {
# Empty input.
$a = $b = 0;
}
} else {
# Longer input: $i holds the number of bytes still to be consumed.
my $i = $byteCount;
if ($i > 48) {
# Consume 48 bytes (24 characters) per round using three independent seeds,
# then fold the extra seeds back into $seed.
my $see1 = $seed;
my $see2 = $seed;
do {
$seed = wymix(wyr8($charIndex) ^ $secret[1], wyr8($charIndex + 4) ^ $seed);
$see1 = wymix(wyr8($charIndex + 8) ^ $secret[2], wyr8($charIndex + 12) ^ $see1);
$see2 = wymix(wyr8($charIndex + 16) ^ $secret[3], wyr8($charIndex + 20) ^ $see2);
$charIndex += 24;
$i -= 48;
} while ($i > 48);
$seed ^= $see1 ^ $see2;
}
# Consume 16 bytes (8 characters) per round while more than 16 bytes remain.
while ($i > 16) {
$seed = wymix(wyr8($charIndex) ^ $secret[1], wyr8($charIndex + 4) ^ $seed);
$i -= 16;
$charIndex += 8;
}
# $a and $b are read from the last eight characters before
# $charIndex + $move2, i.e. positions that may already have been consumed.
my $move2 = $i >> 1;
$a = wyr8($charIndex + $move2 - 8);
$b = wyr8($charIndex + $move2 - 4);
}
# Final mix of $a, $b, the seed and the byte length.
$a ^= $secret[1];
$b ^= $seed;
($a, $b) = wymum($a, $b);
my $hash = wymix($a ^ $secret[0] ^ $byteCount, $b ^ $secret[1]) & $mask32;
return maskTop8BitsAndAvoidZero($hash);
}
# Returns the hash of $string.
#
# The string is split into single characters (the split pattern also consumes
# any space characters as separators). With $isMac set, strings longer than
# 48 characters are hashed with wyhash; everything else goes through
# superFastHash.
sub hashValue($$) {
    my ($string, $isMac) = @_;
    my @chars = split(/ */, $string);
    return wyhash(@chars) if $isMac && scalar(@chars) > 48;
    return superFastHash(@chars);
}
# Prints the generated C++ source for the table currently held in @keys,
# @attrs and @values under the name $name.
#
# Output, in order: a one-time banner comment, the #include lines, the
# opening of namespace JSC, the table definitions, and the closing of the
# namespace. The table definitions are generated twice (hashValue's $isMac
# set and unset); if the two texts differ they are wrapped in
# #if PLATFORM(MAC) / #else / #endif, otherwise the text is printed once.
sub output() {
# The banner is printed only before the first table of a run.
if (!$banner) {
$banner = 1;
print "// Automatically generated from $file using $0. DO NOT EDIT!\n";
}
# Names of the two generated arrays; every ':' of the table name is replaced
# by '_'.
my $nameEntries = "${name}Values";
$nameEntries =~ s/:/_/g;
my $nameIndex = "${name}Index";
$nameIndex =~ s/:/_/g;
print "\n";
print "#include \"JSCBuiltins.h\"\n" if ($includeBuiltin);
print "#include \"Lookup.h\"\n";
print "\n";
print "namespace JSC {\n";
print "\n";
# Builds and returns the text of the index array, the value array and the
# HashTable definition for one hash variant.
#   $isMac           - passed on to calcPerfectHashSize / calcCompactHashSize
#   $setToOldValues  - when true, $compactSize and $compactHashSizeMask are
#                      restored to the values they had on entry
# @table and @links are emptied before returning.
local *generateHashTableHelper = sub {
my ($isMac, $setToOldValues) = @_;
my $oldCompactSize = $compactSize;
my $oldCompactHashSizeMask = $compactHashSizeMask;
calcPerfectHashSize($isMac);
calcCompactHashSize($isMac);
my $hashTableString = "";
# Index array: one { key index, link } pair per slot, -1 where undefined.
if ($compactSize != 0) {
$hashTableString .= "static const struct CompactHashIndex ${nameIndex}\[$compactSize\] = {\n";
for (my $i = 0; $i < $compactSize; $i++) {
my $T = -1;
if (defined($table[$i])) { $T = $table[$i]; }
my $L = -1;
if (defined($links[$i])) { $L = $links[$i]; }
$hashTableString .= " { $T, $L },\n";
}
} else {
# MSVC dislikes empty arrays.
$hashTableString .= "static const struct CompactHashIndex ${nameIndex}\[1\] = {\n";
$hashTableString .= " { 0, 0 }\n";
}
$hashTableString .= "};\n";
$hashTableString .= "\n";
# Value array: one entry per key, in the order the keys were read.
my $packedSize = scalar @keys;
if ($packedSize != 0) {
$hashTableString .= "static const struct HashTableValue ${nameEntries}\[$packedSize\] = {\n";
} else {
# MSVC dislikes empty arrays.
$hashTableString .= "static const struct HashTableValue ${nameEntries}\[1\] = {\n";
$hashTableString .= " { { }, 0, NoIntrinsic, { HashTableValue::End } }\n";
}
my $i = 0;
foreach my $key (@keys) {
# Translate the parsed value description into the type tag and the one or
# two values that follow it in the initializer.
my $typeTag = "";
my $firstValue = "";
my $secondValue = "";
my $hasSecondValue = 1;
my $intrinsic = "NoIntrinsic";
if ($values[$i]{"type"} eq "PropertyAttribute::Function") {
$typeTag = "NativeFunction";
$firstValue = $values[$i]{"function"};
$secondValue = $values[$i]{"params"};
$intrinsic = $values[$i]{"intrinsic"};
} elsif ($values[$i]{"type"} eq "PropertyAttribute::BuiltinAccessor") {
$typeTag = "BuiltinAccessor";
$firstValue = $values[$i]{"get"};
$secondValue = $values[$i]{"put"};
} elsif ($values[$i]{"type"} eq "PropertyAttribute::Property") {
$typeTag = "GetterSetter";
$firstValue = $values[$i]{"get"};
$secondValue = $values[$i]{"put"};
} elsif ($values[$i]{"type"} eq "Lexer") {
$typeTag = "Lexer";
$firstValue = $values[$i]{"value"};
$hasSecondValue = 0;
} elsif ($values[$i]{"type"} eq "PropertyAttribute::CellProperty" || $values[$i]{"type"} eq "PropertyAttribute::ClassStructure") {
$typeTag = ($values[$i]{"type"} eq "PropertyAttribute::CellProperty") ? "LazyCellProperty" : "LazyClassStructure";
# The property must be written as Class::member; it is emitted as
# OBJECT_OFFSETOF(Class, member). Anything else aborts the run.
$values[$i]{"property"} =~ /\A([a-zA-Z0-9_]+)::(.*)\Z/ or die;
$firstValue = "OBJECT_OFFSETOF($1, $2)";
$hasSecondValue = 0;
} elsif ($values[$i]{"type"} eq "PropertyAttribute::PropertyCallback") {
$typeTag = "LazyProperty";
$firstValue = $values[$i]{"cback"};
$hasSecondValue = 0;
}
# Prefix every '|'-separated attribute with PropertyAttribute:: and cast the
# combined expression to unsigned.
my $attributes = "PropertyAttribute::" . $attrs[$i];
$attributes =~ s/\|/\|PropertyAttribute::/g;
$attributes = "static_cast<unsigned>(" . $attributes . ")";
# Function rows whose value is the literal JSBuiltin are emitted as
# BuiltinGenerator entries: the Function attribute is cleared, Builtin is
# set, and the generator name is the table name (without a trailing
# "Table") followed by the capitalized key and "CodeGenerator".
if ($values[$i]{"type"} eq "PropertyAttribute::Function" && $firstValue eq "JSBuiltin") {
$typeTag = "BuiltinGenerator";
my $tableHead = $name;
$tableHead =~ s/Table$//;
$hashTableString .= " { \"$key\"_s, (($attributes) & ~PropertyAttribute::Function) | PropertyAttribute::Builtin, $intrinsic, { HashTableValue::" . $typeTag . "Type, " . $tableHead . ucfirst($key) . "CodeGenerator, $secondValue } },\n";
}
else {
$hashTableString .= " { \"$key\"_s, $attributes, $intrinsic, { HashTableValue::" . $typeTag . "Type, $firstValue" . ($hasSecondValue ? ", " . $secondValue : "") . " } },\n";
}
$i++;
}
$hashTableString .= "};\n";
$hashTableString .= "\n";
# The HashTable object that ties the two arrays together.
$hashTableString .= "static const struct HashTable $name =\n";
$hashTableString .= " \{ $packedSize, $compactHashSizeMask, $hasSetter, nullptr, $nameEntries, $nameIndex \};\n";
$hashTableString .= "\n";
# Reset the shared index arrays so the next invocation starts from scratch.
@table = ();
@links = ();
if ($setToOldValues) {
$compactSize = $oldCompactSize;
$compactHashSizeMask = $oldCompactHashSizeMask;
}
return $hashTableString;
};
# Generate both variants and emit a preprocessor switch only when they differ.
my $hashTableForMacOS = generateHashTableHelper(1, 1);
my $hashTableForIOS = generateHashTableHelper(0, 0);
my $hashTableToWrite = $hashTableForMacOS;
if ($hashTableForMacOS ne $hashTableForIOS) {
$hashTableToWrite = "#if PLATFORM(MAC)\n" . $hashTableForMacOS . "#else\n" . $hashTableForIOS . "#endif\n";
}
print $hashTableToWrite;
print "} // namespace JSC\n";
}