aboutsummaryrefslogtreecommitdiff
path: root/src/bun.js/scripts/create_hash_table
diff options
context:
space:
mode:
Diffstat (limited to 'src/bun.js/scripts/create_hash_table')
-rwxr-xr-xsrc/bun.js/scripts/create_hash_table461
1 files changed, 325 insertions, 136 deletions
diff --git a/src/bun.js/scripts/create_hash_table b/src/bun.js/scripts/create_hash_table
index e2645b429..bd604ceaa 100755
--- a/src/bun.js/scripts/create_hash_table
+++ b/src/bun.js/scripts/create_hash_table
@@ -5,7 +5,7 @@
# (c) 2000-2002 by Harri Porten <porten@kde.org> and
# David Faure <faure@kde.org>
# Modified (c) 2004 by Nikolas Zimmermann <wildfox@kde.org>
-# Copyright (C) 2007-2022 Apple Inc. All rights reserved.
+# Copyright (C) 2007-2023 Apple Inc. All rights reserved.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -24,6 +24,7 @@
use strict;
use warnings;
+use Math::BigInt;
use Getopt::Long qw(:config pass_through);
my $file = shift @ARGV or die("Must provide source file as final argument.");
@@ -33,7 +34,6 @@ open(IN, $file) or die "No such file $file";
my @keys = ();
my @attrs = ();
my @values = ();
-my @hashes = ();
my @table = ();
my @links = ();
@@ -46,11 +46,13 @@ my $pefectHashSize;
my $compactSize;
my $compactHashSizeMask;
my $banner = 0;
-sub calcPerfectHashSize();
-sub calcCompactHashSize();
+my $mask64 = 2**64 - 1;
+my $mask32 = 2**32 - 1;
+sub calcPerfectHashSize($);
+sub calcCompactHashSize($);
sub output();
sub jsc_ucfirst($);
-sub hashValue($);
+sub hashValue($$);
while (<IN>) {
chomp;
@@ -64,16 +66,11 @@ while (<IN>) {
print STDERR "WARNING: \@begin without table name, skipping $_\n";
}
} elsif (/^\@end\s*$/ && $inside) {
- calcPerfectHashSize();
- calcCompactHashSize();
output();
@keys = ();
@attrs = ();
@values = ();
- @hashes = ();
- @table = ();
- @links = ();
$includeBuiltin = 0;
$inside = 0;
@@ -114,7 +111,6 @@ while (<IN>) {
} else {
push(@values, { "type" => "Lexer", "value" => $val });
}
- push(@hashes, hashValue($key));
} elsif ($inside) {
die "invalid data {" . $_ . "}";
}
@@ -147,13 +143,14 @@ sub ceilingToPowerOf2
return $powerOf2;
}
-sub calcPerfectHashSize()
+sub calcPerfectHashSize($)
{
+ my ($isMac) = @_;
tableSizeLoop:
for ($pefectHashSize = ceilingToPowerOf2(scalar @keys); ; $pefectHashSize += $pefectHashSize) {
my @table = ();
foreach my $key (@keys) {
- my $h = hashValue($key) % $pefectHashSize;
+ my $h = hashValue($key, $isMac) % $pefectHashSize;
next tableSizeLoop if $table[$h];
$table[$h] = 1;
}
@@ -166,8 +163,9 @@ sub leftShift($$) {
return (($value << $distance) & 0xFFFFFFFF);
}
-sub calcCompactHashSize()
+sub calcCompactHashSize($)
{
+ my ($isMac) = @_;
my $compactHashSize = ceilingToPowerOf2(2 * @keys);
$compactHashSizeMask = $compactHashSize - 1;
$compactSize = $compactHashSize;
@@ -176,7 +174,7 @@ sub calcCompactHashSize()
my $i = 0;
foreach my $key (@keys) {
my $depth = 0;
- my $h = hashValue($key) % $compactHashSize;
+ my $h = hashValue($key, $isMac) % $compactHashSize;
while (defined($table[$h])) {
if (defined($links[$h])) {
$h = $links[$h];
@@ -194,60 +192,222 @@ sub calcCompactHashSize()
}
}
+sub avalancheBits($) {
+ my ($value) = @_;
+
+ $value &= $mask32;
+
+ # Force "avalanching" of lower 32 bits
+ $value ^= leftShift($value, 3);
+ $value += ($value >> 5);
+ $value = ($value & $mask32);
+ $value ^= (leftShift($value, 2) & $mask32);
+ $value += ($value >> 15);
+ $value = $value & $mask32;
+ $value ^= (leftShift($value, 10) & $mask32);
+
+ return $value;
+}
+
+sub maskTop8BitsAndAvoidZero($) {
+ my ($value) = @_;
+
+ $value &= $mask32;
+
+ # Save 8 bits for StringImpl to use as flags.
+ $value &= 0xffffff;
+
+ # This avoids ever returning a hash code of 0, since that is used to
+ # signal "hash not computed yet". Setting the high bit maintains
+ # reasonable fidelity to a hash code of 0 because it is likely to yield
+ # exactly 0 when hash lookup masks out the high bits.
+ $value = (0x80000000 >> 8) if ($value == 0);
+
+ return $value;
+}
+
# Paul Hsieh's SuperFastHash
# http://www.azillionmonkeys.com/qed/hash.html
-sub hashValue($) {
- my @chars = split(/ */, $_[0]);
-
- # This hash is designed to work on 16-bit chunks at a time. But since the normal case
- # (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
- # were 16-bit chunks, which should give matching results
-
- my $EXP2_32 = 4294967296;
-
- my $hash = 0x9e3779b9;
- my $l = scalar @chars; #I wish this was in Ruby --- Maks
- my $rem = $l & 1;
- $l = $l >> 1;
-
- my $s = 0;
-
- # Main loop
- for (; $l > 0; $l--) {
- $hash += ord($chars[$s]);
- my $tmp = leftShift(ord($chars[$s+1]), 11) ^ $hash;
- $hash = (leftShift($hash, 16)% $EXP2_32) ^ $tmp;
- $s += 2;
- $hash += $hash >> 11;
- $hash %= $EXP2_32;
- }
-
- # Handle end case
- if ($rem != 0) {
- $hash += ord($chars[$s]);
- $hash ^= (leftShift($hash, 11)% $EXP2_32);
- $hash += $hash >> 17;
- }
-
- # Force "avalanching" of final 127 bits
- $hash ^= leftShift($hash, 3);
- $hash += ($hash >> 5);
- $hash = ($hash% $EXP2_32);
- $hash ^= (leftShift($hash, 2)% $EXP2_32);
- $hash += ($hash >> 15);
- $hash = $hash% $EXP2_32;
- $hash ^= (leftShift($hash, 10)% $EXP2_32);
-
- # Save 8 bits for StringImpl to use as flags.
- $hash &= 0xffffff;
-
- # This avoids ever returning a hash code of 0, since that is used to
- # signal "hash not computed yet". Setting the high bit maintains
- # reasonable fidelity to a hash code of 0 because it is likely to yield
- # exactly 0 when hash lookup masks out the high bits.
- $hash = (0x80000000 >> 8) if ($hash == 0);
-
- return $hash;
+sub superFastHash {
+ my @chars = @_;
+
+ # This hash is designed to work on 16-bit chunks at a time. But since the normal case
+ # (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
+ # were 16-bit chunks, which should give matching results
+
+ my $hash = 0x9e3779b9;
+ my $l = scalar @chars; #I wish this was in Ruby --- Maks
+ my $rem = $l & 1;
+ $l = $l >> 1;
+
+ my $s = 0;
+
+ # Main loop
+ for (; $l > 0; $l--) {
+ $hash += ord($chars[$s]);
+ my $tmp = leftShift(ord($chars[$s+1]), 11) ^ $hash;
+ $hash = (leftShift($hash, 16) & $mask32) ^ $tmp;
+ $s += 2;
+ $hash += $hash >> 11;
+ $hash &= $mask32;
+ }
+
+ # Handle end case
+ if ($rem != 0) {
+ $hash += ord($chars[$s]);
+ $hash ^= (leftShift($hash, 11) & $mask32);
+ $hash += $hash >> 17;
+ }
+
+ $hash = avalancheBits($hash);
+ return maskTop8BitsAndAvoidZero($hash);
+}
+
+sub uint64_add($$) {
+ my ($a, $b) = @_;
+ my $sum = $a + $b;
+ return $sum & $mask64;
+}
+
+sub uint64_multi($$) {
+ my ($a, $b) = @_;
+ my $product = $a * $b;
+ return $product & $mask64;
+}
+
+sub wymum($$) {
+ my ($A, $B) = @_;
+
+ my $ha = $A >> 32;
+ my $hb = $B >> 32;
+ my $la = $A & $mask32;
+ my $lb = $B & $mask32;
+ my $hi;
+ my $lo;
+ my $rh = uint64_multi($ha, $hb);
+ my $rm0 = uint64_multi($ha, $lb);
+ my $rm1 = uint64_multi($hb, $la);
+ my $rl = uint64_multi($la, $lb);
+ my $t = uint64_add($rl, ($rm0 << 32));
+ my $c = int($t < $rl);
+
+ $lo = uint64_add($t, ($rm1 << 32));
+ $c += int($lo < $t);
+ $hi = uint64_add($rh, uint64_add(($rm0 >> 32), uint64_add(($rm1 >> 32), $c)));
+
+ return ($lo, $hi);
+};
+
+sub wymix($$) {
+ my ($A, $B) = @_;
+ ($A, $B) = wymum($A, $B);
+ return $A ^ $B;
+}
+
+sub convert32BitTo64Bit($) {
+ my ($v) = @_;
+ my ($mask1) = 281470681808895; # 0x0000_ffff_0000_ffff
+ $v = ($v | ($v << 16)) & $mask1;
+ my ($mask2) = 71777214294589695; # 0x00ff_00ff_00ff_00ff
+ return ($v | ($v << 8)) & $mask2;
+}
+
+sub convert16BitTo32Bit($) {
+ my ($v) = @_;
+ return ($v | ($v << 8)) & 0x00ff_00ff;
+}
+
+sub wyhash {
+ # https://github.com/wangyi-fudan/wyhash
+ my @chars = @_;
+ my $charCount = scalar @chars;
+ my $byteCount = $charCount << 1;
+ my $charIndex = 0;
+ my $seed = 0;
+ my @secret = ( 11562461410679940143, 16646288086500911323, 10285213230658275043, 6384245875588680899 );
+ my $move1 = (($byteCount >> 3) << 2) >> 1;
+
+ $seed ^= wymix($seed ^ $secret[0], $secret[1]);
+ my $a = 0;
+ my $b = 0;
+
+ local *c2i = sub {
+ my ($i) = @_;
+ return ord($chars[$i]);
+ };
+
+ local *wyr8 = sub {
+ my ($i) = @_;
+ my $v = c2i($i) | (c2i($i + 1) << 8) | (c2i($i + 2) << 16) | (c2i($i + 3) << 24);
+ return convert32BitTo64Bit($v);
+ };
+
+ local *wyr4 = sub {
+ my ($i) = @_;
+ my $v = c2i($i) | (c2i($i + 1) << 8);
+ return convert16BitTo32Bit($v);
+ };
+
+ local *wyr2 = sub {
+ my ($i) = @_;
+ return c2i($i) << 16;
+ };
+
+ if ($byteCount <= 16) {
+ if ($byteCount >= 4) {
+ $a = (wyr4($charIndex) << 32) | wyr4($charIndex + $move1);
+ $charIndex = $charIndex + $charCount - 2;
+ $b = (wyr4($charIndex) << 32) | wyr4($charIndex - $move1);
+ } elsif ($byteCount > 0) {
+ $a = wyr2($charIndex);
+ $b = 0;
+ } else {
+ $a = $b = 0;
+ }
+ } else {
+ my $i = $byteCount;
+ if ($i > 48) {
+ my $see1 = $seed;
+ my $see2 = $seed;
+ do {
+ $seed = wymix(wyr8($charIndex) ^ $secret[1], wyr8($charIndex + 4) ^ $seed);
+ $see1 = wymix(wyr8($charIndex + 8) ^ $secret[2], wyr8($charIndex + 12) ^ $see1);
+ $see2 = wymix(wyr8($charIndex + 16) ^ $secret[3], wyr8($charIndex + 20) ^ $see2);
+ $charIndex += 24;
+ $i -= 48;
+ } while ($i > 48);
+ $seed ^= $see1 ^ $see2;
+ }
+ while ($i > 16) {
+ $seed = wymix(wyr8($charIndex) ^ $secret[1], wyr8($charIndex + 4) ^ $seed);
+ $i -= 16;
+ $charIndex += 8;
+ }
+ my $move2 = $i >> 1;
+ $a = wyr8($charIndex + $move2 - 8);
+ $b = wyr8($charIndex + $move2 - 4);
+ }
+ $a ^= $secret[1];
+ $b ^= $seed;
+
+ ($a, $b) = wymum($a, $b);
+ my $hash = wymix($a ^ $secret[0] ^ $byteCount, $b ^ $secret[1]) & $mask32;
+
+ return maskTop8BitsAndAvoidZero($hash);
+}
+
+sub hashValue($$) {
+ my ($string, $isMac) = @_;
+ my @chars = split(/ */, $string);
+ my $charCount = scalar @chars;
+ if ($isMac) {
+ if ($charCount <= 48) {
+ return superFastHash(@chars);
+ }
+ return wyhash(@chars);
+ } else {
+ return superFastHash(@chars);
+ }
}
sub output() {
@@ -267,81 +427,110 @@ sub output() {
print "\n";
print "namespace JSC {\n";
print "\n";
- if ($compactSize != 0) {
- print "static const struct CompactHashIndex ${nameIndex}\[$compactSize\] = {\n";
- for (my $i = 0; $i < $compactSize; $i++) {
- my $T = -1;
- if (defined($table[$i])) { $T = $table[$i]; }
- my $L = -1;
- if (defined($links[$i])) { $L = $links[$i]; }
- print " { $T, $L },\n";
+
+ local *generateHashTableHelper = sub {
+ my ($isMac, $setToOldValues) = @_;
+ my $oldCompactSize = $compactSize;
+ my $oldCompactHashSizeMask = $compactHashSizeMask;
+ calcPerfectHashSize($isMac);
+ calcCompactHashSize($isMac);
+
+ my $hashTableString = "";
+
+ if ($compactSize != 0) {
+ $hashTableString .= "static const struct CompactHashIndex ${nameIndex}\[$compactSize\] = {\n";
+ for (my $i = 0; $i < $compactSize; $i++) {
+ my $T = -1;
+ if (defined($table[$i])) { $T = $table[$i]; }
+ my $L = -1;
+ if (defined($links[$i])) { $L = $links[$i]; }
+ $hashTableString .= " { $T, $L },\n";
+ }
+ } else {
+ # MSVC dislikes empty arrays.
+ $hashTableString .= "static const struct CompactHashIndex ${nameIndex}\[1\] = {\n";
+ $hashTableString .= " { 0, 0 }\n";
}
- } else {
- # MSVC dislikes empty arrays.
- print "static const struct CompactHashIndex ${nameIndex}\[1\] = {\n";
- print " { 0, 0 }\n";
- }
- print "};\n";
- print "\n";
+ $hashTableString .= "};\n";
+ $hashTableString .= "\n";
- my $packedSize = scalar @keys;
- if ($packedSize != 0) {
- print "static const struct HashTableValue ${nameEntries}\[$packedSize\] = {\n";
- } else {
- # MSVC dislikes empty arrays.
- print "static const struct HashTableValue ${nameEntries}\[1\] = {\n";
- print " { { }, 0, NoIntrinsic, { HashTableValue::End } }\n";
- }
- my $i = 0;
- foreach my $key (@keys) {
- my $typeTag = "";
- my $firstValue = "";
- my $secondValue = "";
- my $hasSecondValue = 1;
- my $intrinsic = "NoIntrinsic";
-
- if ($values[$i]{"type"} eq "PropertyAttribute::Function") {
- $typeTag = "NativeFunction";
- $firstValue = $values[$i]{"function"};
- $secondValue = $values[$i]{"params"};
- $intrinsic = $values[$i]{"intrinsic"};
- } elsif ($values[$i]{"type"} eq "PropertyAttribute::Property") {
- $typeTag = "GetterSetter";
- $firstValue = $values[$i]{"get"};
- $secondValue = $values[$i]{"put"};
- } elsif ($values[$i]{"type"} eq "Lexer") {
- $typeTag = "Lexer";
- $firstValue = $values[$i]{"value"};
- $hasSecondValue = 0;
- } elsif ($values[$i]{"type"} eq "PropertyAttribute::CellProperty" || $values[$i]{"type"} eq "PropertyAttribute::ClassStructure") {
- $typeTag = ($values[$i]{"type"} eq "PropertyAttribute::CellProperty") ? "LazyCellProperty" : "LazyClassStructure";
- $values[$i]{"property"} =~ /\A([a-zA-Z0-9_]+)::(.*)\Z/ or die;
- $firstValue = "OBJECT_OFFSETOF($1, $2)";
- $hasSecondValue = 0;
- } elsif ($values[$i]{"type"} eq "PropertyAttribute::PropertyCallback") {
- $typeTag = "LazyProperty";
- $firstValue = $values[$i]{"cback"};
- $hasSecondValue = 0;
+ my $packedSize = scalar @keys;
+ if ($packedSize != 0) {
+ $hashTableString .= "static const struct HashTableValue ${nameEntries}\[$packedSize\] = {\n";
+ } else {
+ # MSVC dislikes empty arrays.
+ $hashTableString .= "static const struct HashTableValue ${nameEntries}\[1\] = {\n";
+ $hashTableString .= " { { }, 0, NoIntrinsic, { HashTableValue::End } }\n";
}
- my $attributes = "PropertyAttribute::" . $attrs[$i];
- $attributes =~ s/\|/\|PropertyAttribute::/g;
- $attributes = "static_cast<unsigned>(" . $attributes . ")";
- if ($values[$i]{"type"} eq "PropertyAttribute::Function" && $firstValue eq "JSBuiltin") {
- $typeTag = "BuiltinGenerator";
- my $tableHead = $name;
- $tableHead =~ s/Table$//;
- print " { \"$key\"_s, (($attributes) & ~PropertyAttribute::Function) | PropertyAttribute::Builtin, $intrinsic, { HashTableValue::" . $typeTag . "Type, " . $tableHead . ucfirst($key) . "CodeGenerator, $secondValue } },\n";
+ my $i = 0;
+ foreach my $key (@keys) {
+ my $typeTag = "";
+ my $firstValue = "";
+ my $secondValue = "";
+ my $hasSecondValue = 1;
+ my $intrinsic = "NoIntrinsic";
+
+ if ($values[$i]{"type"} eq "PropertyAttribute::Function") {
+ $typeTag = "NativeFunction";
+ $firstValue = $values[$i]{"function"};
+ $secondValue = $values[$i]{"params"};
+ $intrinsic = $values[$i]{"intrinsic"};
+ } elsif ($values[$i]{"type"} eq "PropertyAttribute::Property") {
+ $typeTag = "GetterSetter";
+ $firstValue = $values[$i]{"get"};
+ $secondValue = $values[$i]{"put"};
+ } elsif ($values[$i]{"type"} eq "Lexer") {
+ $typeTag = "Lexer";
+ $firstValue = $values[$i]{"value"};
+ $hasSecondValue = 0;
+ } elsif ($values[$i]{"type"} eq "PropertyAttribute::CellProperty" || $values[$i]{"type"} eq "PropertyAttribute::ClassStructure") {
+ $typeTag = ($values[$i]{"type"} eq "PropertyAttribute::CellProperty") ? "LazyCellProperty" : "LazyClassStructure";
+ $values[$i]{"property"} =~ /\A([a-zA-Z0-9_]+)::(.*)\Z/ or die;
+ $firstValue = "OBJECT_OFFSETOF($1, $2)";
+ $hasSecondValue = 0;
+ } elsif ($values[$i]{"type"} eq "PropertyAttribute::PropertyCallback") {
+ $typeTag = "LazyProperty";
+ $firstValue = $values[$i]{"cback"};
+ $hasSecondValue = 0;
+ }
+
+ my $attributes = "PropertyAttribute::" . $attrs[$i];
+ $attributes =~ s/\|/\|PropertyAttribute::/g;
+ $attributes = "static_cast<unsigned>(" . $attributes . ")";
+ if ($values[$i]{"type"} eq "PropertyAttribute::Function" && $firstValue eq "JSBuiltin") {
+ $typeTag = "BuiltinGenerator";
+ my $tableHead = $name;
+ $tableHead =~ s/Table$//;
+ $hashTableString .= " { \"$key\"_s, (($attributes) & ~PropertyAttribute::Function) | PropertyAttribute::Builtin, $intrinsic, { HashTableValue::" . $typeTag . "Type, " . $tableHead . ucfirst($key) . "CodeGenerator, $secondValue } },\n";
+ }
+ else {
+ $hashTableString .= " { \"$key\"_s, $attributes, $intrinsic, { HashTableValue::" . $typeTag . "Type, $firstValue" . ($hasSecondValue ? ", " . $secondValue : "") . " } },\n";
+ }
+ $i++;
}
- else {
- print " { \"$key\"_s, $attributes, $intrinsic, { HashTableValue::" . $typeTag . "Type, $firstValue" . ($hasSecondValue ? ", " . $secondValue : "") . " } },\n";
+ $hashTableString .= "};\n";
+ $hashTableString .= "\n";
+ $hashTableString .= "static const struct HashTable $name =\n";
+ $hashTableString .= " \{ $packedSize, $compactHashSizeMask, $hasSetter, nullptr, $nameEntries, $nameIndex \};\n";
+ $hashTableString .= "\n";
+
+ @table = ();
+ @links = ();
+ if ($setToOldValues) {
+ $compactSize = $oldCompactSize;
+ $compactHashSizeMask = $oldCompactHashSizeMask;
}
- $i++;
+ return $hashTableString;
+ };
+
+ my $hashTableForMacOS = generateHashTableHelper(1, 1);
+ my $hashTableForIOS = generateHashTableHelper(0, 0);
+ my $hashTableToWrite = $hashTableForMacOS;
+ if ($hashTableForMacOS ne $hashTableForIOS) {
+ $hashTableToWrite = "#if PLATFORM(MAC)\n" . $hashTableForMacOS . "#else\n" . $hashTableForIOS . "#endif\n";
}
- print "};\n";
- print "\n";
- print "static const struct HashTable $name =\n";
- print " \{ $packedSize, $compactHashSizeMask, $hasSetter, nullptr, $nameEntries, $nameIndex \};\n";
- print "\n";
+ print $hashTableToWrite;
+
print "} // namespace JSC\n";
}