diff options
Diffstat (limited to 'src/bun.js/scripts/create_hash_table')
-rwxr-xr-x | src/bun.js/scripts/create_hash_table | 461 |
1 files changed, 325 insertions, 136 deletions
diff --git a/src/bun.js/scripts/create_hash_table b/src/bun.js/scripts/create_hash_table index e2645b429..bd604ceaa 100755 --- a/src/bun.js/scripts/create_hash_table +++ b/src/bun.js/scripts/create_hash_table @@ -5,7 +5,7 @@ # (c) 2000-2002 by Harri Porten <porten@kde.org> and # David Faure <faure@kde.org> # Modified (c) 2004 by Nikolas Zimmermann <wildfox@kde.org> -# Copyright (C) 2007-2022 Apple Inc. All rights reserved. +# Copyright (C) 2007-2023 Apple Inc. All rights reserved. # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -24,6 +24,7 @@ use strict; use warnings; +use Math::BigInt; use Getopt::Long qw(:config pass_through); my $file = shift @ARGV or die("Must provide source file as final argument."); @@ -33,7 +34,6 @@ open(IN, $file) or die "No such file $file"; my @keys = (); my @attrs = (); my @values = (); -my @hashes = (); my @table = (); my @links = (); @@ -46,11 +46,13 @@ my $pefectHashSize; my $compactSize; my $compactHashSizeMask; my $banner = 0; -sub calcPerfectHashSize(); -sub calcCompactHashSize(); +my $mask64 = 2**64 - 1; +my $mask32 = 2**32 - 1; +sub calcPerfectHashSize($); +sub calcCompactHashSize($); sub output(); sub jsc_ucfirst($); -sub hashValue($); +sub hashValue($$); while (<IN>) { chomp; @@ -64,16 +66,11 @@ while (<IN>) { print STDERR "WARNING: \@begin without table name, skipping $_\n"; } } elsif (/^\@end\s*$/ && $inside) { - calcPerfectHashSize(); - calcCompactHashSize(); output(); @keys = (); @attrs = (); @values = (); - @hashes = (); - @table = (); - @links = (); $includeBuiltin = 0; $inside = 0; @@ -114,7 +111,6 @@ while (<IN>) { } else { push(@values, { "type" => "Lexer", "value" => $val }); } - push(@hashes, hashValue($key)); } elsif ($inside) { die "invalid data {" . $_ . "}"; } @@ -147,13 +143,14 @@ sub ceilingToPowerOf2 return $powerOf2; } -sub calcPerfectHashSize() +sub calcPerfectHashSize($) { + my ($isMac) = @_; tableSizeLoop: for ($pefectHashSize = ceilingToPowerOf2(scalar @keys); ; $pefectHashSize += $pefectHashSize) { my @table = (); foreach my $key (@keys) { - my $h = hashValue($key) % $pefectHashSize; + my $h = hashValue($key, $isMac) % $pefectHashSize; next tableSizeLoop if $table[$h]; $table[$h] = 1; } @@ -166,8 +163,9 @@ sub leftShift($$) { return (($value << $distance) & 0xFFFFFFFF); } -sub calcCompactHashSize() +sub calcCompactHashSize($) { + my ($isMac) = @_; my $compactHashSize = ceilingToPowerOf2(2 * @keys); $compactHashSizeMask = $compactHashSize - 1; $compactSize = $compactHashSize; @@ -176,7 +174,7 @@ sub calcCompactHashSize() my $i = 0; foreach my $key (@keys) { my $depth = 0; - my $h = hashValue($key) % $compactHashSize; + my $h = hashValue($key, $isMac) % $compactHashSize; while (defined($table[$h])) { if (defined($links[$h])) { $h = $links[$h]; @@ -194,60 +192,222 @@ sub calcCompactHashSize() } } +sub avalancheBits($) { + my ($value) = @_; + + $value &= $mask32; + + # Force "avalanching" of lower 32 bits + $value ^= leftShift($value, 3); + $value += ($value >> 5); + $value = ($value & $mask32); + $value ^= (leftShift($value, 2) & $mask32); + $value += ($value >> 15); + $value = $value & $mask32; + $value ^= (leftShift($value, 10) & $mask32); + + return $value; +} + +sub maskTop8BitsAndAvoidZero($) { + my ($value) = @_; + + $value &= $mask32; + + # Save 8 bits for StringImpl to use as flags. + $value &= 0xffffff; + + # This avoids ever returning a hash code of 0, since that is used to + # signal "hash not computed yet". Setting the high bit maintains + # reasonable fidelity to a hash code of 0 because it is likely to yield + # exactly 0 when hash lookup masks out the high bits. + $value = (0x80000000 >> 8) if ($value == 0); + + return $value; +} + # Paul Hsieh's SuperFastHash # http://www.azillionmonkeys.com/qed/hash.html -sub hashValue($) { - my @chars = split(/ */, $_[0]); - - # This hash is designed to work on 16-bit chunks at a time. But since the normal case - # (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they - # were 16-bit chunks, which should give matching results - - my $EXP2_32 = 4294967296; - - my $hash = 0x9e3779b9; - my $l = scalar @chars; #I wish this was in Ruby --- Maks - my $rem = $l & 1; - $l = $l >> 1; - - my $s = 0; - - # Main loop - for (; $l > 0; $l--) { - $hash += ord($chars[$s]); - my $tmp = leftShift(ord($chars[$s+1]), 11) ^ $hash; - $hash = (leftShift($hash, 16)% $EXP2_32) ^ $tmp; - $s += 2; - $hash += $hash >> 11; - $hash %= $EXP2_32; - } - - # Handle end case - if ($rem != 0) { - $hash += ord($chars[$s]); - $hash ^= (leftShift($hash, 11)% $EXP2_32); - $hash += $hash >> 17; - } - - # Force "avalanching" of final 127 bits - $hash ^= leftShift($hash, 3); - $hash += ($hash >> 5); - $hash = ($hash% $EXP2_32); - $hash ^= (leftShift($hash, 2)% $EXP2_32); - $hash += ($hash >> 15); - $hash = $hash% $EXP2_32; - $hash ^= (leftShift($hash, 10)% $EXP2_32); - - # Save 8 bits for StringImpl to use as flags. - $hash &= 0xffffff; - - # This avoids ever returning a hash code of 0, since that is used to - # signal "hash not computed yet". Setting the high bit maintains - # reasonable fidelity to a hash code of 0 because it is likely to yield - # exactly 0 when hash lookup masks out the high bits. - $hash = (0x80000000 >> 8) if ($hash == 0); - - return $hash; +sub superFastHash { + my @chars = @_; + + # This hash is designed to work on 16-bit chunks at a time. But since the normal case + # (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they + # were 16-bit chunks, which should give matching results + + my $hash = 0x9e3779b9; + my $l = scalar @chars; #I wish this was in Ruby --- Maks + my $rem = $l & 1; + $l = $l >> 1; + + my $s = 0; + + # Main loop + for (; $l > 0; $l--) { + $hash += ord($chars[$s]); + my $tmp = leftShift(ord($chars[$s+1]), 11) ^ $hash; + $hash = (leftShift($hash, 16) & $mask32) ^ $tmp; + $s += 2; + $hash += $hash >> 11; + $hash &= $mask32; + } + + # Handle end case + if ($rem != 0) { + $hash += ord($chars[$s]); + $hash ^= (leftShift($hash, 11) & $mask32); + $hash += $hash >> 17; + } + + $hash = avalancheBits($hash); + return maskTop8BitsAndAvoidZero($hash); +} + +sub uint64_add($$) { + my ($a, $b) = @_; + my $sum = $a + $b; + return $sum & $mask64; +} + +sub uint64_multi($$) { + my ($a, $b) = @_; + my $product = $a * $b; + return $product & $mask64; +} + +sub wymum($$) { + my ($A, $B) = @_; + + my $ha = $A >> 32; + my $hb = $B >> 32; + my $la = $A & $mask32; + my $lb = $B & $mask32; + my $hi; + my $lo; + my $rh = uint64_multi($ha, $hb); + my $rm0 = uint64_multi($ha, $lb); + my $rm1 = uint64_multi($hb, $la); + my $rl = uint64_multi($la, $lb); + my $t = uint64_add($rl, ($rm0 << 32)); + my $c = int($t < $rl); + + $lo = uint64_add($t, ($rm1 << 32)); + $c += int($lo < $t); + $hi = uint64_add($rh, uint64_add(($rm0 >> 32), uint64_add(($rm1 >> 32), $c))); + + return ($lo, $hi); +}; + +sub wymix($$) { + my ($A, $B) = @_; + ($A, $B) = wymum($A, $B); + return $A ^ $B; +} + +sub convert32BitTo64Bit($) { + my ($v) = @_; + my ($mask1) = 281470681808895; # 0x0000_ffff_0000_ffff + $v = ($v | ($v << 16)) & $mask1; + my ($mask2) = 71777214294589695; # 0x00ff_00ff_00ff_00ff + return ($v | ($v << 8)) & $mask2; +} + +sub convert16BitTo32Bit($) { + my ($v) = @_; + return ($v | ($v << 8)) & 0x00ff_00ff; +} + +sub wyhash { + # https://github.com/wangyi-fudan/wyhash + my @chars = @_; + my $charCount = scalar @chars; + my $byteCount = $charCount << 1; + my $charIndex = 0; + my $seed = 0; + my @secret = ( 11562461410679940143, 16646288086500911323, 10285213230658275043, 6384245875588680899 ); + my $move1 = (($byteCount >> 3) << 2) >> 1; + + $seed ^= wymix($seed ^ $secret[0], $secret[1]); + my $a = 0; + my $b = 0; + + local *c2i = sub { + my ($i) = @_; + return ord($chars[$i]); + }; + + local *wyr8 = sub { + my ($i) = @_; + my $v = c2i($i) | (c2i($i + 1) << 8) | (c2i($i + 2) << 16) | (c2i($i + 3) << 24); + return convert32BitTo64Bit($v); + }; + + local *wyr4 = sub { + my ($i) = @_; + my $v = c2i($i) | (c2i($i + 1) << 8); + return convert16BitTo32Bit($v); + }; + + local *wyr2 = sub { + my ($i) = @_; + return c2i($i) << 16; + }; + + if ($byteCount <= 16) { + if ($byteCount >= 4) { + $a = (wyr4($charIndex) << 32) | wyr4($charIndex + $move1); + $charIndex = $charIndex + $charCount - 2; + $b = (wyr4($charIndex) << 32) | wyr4($charIndex - $move1); + } elsif ($byteCount > 0) { + $a = wyr2($charIndex); + $b = 0; + } else { + $a = $b = 0; + } + } else { + my $i = $byteCount; + if ($i > 48) { + my $see1 = $seed; + my $see2 = $seed; + do { + $seed = wymix(wyr8($charIndex) ^ $secret[1], wyr8($charIndex + 4) ^ $seed); + $see1 = wymix(wyr8($charIndex + 8) ^ $secret[2], wyr8($charIndex + 12) ^ $see1); + $see2 = wymix(wyr8($charIndex + 16) ^ $secret[3], wyr8($charIndex + 20) ^ $see2); + $charIndex += 24; + $i -= 48; + } while ($i > 48); + $seed ^= $see1 ^ $see2; + } + while ($i > 16) { + $seed = wymix(wyr8($charIndex) ^ $secret[1], wyr8($charIndex + 4) ^ $seed); + $i -= 16; + $charIndex += 8; + } + my $move2 = $i >> 1; + $a = wyr8($charIndex + $move2 - 8); + $b = wyr8($charIndex + $move2 - 4); + } + $a ^= $secret[1]; + $b ^= $seed; + + ($a, $b) = wymum($a, $b); + my $hash = wymix($a ^ $secret[0] ^ $byteCount, $b ^ $secret[1]) & $mask32; + + return maskTop8BitsAndAvoidZero($hash); +} + +sub hashValue($$) { + my ($string, $isMac) = @_; + my @chars = split(/ */, $string); + my $charCount = scalar @chars; + if ($isMac) { + if ($charCount <= 48) { + return superFastHash(@chars); + } + return wyhash(@chars); + } else { + return superFastHash(@chars); + } } sub output() { @@ -267,81 +427,110 @@ sub output() { print "\n"; print "namespace JSC {\n"; print "\n"; - if ($compactSize != 0) { - print "static const struct CompactHashIndex ${nameIndex}\[$compactSize\] = {\n"; - for (my $i = 0; $i < $compactSize; $i++) { - my $T = -1; - if (defined($table[$i])) { $T = $table[$i]; } - my $L = -1; - if (defined($links[$i])) { $L = $links[$i]; } - print " { $T, $L },\n"; + + local *generateHashTableHelper = sub { + my ($isMac, $setToOldValues) = @_; + my $oldCompactSize = $compactSize; + my $oldCompactHashSizeMask = $compactHashSizeMask; + calcPerfectHashSize($isMac); + calcCompactHashSize($isMac); + + my $hashTableString = ""; + + if ($compactSize != 0) { + $hashTableString .= "static const struct CompactHashIndex ${nameIndex}\[$compactSize\] = {\n"; + for (my $i = 0; $i < $compactSize; $i++) { + my $T = -1; + if (defined($table[$i])) { $T = $table[$i]; } + my $L = -1; + if (defined($links[$i])) { $L = $links[$i]; } + $hashTableString .= " { $T, $L },\n"; + } + } else { + # MSVC dislikes empty arrays. + $hashTableString .= "static const struct CompactHashIndex ${nameIndex}\[1\] = {\n"; + $hashTableString .= " { 0, 0 }\n"; } - } else { - # MSVC dislikes empty arrays. - print "static const struct CompactHashIndex ${nameIndex}\[1\] = {\n"; - print " { 0, 0 }\n"; - } - print "};\n"; - print "\n"; + $hashTableString .= "};\n"; + $hashTableString .= "\n"; - my $packedSize = scalar @keys; - if ($packedSize != 0) { - print "static const struct HashTableValue ${nameEntries}\[$packedSize\] = {\n"; - } else { - # MSVC dislikes empty arrays. - print "static const struct HashTableValue ${nameEntries}\[1\] = {\n"; - print " { { }, 0, NoIntrinsic, { HashTableValue::End } }\n"; - } - my $i = 0; - foreach my $key (@keys) { - my $typeTag = ""; - my $firstValue = ""; - my $secondValue = ""; - my $hasSecondValue = 1; - my $intrinsic = "NoIntrinsic"; - - if ($values[$i]{"type"} eq "PropertyAttribute::Function") { - $typeTag = "NativeFunction"; - $firstValue = $values[$i]{"function"}; - $secondValue = $values[$i]{"params"}; - $intrinsic = $values[$i]{"intrinsic"}; - } elsif ($values[$i]{"type"} eq "PropertyAttribute::Property") { - $typeTag = "GetterSetter"; - $firstValue = $values[$i]{"get"}; - $secondValue = $values[$i]{"put"}; - } elsif ($values[$i]{"type"} eq "Lexer") { - $typeTag = "Lexer"; - $firstValue = $values[$i]{"value"}; - $hasSecondValue = 0; - } elsif ($values[$i]{"type"} eq "PropertyAttribute::CellProperty" || $values[$i]{"type"} eq "PropertyAttribute::ClassStructure") { - $typeTag = ($values[$i]{"type"} eq "PropertyAttribute::CellProperty") ? "LazyCellProperty" : "LazyClassStructure"; - $values[$i]{"property"} =~ /\A([a-zA-Z0-9_]+)::(.*)\Z/ or die; - $firstValue = "OBJECT_OFFSETOF($1, $2)"; - $hasSecondValue = 0; - } elsif ($values[$i]{"type"} eq "PropertyAttribute::PropertyCallback") { - $typeTag = "LazyProperty"; - $firstValue = $values[$i]{"cback"}; - $hasSecondValue = 0; + my $packedSize = scalar @keys; + if ($packedSize != 0) { + $hashTableString .= "static const struct HashTableValue ${nameEntries}\[$packedSize\] = {\n"; + } else { + # MSVC dislikes empty arrays. + $hashTableString .= "static const struct HashTableValue ${nameEntries}\[1\] = {\n"; + $hashTableString .= " { { }, 0, NoIntrinsic, { HashTableValue::End } }\n"; } - my $attributes = "PropertyAttribute::" . $attrs[$i]; - $attributes =~ s/\|/\|PropertyAttribute::/g; - $attributes = "static_cast<unsigned>(" . $attributes . ")"; - if ($values[$i]{"type"} eq "PropertyAttribute::Function" && $firstValue eq "JSBuiltin") { - $typeTag = "BuiltinGenerator"; - my $tableHead = $name; - $tableHead =~ s/Table$//; - print " { \"$key\"_s, (($attributes) & ~PropertyAttribute::Function) | PropertyAttribute::Builtin, $intrinsic, { HashTableValue::" . $typeTag . "Type, " . $tableHead . ucfirst($key) . "CodeGenerator, $secondValue } },\n"; + my $i = 0; + foreach my $key (@keys) { + my $typeTag = ""; + my $firstValue = ""; + my $secondValue = ""; + my $hasSecondValue = 1; + my $intrinsic = "NoIntrinsic"; + + if ($values[$i]{"type"} eq "PropertyAttribute::Function") { + $typeTag = "NativeFunction"; + $firstValue = $values[$i]{"function"}; + $secondValue = $values[$i]{"params"}; + $intrinsic = $values[$i]{"intrinsic"}; + } elsif ($values[$i]{"type"} eq "PropertyAttribute::Property") { + $typeTag = "GetterSetter"; + $firstValue = $values[$i]{"get"}; + $secondValue = $values[$i]{"put"}; + } elsif ($values[$i]{"type"} eq "Lexer") { + $typeTag = "Lexer"; + $firstValue = $values[$i]{"value"}; + $hasSecondValue = 0; + } elsif ($values[$i]{"type"} eq "PropertyAttribute::CellProperty" || $values[$i]{"type"} eq "PropertyAttribute::ClassStructure") { + $typeTag = ($values[$i]{"type"} eq "PropertyAttribute::CellProperty") ? "LazyCellProperty" : "LazyClassStructure"; + $values[$i]{"property"} =~ /\A([a-zA-Z0-9_]+)::(.*)\Z/ or die; + $firstValue = "OBJECT_OFFSETOF($1, $2)"; + $hasSecondValue = 0; + } elsif ($values[$i]{"type"} eq "PropertyAttribute::PropertyCallback") { + $typeTag = "LazyProperty"; + $firstValue = $values[$i]{"cback"}; + $hasSecondValue = 0; + } + + my $attributes = "PropertyAttribute::" . $attrs[$i]; + $attributes =~ s/\|/\|PropertyAttribute::/g; + $attributes = "static_cast<unsigned>(" . $attributes . ")"; + if ($values[$i]{"type"} eq "PropertyAttribute::Function" && $firstValue eq "JSBuiltin") { + $typeTag = "BuiltinGenerator"; + my $tableHead = $name; + $tableHead =~ s/Table$//; + $hashTableString .= " { \"$key\"_s, (($attributes) & ~PropertyAttribute::Function) | PropertyAttribute::Builtin, $intrinsic, { HashTableValue::" . $typeTag . "Type, " . $tableHead . ucfirst($key) . "CodeGenerator, $secondValue } },\n"; + } + else { + $hashTableString .= " { \"$key\"_s, $attributes, $intrinsic, { HashTableValue::" . $typeTag . "Type, $firstValue" . ($hasSecondValue ? ", " . $secondValue : "") . " } },\n"; + } + $i++; } - else { - print " { \"$key\"_s, $attributes, $intrinsic, { HashTableValue::" . $typeTag . "Type, $firstValue" . ($hasSecondValue ? ", " . $secondValue : "") . " } },\n"; + $hashTableString .= "};\n"; + $hashTableString .= "\n"; + $hashTableString .= "static const struct HashTable $name =\n"; + $hashTableString .= " \{ $packedSize, $compactHashSizeMask, $hasSetter, nullptr, $nameEntries, $nameIndex \};\n"; + $hashTableString .= "\n"; + + @table = (); + @links = (); + if ($setToOldValues) { + $compactSize = $oldCompactSize; + $compactHashSizeMask = $oldCompactHashSizeMask; } - $i++; + return $hashTableString; + }; + + my $hashTableForMacOS = generateHashTableHelper(1, 1); + my $hashTableForIOS = generateHashTableHelper(0, 0); + my $hashTableToWrite = $hashTableForMacOS; + if ($hashTableForMacOS ne $hashTableForIOS) { + $hashTableToWrite = "#if PLATFORM(MAC)\n" . $hashTableForMacOS . "#else\n" . $hashTableForIOS . "#endif\n"; } - print "};\n"; - print "\n"; - print "static const struct HashTable $name =\n"; - print " \{ $packedSize, $compactHashSizeMask, $hasSetter, nullptr, $nameEntries, $nameIndex \};\n"; - print "\n"; + print $hashTableToWrite; + print "} // namespace JSC\n"; } |