#! /usr/bin/env perl
#
#   Static Hashtable Generator
#
#   (c) 2000-2002 by Harri Porten and
#                    David Faure
#   Modified (c) 2004 by Nikolas Zimmermann
#   Copyright (C) 2007-2023 Apple Inc. All rights reserved.
#
#   This library is free software; you can redistribute it and/or
#   modify it under the terms of the GNU Lesser General Public
#   License as published by the Free Software Foundation; either
#   version 2 of the License, or (at your option) any later version.
#
#   This library is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#   Lesser General Public License for more details.
#
#   You should have received a copy of the GNU Lesser General Public
#   License along with this library; if not, write to the Free Software
#   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
#

use strict;
use warnings;

use Math::BigInt;
use Getopt::Long qw(:config pass_through);

my $file = shift @ARGV or die("Must provide source file as final argument.");

# Three-argument open with a lexical handle: the file name can never be
# interpreted as a mode or a pipe, and the OS error is reported.
open(my $in, '<', $file) or die "Cannot open source file $file: $!";

# Per-table state, filled while parsing one @begin ... @end section.
my @keys = ();      # property names, in source order
my @attrs = ();     # attribute string for each key ("None" when absent)
my @values = ();    # one hash ref per key describing how to emit its value
my @table = ();     # compact index: slot -> index into @keys
my @links = ();     # compact index: slot -> next slot of its collision chain

# NOTE(review): this is switched to "true" by every plain property entry
# (including ReadOnly ones) and is never reset between tables -- confirm that
# this is intended before changing it, since it is written into the output.
my $hasSetter = "false";

my $includeBuiltin = 0;
my $inside = 0;
my $name;
my $perfectHashSize;
my $compactSize;
my $compactHashSizeMask;
my $banner = 0;
my $mask64 = 2**64 - 1;
my $mask32 = 2**32 - 1;

sub calcPerfectHashSize($);
sub calcCompactHashSize($);
sub output();
sub jsc_ucfirst($);
sub hashValue($$);

# Parse the source file. Only text between "@begin <name>" and "@end" is
# interpreted; each such section produces one table via output().
while (<$in>) {
    chomp;
    s/^\s+//;
    next if /^\#|^$/; # Comment or blank line. Do nothing.
    if (/^\@begin/ && !$inside) {
        if (/^\@begin\s*([:_\w]+)\s*\d*\s*$/) {
            $inside = 1;
            $name = $1;
        } else {
            print STDERR "WARNING: \@begin without table name, skipping $_\n";
        }
    } elsif (/^\@end\s*$/ && $inside) {
        output();

        @keys = ();
        @attrs = ();
        @values = ();
        $includeBuiltin = 0;

        $inside = 0;
    } elsif (/^(\S+)\s*(\S+)\s*([\w\|]*)\s*(\w*)\s*(\w*)\s*$/ && $inside) {
        # Entry line: key, value, attributes, parameter, intrinsic
        # (the last three are optional).
        my $key = $1;
        my $val = $2;
        my $att = $3;
        my $param = $4;
        my $intrinsic = $5;

        push(@keys, $key);
        push(@attrs, length($att) > 0 ? $att : "None");

        if ($val eq "JSBuiltin") {
            $includeBuiltin = 1;
        }

        if ($att =~ m/Function/) {
            push(@values, { "type" => "PropertyAttribute::Function", "function" => $val, "params" => (length($param) ? $param : ""), "intrinsic" => (length($intrinsic) ? $intrinsic : "NoIntrinsic") });
            #printf STDERR "WARNING: Number of arguments missing for $key/$val\n" if (length($param) == 0);
        } elsif ($att =~ m/CellProperty/) {
            my $property = $val;
            push(@values, { "type" => "PropertyAttribute::CellProperty", "property" => $property });
        } elsif ($att =~ m/ClassStructure/) {
            my $property = $val;
            push(@values, { "type" => "PropertyAttribute::ClassStructure", "property" => $property });
        } elsif ($att =~ m/PropertyCallback/) {
            my $cback = $val;
            push(@values, { "type" => "PropertyAttribute::PropertyCallback", "cback" => $cback });
        } elsif (length($att)) {
            # Plain property: the getter is the value itself, the setter name
            # is derived from it unless the entry is ReadOnly.
            my $get = $val;
            my $put = "0";
            if (!($att =~ m/ReadOnly/)) {
                $put = "set" . jsc_ucfirst($val);
            }
            $hasSetter = "true";
            push(@values, { "type" => "PropertyAttribute::Property", "get" => $get, "put" => $put });
        } else {
            push(@values, { "type" => "Lexer", "value" => $val });
        }
    } elsif ($inside) {
        die "invalid data {" . $_ . "}";
    }
}

close($in);

die "missing closing \@end" if ($inside);

# Upper-cases a getter name for use after "set": the first "js" found anywhere
# in the name becomes "JS"; names without "js" get their first letter
# upper-cased.
sub jsc_ucfirst($)
{
    my ($value) = @_;

    if ($value =~ /js/) {
        $value =~ s/js/JS/;
        return $value;
    }

    return ucfirst($value);
}

# Returns the smallest power of two that is >= the argument (1 for 0 or 1).
sub ceilingToPowerOf2
{
    my ($size) = @_;

    my $powerOf2 = 1;
    while ($size > $powerOf2) {
        $powerOf2 <<= 1;
    }

    return $powerOf2;
}

# Sets $perfectHashSize to the smallest power-of-two table size (starting at
# the key count rounded up) for which no two keys land in the same slot.
# $isMac selects the hash variant, see hashValue().
# Dies when two keys have the same hash value, because no size can then work.
sub calcPerfectHashSize($)
{
    my ($isMac) = @_;

tableSizeLoop:
    for ($perfectHashSize = ceilingToPowerOf2(scalar @keys); ; $perfectHashSize += $perfectHashSize) {
        my @table = ();
        foreach my $key (@keys) {
            my $h = hashValue($key, $isMac) % $perfectHashSize;
            if ($table[$h]) {
                # Hash values never exceed 0xFFFFFF (see
                # maskTop8BitsAndAvoidZero), so once the table is larger than
                # that, a collision means two identical hash values and
                # doubling again would loop forever.
                die "cannot compute a perfect hash size for $name: two keys share hash value $h"
                    if $perfectHashSize > 0xFFFFFF;
                next tableSizeLoop;
            }
            $table[$h] = 1;
        }
        last;
    }
}

# 32-bit left shift: shifts and discards everything above bit 31.
sub leftShift($$) {
    my ($value, $distance) = @_;
    return (($value << $distance) & 0xFFFFFFFF);
}

# Builds the compact index in @table / @links and sets $compactSize and
# $compactHashSizeMask. The primary area has ceilingToPowerOf2(2 * key count)
# slots; a key whose slot is taken is appended after the primary area and
# linked from the end of the chain it collided with.
# $isMac selects the hash variant, see hashValue().
sub calcCompactHashSize($)
{
    my ($isMac) = @_;

    my $compactHashSize = ceilingToPowerOf2(2 * @keys);
    $compactHashSizeMask = $compactHashSize - 1;
    $compactSize = $compactHashSize;

    my $i = 0;
    foreach my $key (@keys) {
        my $h = hashValue($key, $isMac) % $compactHashSize;
        while (defined($table[$h])) {
            if (defined($links[$h])) {
                # Follow the existing chain.
                $h = $links[$h];
            } else {
                # End of chain: allocate a fresh overflow slot.
                $links[$h] = $compactSize;
                $h = $compactSize;
                $compactSize++;
            }
        }
        $table[$h] = $i;
        $i++;
    }
}

# Final mixing step of superFastHash, done in 32-bit arithmetic.
sub avalancheBits($) {
    my ($value) = @_;

    $value &= $mask32;

    # Force "avalanching" of lower 32 bits
    $value ^= leftShift($value, 3);
    $value += ($value >> 5);
    $value = ($value & $mask32);
    $value ^= (leftShift($value, 2) & $mask32);
    $value += ($value >> 15);
    $value = $value & $mask32;
    $value ^= (leftShift($value, 10) & $mask32);

    return $value;
}

# Reduces a hash to its low 24 bits and replaces a result of 0 by 0x800000.
sub maskTop8BitsAndAvoidZero($) {
    my ($value) = @_;

    $value &= $mask32;

    # Save 8 bits for StringImpl to use as flags.
    $value &= 0xffffff;

    # This avoids ever returning a hash code of 0, since that is used to
    # signal "hash not computed yet". Setting the high bit maintains
    # reasonable fidelity to a hash code of 0 because it is likely to yield
    # exactly 0 when hash lookup masks out the high bits.
    $value = (0x80000000 >> 8) if ($value == 0);

    return $value;
}

# Paul Hsieh's SuperFastHash
# http://www.azillionmonkeys.com/qed/hash.html
#
# Takes the key as a list of single characters and returns a non-zero 24-bit
# hash value.
sub superFastHash {
    my @chars = @_;

    # This hash is designed to work on 16-bit chunks at a time. But since the normal case
    # (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
    # were 16-bit chunks, which should give matching results

    my $hash = 0x9e3779b9;
    my $l    = scalar @chars; #I wish this was in Ruby --- Maks
    my $rem  = $l & 1;
    $l = $l >> 1;

    my $s = 0;

    # Main loop: consume two characters per iteration.
    for (; $l > 0; $l--) {
        $hash   += ord($chars[$s]);
        my $tmp = leftShift(ord($chars[$s+1]), 11) ^ $hash;
        $hash   = (leftShift($hash, 16) & $mask32) ^ $tmp;
        $s += 2;
        $hash += $hash >> 11;
        $hash &= $mask32;
    }

    # Handle end case: one trailing character when the length is odd.
    if ($rem != 0) {
        $hash += ord($chars[$s]);
        $hash ^= (leftShift($hash, 11) & $mask32);
        $hash += $hash >> 17;
    }

    $hash = avalancheBits($hash);
    return maskTop8BitsAndAvoidZero($hash);
}

# Adds two values and masks the sum with $mask64.
sub uint64_add($$) {
    my ($a, $b) = @_;
    my $sum = $a + $b;
    return $sum & $mask64;
}

# Multiplies two values and masks the product with $mask64.
sub uint64_multi($$) {
    my ($a, $b) = @_;
    my $product = $a * $b;
    return $product & $mask64;
}

# Multiplies $A by $B using 32-bit halves and returns the result as the
# two-element list (low word, high word).
sub wymum($$) {
    my ($A, $B) = @_;

    my $ha = $A >> 32;
    my $hb = $B >> 32;
    my $la = $A & $mask32;
    my $lb = $B & $mask32;
    my $hi;
    my $lo;
    my $rh = uint64_multi($ha, $hb);
    my $rm0 = uint64_multi($ha, $lb);
    my $rm1 = uint64_multi($hb, $la);
    my $rl = uint64_multi($la, $lb);
    my $t = uint64_add($rl, ($rm0 << 32));
    my $c = int($t < $rl);    # carry out of the first partial sum

    $lo = uint64_add($t, ($rm1 << 32));
    $c += int($lo < $t);      # carry out of the second partial sum
    $hi = uint64_add($rh, uint64_add(($rm0 >> 32), uint64_add(($rm1 >> 32), $c)));

    return ($lo, $hi);
}

# XOR of the two words returned by wymum($A, $B).
sub wymix($$) {
    my ($A, $B) = @_;
    ($A, $B) = wymum($A, $B);
    return $A ^ $B;
}

# Spreads the four bytes of $v so that each one occupies the low byte of a
# 16-bit lane of the result.
sub convert32BitTo64Bit($) {
    my ($v) = @_;
    my ($mask1) = 281470681808895;   # 0x0000_ffff_0000_ffff
    $v = ($v | ($v << 16)) & $mask1;
    my ($mask2) = 71777214294589695; # 0x00ff_00ff_00ff_00ff
    return ($v | ($v << 8)) & $mask2;
}

# Spreads the two bytes of $v so that each one occupies the low byte of a
# 16-bit lane of the result.
sub convert16BitTo32Bit($) {
    my ($v) = @_;
    return ($v | ($v << 8)) & 0x00ff_00ff;
}

# wyhash over the key, given as a list of single characters; every character
# is counted as two bytes. Returns a non-zero 24-bit hash value.
sub wyhash {
    # https://github.com/wangyi-fudan/wyhash
    my @chars = @_;
    my $charCount = scalar @chars;
    my $byteCount = $charCount << 1;
    my $charIndex = 0;
    my $seed = 0;
    my @secret = ( 11562461410679940143, 16646288086500911323, 10285213230658275043, 6384245875588680899 );
    my $move1 = (($byteCount >> 3) << 2) >> 1;

    $seed ^= wymix($seed ^ $secret[0], $secret[1]);
    my $a = 0;
    my $b = 0;

    # Character code at index $i.
    local *c2i = sub {
        my ($i) = @_;
        return ord($chars[$i]);
    };

    # Reads four characters starting at $i.
    local *wyr8 = sub {
        my ($i) = @_;
        my $v = c2i($i) | (c2i($i + 1) << 8) | (c2i($i + 2) << 16) | (c2i($i + 3) << 24);
        return convert32BitTo64Bit($v);
    };

    # Reads two characters starting at $i.
    local *wyr4 = sub {
        my ($i) = @_;
        my $v = c2i($i) | (c2i($i + 1) << 8);
        return convert16BitTo32Bit($v);
    };

    # Reads the single character at $i.
    local *wyr2 = sub {
        my ($i) = @_;
        return c2i($i) << 16;
    };

    if ($byteCount <= 16) {
        if ($byteCount >= 4) {
            $a = (wyr4($charIndex) << 32) | wyr4($charIndex + $move1);
            $charIndex = $charIndex + $charCount - 2;
            $b = (wyr4($charIndex) << 32) | wyr4($charIndex - $move1);
        } elsif ($byteCount > 0) {
            $a = wyr2($charIndex);
            $b = 0;
        } else {
            $a = $b = 0;
        }
    } else {
        my $i = $byteCount;
        if ($i > 48) {
            # Consume 48 bytes (24 characters) per round in three lanes.
            my $see1 = $seed;
            my $see2 = $seed;
            do {
                $seed = wymix(wyr8($charIndex) ^ $secret[1], wyr8($charIndex + 4) ^ $seed);
                $see1 = wymix(wyr8($charIndex + 8) ^ $secret[2], wyr8($charIndex + 12) ^ $see1);
                $see2 = wymix(wyr8($charIndex + 16) ^ $secret[3], wyr8($charIndex + 20) ^ $see2);
                $charIndex += 24;
                $i -= 48;
            } while ($i > 48);
            $seed ^= $see1 ^ $see2;
        }
        while ($i > 16) {
            $seed = wymix(wyr8($charIndex) ^ $secret[1], wyr8($charIndex + 4) ^ $seed);
            $i -= 16;
            $charIndex += 8;
        }
        my $move2 = $i >> 1;
        $a = wyr8($charIndex + $move2 - 8);
        $b = wyr8($charIndex + $move2 - 4);
    }
    $a ^= $secret[1];
    $b ^= $seed;

    ($a, $b) = wymum($a, $b);
    my $hash = wymix($a ^ $secret[0] ^ $byteCount, $b ^ $secret[1]) & $mask32;

    return maskTop8BitsAndAvoidZero($hash);
}

# Hash of $string. With $isMac true, keys longer than 48 characters use
# wyhash; everything else uses superFastHash.
sub hashValue($$) {
    my ($string, $isMac) = @_;
    my @chars = split(/ */, $string);
    my $charCount = scalar @chars;
    if ($isMac) {
        if ($charCount <= 48) {
            return superFastHash(@chars);
        }
        return wyhash(@chars);
    } else {
        return superFastHash(@chars);
    }
}

# Prints the C++ source for the table that was just parsed ($name, @keys,
# @attrs, @values) to STDOUT. The table is generated once per hash variant;
# when both results are identical it is printed once, otherwise both are
# printed inside #if PLATFORM(MAC) / #else / #endif.
sub output() {
    if (!$banner) {
        $banner = 1;
        print "// Automatically generated from $file using $0. DO NOT EDIT!\n";
    }

    my $nameEntries = "${name}Values";
    $nameEntries =~ s/:/_/g;
    my $nameIndex = "${name}Index";
    $nameIndex =~ s/:/_/g;

    print "\n";
    print "#include \"JSCBuiltins.h\"\n" if ($includeBuiltin);
    print "#include \"Lookup.h\"\n";
    print "\n";
    print "namespace JSC {\n";
    print "\n";

    # Returns the text of the index array, the value array and the HashTable
    # definition for one hash variant. When $setToOldValues is true,
    # $compactSize and $compactHashSizeMask are restored afterwards.
    local *generateHashTableHelper = sub {
        my ($isMac, $setToOldValues) = @_;
        my $oldCompactSize = $compactSize;
        my $oldCompactHashSizeMask = $compactHashSizeMask;
        calcPerfectHashSize($isMac);
        calcCompactHashSize($isMac);

        my $hashTableString = "";

        if ($compactSize != 0) {
            $hashTableString .= "static const struct CompactHashIndex ${nameIndex}\[$compactSize\] = {\n";
            for (my $i = 0; $i < $compactSize; $i++) {
                # Unused slots and chain ends are written as -1.
                my $T = -1;
                if (defined($table[$i])) { $T = $table[$i]; }
                my $L = -1;
                if (defined($links[$i])) { $L = $links[$i]; }
                $hashTableString .= " { $T, $L },\n";
            }
        } else {
            # MSVC dislikes empty arrays.
            $hashTableString .= "static const struct CompactHashIndex ${nameIndex}\[1\] = {\n";
            $hashTableString .= " { 0, 0 }\n";
        }
        $hashTableString .= "};\n";
        $hashTableString .= "\n";

        my $packedSize = scalar @keys;
        if ($packedSize != 0) {
            $hashTableString .= "static const struct HashTableValue ${nameEntries}\[$packedSize\] = {\n";
        } else {
            # MSVC dislikes empty arrays.
            $hashTableString .= "static const struct HashTableValue ${nameEntries}\[1\] = {\n";
            $hashTableString .= " { { }, 0, NoIntrinsic, { HashTableValue::End } }\n";
        }

        my $i = 0;
        foreach my $key (@keys) {
            my $typeTag = "";
            my $firstValue = "";
            my $secondValue = "";
            my $hasSecondValue = 1;
            my $intrinsic = "NoIntrinsic";

            if ($values[$i]{"type"} eq "PropertyAttribute::Function") {
                $typeTag = "NativeFunction";
                $firstValue = $values[$i]{"function"};
                $secondValue = $values[$i]{"params"};
                $intrinsic = $values[$i]{"intrinsic"};
            } elsif ($values[$i]{"type"} eq "PropertyAttribute::Property") {
                $typeTag = "GetterSetter";
                $firstValue = $values[$i]{"get"};
                $secondValue = $values[$i]{"put"};
            } elsif ($values[$i]{"type"} eq "Lexer") {
                $typeTag = "Lexer";
                $firstValue = $values[$i]{"value"};
                $hasSecondValue = 0;
            } elsif ($values[$i]{"type"} eq "PropertyAttribute::CellProperty" || $values[$i]{"type"} eq "PropertyAttribute::ClassStructure") {
                $typeTag = ($values[$i]{"type"} eq "PropertyAttribute::CellProperty") ? "LazyCellProperty" : "LazyClassStructure";
                # The value must have the form Class::member.
                $values[$i]{"property"} =~ /\A([a-zA-Z0-9_]+)::(.*)\Z/ or die;
                $firstValue = "OBJECT_OFFSETOF($1, $2)";
                $hasSecondValue = 0;
            } elsif ($values[$i]{"type"} eq "PropertyAttribute::PropertyCallback") {
                $typeTag = "LazyProperty";
                $firstValue = $values[$i]{"cback"};
                $hasSecondValue = 0;
            }

            my $attributes = "PropertyAttribute::" . $attrs[$i];
            $attributes =~ s/\|/\|PropertyAttribute::/g;
            # static_cast needs an explicit target type to be valid C++; the
            # attribute expression is combined with bitwise operators below.
            $attributes = "static_cast<unsigned>(" . $attributes . ")";
            if ($values[$i]{"type"} eq "PropertyAttribute::Function" && $firstValue eq "JSBuiltin") {
                $typeTag = "BuiltinGenerator";
                my $tableHead = $name;
                $tableHead =~ s/Table$//;
                $hashTableString .= " { \"$key\"_s, (($attributes) & ~PropertyAttribute::Function) | PropertyAttribute::Builtin, $intrinsic, { HashTableValue::" . $typeTag . "Type, " . $tableHead . ucfirst($key) . "CodeGenerator, $secondValue } },\n";
            } else {
                $hashTableString .= " { \"$key\"_s, $attributes, $intrinsic, { HashTableValue::" . $typeTag . "Type, $firstValue" . ($hasSecondValue ? ", " . $secondValue : "") . " } },\n";
            }
            $i++;
        }
        $hashTableString .= "};\n";
        $hashTableString .= "\n";
        $hashTableString .= "static const struct HashTable $name =\n";
        $hashTableString .= " \{ $packedSize, $compactHashSizeMask, $hasSetter, nullptr, $nameEntries, $nameIndex \};\n";
        $hashTableString .= "\n";

        # Clear the index so the next variant / next table starts empty.
        @table = ();
        @links = ();
        if ($setToOldValues) {
            $compactSize = $oldCompactSize;
            $compactHashSizeMask = $oldCompactHashSizeMask;
        }
        return $hashTableString;
    };

    my $hashTableForMacOS = generateHashTableHelper(1, 1);
    my $hashTableForIOS = generateHashTableHelper(0, 0);
    my $hashTableToWrite = $hashTableForMacOS;
    if ($hashTableForMacOS ne $hashTableForIOS) {
        $hashTableToWrite = "#if PLATFORM(MAC)\n" . $hashTableForMacOS . "#else\n" . $hashTableForIOS . "#endif\n";
    }
    print $hashTableToWrite;

    print "} // namespace JSC\n";
}