| /* |
| * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. Oracle designates this |
| * particular file as subject to the "Classpath" exception as provided |
| * by Oracle in the LICENSE file that accompanied this code. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| */ |
| |
| package jdk.tools.jlink.internal; |
| |
| import java.lang.reflect.Array; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.LinkedHashMap; |
| import java.util.List; |
| import java.util.Map; |
| import jdk.internal.jimage.ImageStringsReader; |
| |
| /* |
| * The algorithm used here is outlined in Applications of Finite Automata |
| * Representing Large Vocabularies - Claudio L Lucchesi and Tomasz Kowaltowski, |
| * 1992, and A Practical Minimal Perfect Hashing Method - Fabiano C. Botelho1, |
| * Yoshiharu Kohayakawa, and Nivio Ziviani, 2005. |
| * |
| * The primary JDK use of this algorithm is managing the jimage location index. |
| * |
| * The goal of PerfectHashBuilder is to construct an automaton which maps a |
| * string key to a unique index 0..N-1, where N is the number of key-value pairs. |
| * What makes MPHM effective is that the size of the lookup table is N or very |
| * near N, and the minimum lookup is O(1) maximum lookup is O(2). |
| * |
| * The result of PerfectHashBuilder is two integer arrays, redirect and order. |
| * The redirect table provides a 1-1 mapping to the order table, using the |
| * reader algorithm described further on. The order table provides a mapping |
| * to entries. If entries are fixed size and can be put in a direct table, then |
| * the order table can be used to construct the direct table and then discarded. |
| * |
| * The steps for constructing the lookup tables are as follows; |
| * |
| * - Compute an MPHM hash for each key, based on a fixed base value modulo N. |
| * Note, the hash is based on the modified UTF-8 of the key, simplifying |
| * computation in native code. |
| * |
| * - Combine keys that map to the same hash code (collisions) into bucket |
| * chains. |
| * |
| * - Sort bucket chains by length of chains, longest first (most collisions.) |
| * Sorting is done to pack the redirect table with the worst collision |
| * offenders first. |
| * |
| * - For each chain, recompute the hash of each key using a new base value. |
| * Recomputation should give a different key distribution. A tally is kept |
| * of where the key maps, using the order table. The tally is used to detect |
| * new collisions. If there are further collisions, then restart |
| * redistribution using a different hash base value. If a chain is |
| * successfully distributed, then the base value used to compute the hash |
| * is recorded in the redirect table. |
| * |
| * - Once all colliding chains are resolved (length > 1), then the chains with |
| * only one entry are used to fill in the empty slots in the order table. |
| * These keys are recorded in the redirect table using the twos complement |
| * of the order index. |
| * |
| * - It is possible that a given set of keys cannot be packed into a table of |
| * size N. If this situation occurs then the size of the table is |
| * adjusted so that keys distribute differently. |
| * |
| * Readers algoritm; |
| * |
| * - Compute the hash for the key using the fixed base value modulo N. This |
| * will provide an index into the redirect table. The integer value in the |
| * redirect table will determine the next step. |
| * |
| * - If the value in the redirect table is positive, then that value is used |
| * to rehash the key to get the index into the order table. |
| * |
| * - If the value in the redirect table is negative, then that value is the |
| * twos complement of the index into the order table. |
| * |
| * - If the value in the redirect table is zero, then there is no matching |
| * entry. |
| * |
| * - Note that the resulting entry needs to be validated to ensure a match. |
| * This is typically done by comparing the key with the key in entry. |
| */ |
| public class PerfectHashBuilder<E> { |
| private static final int RETRY_LIMIT = 1000; |
| |
| private Class<?> entryComponent; |
| private Class<?> bucketComponent; |
| |
| private final Map<String, Entry<E>> map = new LinkedHashMap<>(); |
| private int[] redirect; |
| private Entry<E>[] order; |
| private int count = 0; |
| |
| @SuppressWarnings("EqualsAndHashcode") |
| public static class Entry<E> { |
| private final String key; |
| private final E value; |
| |
| Entry() { |
| this("", null); |
| } |
| |
| Entry(String key, E value) { |
| this.key = key; |
| this.value = value; |
| } |
| |
| String getKey() { |
| return key; |
| } |
| |
| E getValue() { |
| return value; |
| } |
| |
| int hashCode(int seed) { |
| return ImageStringsReader.hashCode(key, seed); |
| } |
| |
| @Override |
| public int hashCode() { |
| return ImageStringsReader.hashCode(key); |
| } |
| |
| @Override |
| public boolean equals(Object other) { |
| if (other == this) { |
| return true; |
| } |
| if (!(other instanceof Entry)) { |
| return false; |
| } |
| Entry<?> entry = (Entry<?>) other; |
| return entry.key.equals(key); |
| } |
| } |
| |
| static class Bucket<E> implements Comparable<Bucket<E>> { |
| final List<Entry<E>> list = new ArrayList<>(); |
| |
| void add(Entry<E> entry) { |
| list.add(entry); |
| } |
| |
| int getSize() { |
| return list.size(); |
| } |
| |
| List<Entry<E>> getList() { |
| return list; |
| } |
| |
| Entry<E> getFirst() { |
| assert !list.isEmpty() : "bucket should never be empty"; |
| return list.get(0); |
| } |
| |
| @Override |
| public int hashCode() { |
| return getFirst().hashCode(); |
| } |
| |
| @Override |
| @SuppressWarnings("EqualsWhichDoesntCheckParameterClass") |
| public boolean equals(Object obj) { |
| return this == obj; |
| } |
| |
| @Override |
| public int compareTo(Bucket<E> o) { |
| return o.getSize() - getSize(); |
| } |
| } |
| |
| public PerfectHashBuilder(Class<?> entryComponent, Class<?> bucketComponent) { |
| this.entryComponent = entryComponent; |
| this.bucketComponent = bucketComponent; |
| } |
| |
| public int getCount() { |
| return map.size(); |
| } |
| |
| public int[] getRedirect() { |
| return redirect.clone(); |
| } |
| |
| public Entry<E>[] getOrder() { |
| return order.clone(); |
| } |
| |
| public Entry<E> put(String key, E value) { |
| return put(new Entry<>(key, value)); |
| } |
| |
| public Entry<E> put(Entry<E> entry) { |
| Entry<E> old = map.put(entry.key, entry); |
| |
| if (old == null) { |
| count++; |
| } |
| |
| return old; |
| } |
| |
| @SuppressWarnings("unchecked") |
| public void generate() { |
| // If the table is empty then exit early. |
| boolean redo = count != 0; |
| |
| // Repeat until a valid packing is achieved. |
| while (redo) { |
| redo = false; |
| |
| // Allocate the resulting redirect and order tables. |
| redirect = new int[count]; |
| order = (Entry<E>[])Array.newInstance(entryComponent, count); |
| |
| // Place all the entries in bucket chains based on hash. Sort by |
| // length of chain. |
| Bucket<E>[] sorted = createBuckets(); |
| int free = 0; |
| |
| // Iterate through the chains, longest first. |
| for (Bucket<E> bucket : sorted) { |
| if (bucket.getSize() != 1) { |
| // Attempt to pack entries until no collisions occur. |
| if (!collidedEntries(bucket, count)) { |
| // Failed to pack. Meed to grow table. |
| redo = true; |
| break; |
| } |
| } else { |
| // A no collision entry (bucket.getSize() == 1). Find a free |
| // spot in the order table. |
| for ( ; free < count && order[free] != null; free++) {} |
| |
| // If none found, then grow table. |
| if (free >= count) { |
| redo = true; |
| break; |
| } |
| |
| // Store entry in order table. |
| order[free] = bucket.getFirst(); |
| // Twos complement of order index stired in the redirect table. |
| redirect[(bucket.hashCode() & 0x7FFFFFFF) % count] = -1 - free; |
| // Update free slot index. |
| free++; |
| } |
| } |
| |
| // If packing failed, then bump table size. Make odd to increase |
| // chances of being relatively prime. |
| if (redo) { |
| count = (count + 1) | 1; |
| } |
| } |
| } |
| |
| @SuppressWarnings("unchecked") |
| private Bucket<E>[] createBuckets() { |
| // Build bucket chains based on key hash. Collisions end up in same chain. |
| Bucket<E>[] buckets = (Bucket<E>[])Array.newInstance(bucketComponent, count); |
| |
| map.values().stream().forEach((entry) -> { |
| int index = (entry.hashCode() & 0x7FFFFFFF) % count; |
| Bucket<E> bucket = buckets[index]; |
| |
| if (bucket == null) { |
| buckets[index] = bucket = new Bucket<>(); |
| } |
| |
| bucket.add(entry); |
| }); |
| |
| // Sort chains, longest first. |
| Bucket<E>[] sorted = Arrays.asList(buckets).stream() |
| .filter((bucket) -> (bucket != null)) |
| .sorted() |
| .toArray((length) -> { |
| return (Bucket<E>[])Array.newInstance(bucketComponent, length); |
| }); |
| |
| return sorted; |
| } |
| |
| private boolean collidedEntries(Bucket<E> bucket, int count) { |
| // Track packing attempts. |
| List<Integer> undo = new ArrayList<>(); |
| // Start with a new hash seed. |
| int seed = ImageStringsReader.HASH_MULTIPLIER + 1; |
| int retry = 0; |
| |
| // Attempt to pack all the entries in a single chain. |
| redo: |
| while (true) { |
| for (Entry<E> entry : bucket.getList()) { |
| // Compute new hash. |
| int index = entry.hashCode(seed) % count; |
| |
| // If a collision is detected. |
| if (order[index] != null) { |
| // Only retry so many times with current table size. |
| if (++retry > RETRY_LIMIT) { |
| return false; |
| } |
| |
| // Undo the attempted packing. |
| undo.stream().forEach((i) -> { |
| order[i] = null; |
| }); |
| |
| // Reset the undo list and bump up the hash seed. |
| undo.clear(); |
| seed++; |
| |
| // Zero seed is not valid. |
| if (seed == 0) { |
| seed = 1; |
| } |
| |
| // Try again. |
| continue redo; |
| } |
| |
| // No collision. |
| order[index] = entry; |
| undo.add(index); |
| } |
| |
| // Entire chain packed. Record hash seed used. |
| redirect[(bucket.hashCode() & 0x7FFFFFFF) % count] = seed; |
| |
| break; |
| } |
| |
| return true; |
| } |
| } |