Add a basic hashtable data structure, with tests!

The basic hashtable is intended to be used to support a variety
of different datastructures such as map, set, multimap,
multiset, linkedmap, generationcache, etc.

Consequently its interface is fairly primitive.

The basic hashtable supports copy-on-write style functionality
using SharedBuffer.

The change introduces a simple generic function in TypeHelpers for
specifying hash functions.  The idea is to add template
specializations of hash_type<T> next to the relevant data structures
such as String8, String16, sp<T>, etc.

Change-Id: I2c479229e9d4527b4fbfe3b8b04776a2fd32c973
diff --git a/include/utils/BasicHashtable.h b/include/utils/BasicHashtable.h
new file mode 100644
index 0000000..fdf9738
--- /dev/null
+++ b/include/utils/BasicHashtable.h
@@ -0,0 +1,393 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_BASIC_HASHTABLE_H
+#define ANDROID_BASIC_HASHTABLE_H
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <utils/SharedBuffer.h>
+#include <utils/TypeHelpers.h>
+
+namespace android {
+
+/* Implementation type.  Nothing to see here. */
+class BasicHashtableImpl {
+protected:
+    struct Bucket {
+        // The collision flag indicates that the bucket is part of a collision chain
+        // such that at least two entries both hash to this bucket.  When true, we
+        // may need to seek further along the chain to find the entry.
+        static const uint32_t COLLISION = 0x80000000UL;
+
+        // The present flag indicates that the bucket contains an initialized entry value.
+        static const uint32_t PRESENT   = 0x40000000UL;
+
+        // Mask for 30 bits worth of the hash code that are stored within the bucket to
+        // speed up lookups and rehashing by eliminating the need to recalculate the
+        // hash code of the entry's key.
+        static const uint32_t HASH_MASK = 0x3fffffffUL;
+
+        // Combined value that stores the collision and present flags as well as
+        // a 30 bit hash code.
+        uint32_t cookie;
+
+        // Storage for the entry begins here.
+        char entry[0];
+    };
+
+    BasicHashtableImpl(size_t entrySize, bool hasTrivialDestructor,
+            size_t minimumInitialCapacity, float loadFactor);
+    BasicHashtableImpl(const BasicHashtableImpl& other);
+
+    void dispose();
+
+    inline void edit() {
+        if (mBuckets && !SharedBuffer::bufferFromData(mBuckets)->onlyOwner()) {
+            clone();
+        }
+    }
+
+    void setTo(const BasicHashtableImpl& other);
+    void clear();
+
+    ssize_t next(ssize_t index) const;
+    ssize_t find(ssize_t index, hash_t hash, const void* __restrict__ key) const;
+    size_t add(hash_t hash, const void* __restrict__ entry);
+    void removeAt(size_t index);
+    void rehash(size_t minimumCapacity, float loadFactor);
+
+    const size_t mBucketSize; // number of bytes per bucket including the entry
+    const bool mHasTrivialDestructor; // true if the entry type does not require destruction
+    size_t mCapacity;         // number of buckets that can be filled before exceeding load factor
+    float mLoadFactor;        // load factor
+    size_t mSize;             // number of elements actually in the table
+    size_t mFilledBuckets;    // number of buckets for which collision or present is true
+    size_t mBucketCount;      // number of slots in the mBuckets array
+    void* mBuckets;           // array of buckets, as a SharedBuffer
+
+    inline const Bucket& bucketAt(const void* __restrict__ buckets, size_t index) const {
+        return *reinterpret_cast<const Bucket*>(
+                static_cast<const uint8_t*>(buckets) + index * mBucketSize);
+    }
+
+    inline Bucket& bucketAt(void* __restrict__ buckets, size_t index) const {
+        return *reinterpret_cast<Bucket*>(static_cast<uint8_t*>(buckets) + index * mBucketSize);
+    }
+
+    virtual bool compareBucketKey(const Bucket& bucket, const void* __restrict__ key) const = 0;
+    virtual void initializeBucketEntry(Bucket& bucket, const void* __restrict__ entry) const = 0;
+    virtual void destroyBucketEntry(Bucket& bucket) const = 0;
+
+private:
+    void clone();
+
+    // Allocates a bucket array as a SharedBuffer.
+    void* allocateBuckets(size_t count) const;
+
+    // Releases a bucket array's associated SharedBuffer.
+    void releaseBuckets(void* __restrict__ buckets, size_t count) const;
+
+    // Destroys the contents of buckets (invokes destroyBucketEntry for each
+    // populated bucket if needed).
+    void destroyBuckets(void* __restrict__ buckets, size_t count) const;
+
+    // Copies the content of buckets (copies the cookie and invokes copyBucketEntry
+    // for each populated bucket if needed).
+    void copyBuckets(const void* __restrict__ fromBuckets,
+            void* __restrict__ toBuckets, size_t count) const;
+
+    // Determines the appropriate size of a bucket array to store a certain minimum
+    // number of entries and returns its effective capacity.
+    static void determineCapacity(size_t minimumCapacity, float loadFactor,
+            size_t* __restrict__ outBucketCount, size_t* __restrict__ outCapacity);
+
+    // Trim a hash code to 30 bits to match what we store in the bucket's cookie.
+    inline static hash_t trimHash(hash_t hash) {
+        return (hash & Bucket::HASH_MASK) ^ (hash >> 30);
+    }
+
+    // Returns the index of the first bucket that is in the collision chain
+    // for the specified hash code, given the total number of buckets.
+    // (Primary hash)
+    inline static size_t chainStart(hash_t hash, size_t count) {
+        return hash % count;
+    }
+
+    // Returns the increment to add to a bucket index to seek to the next bucket
+    // in the collision chain for the specified hash code, given the total number of buckets.
+    // (Secondary hash)
+    inline static size_t chainIncrement(hash_t hash, size_t count) {
+        return ((hash >> 7) | (hash << 25)) % (count - 1) + 1;
+    }
+
+    // Returns the index of the next bucket that is in the collision chain
+    // that is defined by the specified increment, given the total number of buckets.
+    inline static size_t chainSeek(size_t index, size_t increment, size_t count) {
+        return (index + increment) % count;
+    }
+};
+
+/*
+ * A BasicHashtable stores entries that are indexed by hash code in place
+ * within an array.  The basic operations are finding entries by key,
+ * adding new entries and removing existing entries.
+ *
+ * This class provides a very limited set of operations with simple semantics.
+ * It is intended to be used as a building block to construct more complex
+ * and interesting data structures such as HashMap.  Think very hard before
+ * adding anything extra to BasicHashtable, it probably belongs at a
+ * higher level of abstraction.
+ *
+ * TKey: The key type.
+ * TEntry: The entry type which is what is actually stored in the array.
+ *
+ * TKey must support the following contract:
+ *     bool operator==(const TKey& other) const;  // return true if equal
+ *     bool operator!=(const TKey& other) const;  // return true if unequal
+ *
+ * TEntry must support the following contract:
+ *     const TKey& getKey() const;  // get the key from the entry
+ *
+ * This class supports storing entries with duplicate keys.  Of course, it can't
+ * tell them apart during removal so only the first entry will be removed.
+ * We do this because it means that operations like add() can't fail.
+ */
+template <typename TKey, typename TEntry>
+class BasicHashtable : private BasicHashtableImpl {
+public:
+    /* Creates a hashtable with the specified minimum initial capacity.
+     * The underlying array will be created when the first entry is added.
+     *
+     * minimumInitialCapacity: The minimum initial capacity for the hashtable.
+     *     Default is 0.
+     * loadFactor: The desired load factor for the hashtable, between 0 and 1.
+     *     Default is 0.75.
+     */
+    BasicHashtable(size_t minimumInitialCapacity = 0, float loadFactor = 0.75f);
+
+    /* Copies a hashtable.
+     * The underlying storage is shared copy-on-write.
+     */
+    BasicHashtable(const BasicHashtable& other);
+
+    /* Clears and destroys the hashtable.
+     */
+    virtual ~BasicHashtable();
+
+    /* Making this hashtable a copy of the other hashtable.
+     * The underlying storage is shared copy-on-write.
+     *
+     * other: The hashtable to copy.
+     */
+    inline BasicHashtable<TKey, TEntry>& operator =(const BasicHashtable<TKey, TEntry> & other) {
+        setTo(other);
+        return *this;
+    }
+
+    /* Returns the number of entries in the hashtable.
+     */
+    inline size_t size() const {
+        return mSize;
+    }
+
+    /* Returns the capacity of the hashtable, which is the number of elements that can
+     * added to the hashtable without requiring it to be grown.
+     */
+    inline size_t capacity() const {
+        return mCapacity;
+    }
+
+    /* Returns the number of buckets that the hashtable has, which is the size of its
+     * underlying array.
+     */
+    inline size_t bucketCount() const {
+        return mBucketCount;
+    }
+
+    /* Returns the load factor of the hashtable. */
+    inline float loadFactor() const {
+        return mLoadFactor;
+    };
+
+    /* Returns a const reference to the entry at the specified index.
+     *
+     * index:   The index of the entry to retrieve.  Must be a valid index within
+     *          the bounds of the hashtable.
+     */
+    inline const TEntry& entryAt(size_t index) const {
+        return entryFor(bucketAt(mBuckets, index));
+    }
+
+    /* Returns a non-const reference to the entry at the specified index.
+     *
+     * index: The index of the entry to edit.  Must be a valid index within
+     *        the bounds of the hashtable.
+     */
+    inline TEntry& editEntryAt(size_t index) {
+        edit();
+        return entryFor(bucketAt(mBuckets, index));
+    }
+
+    /* Clears the hashtable.
+     * All entries in the hashtable are destroyed immediately.
+     * If you need to do something special with the entries in the hashtable then iterate
+     * over them and do what you need before clearing the hashtable.
+     */
+    inline void clear() {
+        BasicHashtableImpl::clear();
+    }
+
+    /* Returns the index of the next entry in the hashtable given the index of a previous entry.
+     * If the given index is -1, then returns the index of the first entry in the hashtable,
+     * if there is one, or -1 otherwise.
+     * If the given index is not -1, then returns the index of the next entry in the hashtable,
+     * in strictly increasing order, or -1 if there are none left.
+     *
+     * index:   The index of the previous entry that was iterated, or -1 to begin
+     *          iteration at the beginning of the hashtable.
+     */
+    inline ssize_t next(ssize_t index) const {
+        return BasicHashtableImpl::next(index);
+    }
+
+    /* Finds the index of an entry with the specified key.
+     * If the given index is -1, then returns the index of the first matching entry,
+     * otherwise returns the index of the next matching entry.
+     * If the hashtable contains multiple entries with keys that match the requested
+     * key, then the sequence of entries returned is arbitrary.
+     * Returns -1 if no entry was found.
+     *
+     * index:   The index of the previous entry with the specified key, or -1 to
+     *          find the first matching entry.
+     * hash:    The hashcode of the key.
+     * key:     The key.
+     */
+    inline ssize_t find(ssize_t index, hash_t hash, const TKey& key) const {
+        return BasicHashtableImpl::find(index, hash, &key);
+    }
+
+    /* Adds the entry to the hashtable.
+     * Returns the index of the newly added entry.
+     * If an entry with the same key already exists, then a duplicate entry is added.
+     * If the entry will not fit, then the hashtable's capacity is increased and
+     * its contents are rehashed.  See rehash().
+     *
+     * hash:    The hashcode of the key.
+     * entry:   The entry to add.
+     */
+    inline size_t add(hash_t hash, const TEntry& entry) {
+        return BasicHashtableImpl::add(hash, &entry);
+    }
+
+    /* Removes the entry with the specified index from the hashtable.
+     * The entry is destroyed immediately.
+     * The index must be valid.
+     *
+     * The hashtable is not compacted after an item is removed, so it is legal
+     * to continue iterating over the hashtable using next() or find().
+     *
+     * index:   The index of the entry to remove.  Must be a valid index within the
+     *          bounds of the hashtable, and it must refer to an existing entry.
+     */
+    inline void removeAt(size_t index) {
+        BasicHashtableImpl::removeAt(index);
+    }
+
+    /* Rehashes the contents of the hashtable.
+     * Grows the hashtable to at least the specified minimum capacity or the
+     * current number of elements, whichever is larger.
+     *
+     * Rehashing causes all entries to be copied and the entry indices may change.
+     * Although the hash codes are cached by the hashtable, rehashing can be an
+     * expensive operation and should be avoided unless the hashtable's size
+     * needs to be changed.
+     *
+     * Rehashing is the only way to change the capacity or load factor of the
+     * hashtable once it has been created.  It can be used to compact the
+     * hashtable by choosing a minimum capacity that is smaller than the current
+     * capacity (such as 0).
+     *
+     * minimumCapacity: The desired minimum capacity after rehashing.
+     * loadFactor: The desired load factor after rehashing.
+     */
+    inline void rehash(size_t minimumCapacity, float loadFactor) {
+        BasicHashtableImpl::rehash(minimumCapacity, loadFactor);
+    }
+
+protected:
+    static inline const TEntry& entryFor(const Bucket& bucket) {
+        return reinterpret_cast<const TEntry&>(bucket.entry);
+    }
+
+    static inline TEntry& entryFor(Bucket& bucket) {
+        return reinterpret_cast<TEntry&>(bucket.entry);
+    }
+
+    virtual bool compareBucketKey(const Bucket& bucket, const void* __restrict__ key) const;
+    virtual void initializeBucketEntry(Bucket& bucket, const void* __restrict__ entry) const;
+    virtual void destroyBucketEntry(Bucket& bucket) const;
+
+private:
+    // For dumping the raw contents of a hashtable during testing.
+    friend class BasicHashtableTest;
+    inline uint32_t cookieAt(size_t index) const {
+        return bucketAt(mBuckets, index).cookie;
+    }
+};
+
+template <typename TKey, typename TEntry>
+BasicHashtable<TKey, TEntry>::BasicHashtable(size_t minimumInitialCapacity, float loadFactor) :
+        BasicHashtableImpl(sizeof(TEntry), traits<TEntry>::has_trivial_dtor,
+                minimumInitialCapacity, loadFactor) {
+}
+
+template <typename TKey, typename TEntry>
+BasicHashtable<TKey, TEntry>::BasicHashtable(const BasicHashtable<TKey, TEntry>& other) :
+        BasicHashtableImpl(other) {
+}
+
+template <typename TKey, typename TEntry>
+BasicHashtable<TKey, TEntry>::~BasicHashtable() {
+    dispose();
+}
+
+template <typename TKey, typename TEntry>
+bool BasicHashtable<TKey, TEntry>::compareBucketKey(const Bucket& bucket,
+        const void* __restrict__ key) const {
+    return entryFor(bucket).getKey() == *static_cast<const TKey*>(key);
+}
+
+template <typename TKey, typename TEntry>
+void BasicHashtable<TKey, TEntry>::initializeBucketEntry(Bucket& bucket,
+        const void* __restrict__ entry) const {
+    if (!traits<TEntry>::has_trivial_copy) {
+        new (&entryFor(bucket)) TEntry(*(static_cast<const TEntry*>(entry)));
+    } else {
+        memcpy(&entryFor(bucket), entry, sizeof(TEntry));
+    }
+}
+
+template <typename TKey, typename TEntry>
+void BasicHashtable<TKey, TEntry>::destroyBucketEntry(Bucket& bucket) const {
+    if (!traits<TEntry>::has_trivial_dtor) {
+        entryFor(bucket).~TEntry();
+    }
+}
+
+}; // namespace android
+
+#endif // ANDROID_BASIC_HASHTABLE_H
diff --git a/include/utils/TypeHelpers.h b/include/utils/TypeHelpers.h
index a1663f3..7538817 100644
--- a/include/utils/TypeHelpers.h
+++ b/include/utils/TypeHelpers.h
@@ -213,6 +213,9 @@
 
 template <typename KEY, typename VALUE>
 struct key_value_pair_t {
+    typedef KEY key_t;
+    typedef VALUE value_t;
+
     KEY     key;
     VALUE   value;
     key_value_pair_t() { }
@@ -222,6 +225,12 @@
     inline bool operator < (const key_value_pair_t& o) const {
         return strictly_order_type(key, o.key);
     }
+    inline const KEY& getKey() const {
+        return key;
+    }
+    inline const VALUE& getValue() const {
+        return value;
+    }
 };
 
 template<>
@@ -243,6 +252,41 @@
 
 // ---------------------------------------------------------------------------
 
+/*
+ * Hash codes.
+ */
+typedef uint32_t hash_t;
+
+template <typename TKey>
+hash_t hash_type(const TKey& key);
+
+/* Built-in hash code specializations.
+ * Assumes pointers are 32bit. */
+#define ANDROID_INT32_HASH(T) \
+        template <> inline hash_t hash_type(const T& value) { return hash_t(value); }
+#define ANDROID_INT64_HASH(T) \
+        template <> inline hash_t hash_type(const T& value) { \
+                return hash_t((value >> 32) ^ value); }
+#define ANDROID_REINTERPRET_HASH(T, R) \
+        template <> inline hash_t hash_type(const T& value) { \
+                return hash_type(*reinterpret_cast<const R*>(&value)); }
+
+ANDROID_INT32_HASH(bool)
+ANDROID_INT32_HASH(char)
+ANDROID_INT32_HASH(unsigned char)
+ANDROID_INT32_HASH(short)
+ANDROID_INT32_HASH(unsigned short)
+ANDROID_INT32_HASH(int)
+ANDROID_INT32_HASH(unsigned int)
+ANDROID_INT64_HASH(long)
+ANDROID_INT64_HASH(unsigned long)
+ANDROID_REINTERPRET_HASH(float, uint32_t)
+ANDROID_REINTERPRET_HASH(double, uint64_t)
+
+template <typename T> inline hash_t hash_type(const T*& value) {
+    return hash_type(uintptr_t(value));
+}
+
 }; // namespace android
 
 // ---------------------------------------------------------------------------