Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 1 | //===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===// |
| 2 | // |
| 3 | // The LLVM Compiler Infrastructure |
| 4 | // |
| 5 | // This file was developed by James M. Laskey and is distributed under |
| 6 | // the University of Illinois Open Source License. See LICENSE.TXT for details. |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | // |
| 10 | // This file implements a hash set that can be used to remove duplication of |
| 11 | // nodes in a graph. This code was originally created by Chris Lattner for use |
| 12 | // with SelectionDAGCSEMap, but was isolated to provide use across the llvm code |
| 13 | // set. |
| 14 | // |
| 15 | //===----------------------------------------------------------------------===// |
| 16 | |
#include "llvm/ADT/FoldingSet.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstring>
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 20 | using namespace llvm; |
| 21 | |
| 22 | //===----------------------------------------------------------------------===// |
| 23 | // FoldingSetImpl::NodeID Implementation |
| 24 | |
| 25 | /// Add* - Add various data types to Bit data. |
| 26 | /// |
| 27 | void FoldingSetImpl::NodeID::AddPointer(const void *Ptr) { |
| 28 | // Note: this adds pointers to the hash using sizes and endianness that |
| 29 | // depend on the host. It doesn't matter however, because hashing on |
| 30 | // pointer values in inherently unstable. Nothing should depend on the |
| 31 | // ordering of nodes in the folding set. |
| 32 | intptr_t PtrI = (intptr_t)Ptr; |
| 33 | Bits.push_back(unsigned(PtrI)); |
| 34 | if (sizeof(intptr_t) > sizeof(unsigned)) |
| 35 | Bits.push_back(unsigned(uint64_t(PtrI) >> 32)); |
| 36 | } |
| 37 | void FoldingSetImpl::NodeID::AddInteger(signed I) { |
| 38 | Bits.push_back(I); |
| 39 | } |
| 40 | void FoldingSetImpl::NodeID::AddInteger(unsigned I) { |
| 41 | Bits.push_back(I); |
| 42 | } |
Dan Gohman | f82e1e6 | 2007-09-14 20:48:42 +0000 | [diff] [blame] | 43 | void FoldingSetImpl::NodeID::AddInteger(int64_t I) { |
| 44 | AddInteger((uint64_t)I); |
| 45 | } |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 46 | void FoldingSetImpl::NodeID::AddInteger(uint64_t I) { |
| 47 | Bits.push_back(unsigned(I)); |
Chris Lattner | e4116f8 | 2007-02-04 01:48:10 +0000 | [diff] [blame] | 48 | |
| 49 | // If the integer is small, encode it just as 32-bits. |
| 50 | if ((uint64_t)(int)I != I) |
| 51 | Bits.push_back(unsigned(I >> 32)); |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 52 | } |
| 53 | void FoldingSetImpl::NodeID::AddFloat(float F) { |
| 54 | Bits.push_back(FloatToBits(F)); |
| 55 | } |
| 56 | void FoldingSetImpl::NodeID::AddDouble(double D) { |
Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 57 | AddInteger(DoubleToBits(D)); |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 58 | } |
Dale Johannesen | 9e3d3ab | 2007-09-14 22:26:36 +0000 | [diff] [blame] | 59 | void FoldingSetImpl::NodeID::AddAPFloat(const APFloat& apf) { |
| 60 | APInt api = apf.convertToAPInt(); |
| 61 | const uint64_t *p = api.getRawData(); |
Chris Lattner | 2204906 | 2007-09-14 22:57:00 +0000 | [diff] [blame] | 62 | for (unsigned i=0; i<api.getNumWords(); i++) |
Dale Johannesen | 9e3d3ab | 2007-09-14 22:26:36 +0000 | [diff] [blame] | 63 | AddInteger(*p++); |
| 64 | } |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 65 | void FoldingSetImpl::NodeID::AddString(const std::string &String) { |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 66 | unsigned Size = String.size(); |
Jim Laskey | a97c67c | 2006-10-29 09:19:59 +0000 | [diff] [blame] | 67 | Bits.push_back(Size); |
| 68 | if (!Size) return; |
| 69 | |
Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 70 | unsigned Units = Size / 4; |
| 71 | unsigned Pos = 0; |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 72 | const unsigned *Base = (const unsigned *)String.data(); |
Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 73 | |
| 74 | // If the string is aligned do a bulk transfer. |
| 75 | if (!((intptr_t)Base & 3)) { |
Jim Laskey | 2ac33c4 | 2006-10-27 19:38:32 +0000 | [diff] [blame] | 76 | Bits.append(Base, Base + Units); |
Jim Laskey | a97c67c | 2006-10-29 09:19:59 +0000 | [diff] [blame] | 77 | Pos = (Units + 1) * 4; |
Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 78 | } else { |
| 79 | // Otherwise do it the hard way. |
Jim Laskey | d8cb446 | 2006-10-29 08:27:07 +0000 | [diff] [blame] | 80 | for ( Pos += 4; Pos <= Size; Pos += 4) { |
Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 81 | unsigned V = ((unsigned char)String[Pos - 4] << 24) | |
| 82 | ((unsigned char)String[Pos - 3] << 16) | |
| 83 | ((unsigned char)String[Pos - 2] << 8) | |
| 84 | (unsigned char)String[Pos - 1]; |
| 85 | Bits.push_back(V); |
| 86 | } |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 87 | } |
Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 88 | |
| 89 | // With the leftover bits. |
| 90 | unsigned V = 0; |
| 91 | // Pos will have overshot size by 4 - #bytes left over. |
| 92 | switch (Pos - Size) { |
| 93 | case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru. |
| 94 | case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru. |
| 95 | case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break; |
Jim Laskey | d8cb446 | 2006-10-29 08:27:07 +0000 | [diff] [blame] | 96 | default: return; // Nothing left. |
Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 97 | } |
| 98 | |
| 99 | Bits.push_back(V); |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 100 | } |
| 101 | |
| 102 | /// ComputeHash - Compute a strong hash value for this NodeID, used to |
| 103 | /// lookup the node in the FoldingSetImpl. |
| 104 | unsigned FoldingSetImpl::NodeID::ComputeHash() const { |
| 105 | // This is adapted from SuperFastHash by Paul Hsieh. |
| 106 | unsigned Hash = Bits.size(); |
| 107 | for (const unsigned *BP = &Bits[0], *E = BP+Bits.size(); BP != E; ++BP) { |
| 108 | unsigned Data = *BP; |
| 109 | Hash += Data & 0xFFFF; |
| 110 | unsigned Tmp = ((Data >> 16) << 11) ^ Hash; |
| 111 | Hash = (Hash << 16) ^ Tmp; |
| 112 | Hash += Hash >> 11; |
| 113 | } |
| 114 | |
| 115 | // Force "avalanching" of final 127 bits. |
| 116 | Hash ^= Hash << 3; |
| 117 | Hash += Hash >> 5; |
| 118 | Hash ^= Hash << 4; |
| 119 | Hash += Hash >> 17; |
| 120 | Hash ^= Hash << 25; |
| 121 | Hash += Hash >> 6; |
| 122 | return Hash; |
| 123 | } |
| 124 | |
| 125 | /// operator== - Used to compare two nodes to each other. |
| 126 | /// |
| 127 | bool FoldingSetImpl::NodeID::operator==(const FoldingSetImpl::NodeID &RHS)const{ |
| 128 | if (Bits.size() != RHS.Bits.size()) return false; |
| 129 | return memcmp(&Bits[0], &RHS.Bits[0], Bits.size()*sizeof(Bits[0])) == 0; |
| 130 | } |
| 131 | |
| 132 | |
| 133 | //===----------------------------------------------------------------------===// |
Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 134 | /// Helper functions for FoldingSetImpl. |
| 135 | |
| 136 | /// GetNextPtr - In order to save space, each bucket is a |
| 137 | /// singly-linked-list. In order to make deletion more efficient, we make |
| 138 | /// the list circular, so we can delete a node without computing its hash. |
| 139 | /// The problem with this is that the start of the hash buckets are not |
Chris Lattner | 3cab071 | 2007-01-30 23:16:22 +0000 | [diff] [blame] | 140 | /// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null: |
| 141 | /// use GetBucketPtr when this happens. |
Chris Lattner | 9a7288b | 2007-10-03 20:45:43 +0000 | [diff] [blame^] | 142 | static FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) { |
| 143 | // The low bit is set if this is the pointer back to the bucket. |
| 144 | if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1) |
Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 145 | return 0; |
Chris Lattner | 9a7288b | 2007-10-03 20:45:43 +0000 | [diff] [blame^] | 146 | |
Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 147 | return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr); |
| 148 | } |
| 149 | |
/// GetBucketPtr - Turn a link into the address of a bucket slot by clearing
/// its low bit.  Intended for links that GetNextPtr reported as not being a
/// node.
static void **GetBucketPtr(void *NextInBucketPtr) {
  const intptr_t Tagged = reinterpret_cast<intptr_t>(NextInBucketPtr);
  const intptr_t Untagged = Tagged & ~intptr_t(1);
  return reinterpret_cast<void**>(Untagged);
}
| 156 | |
| 157 | /// GetBucketFor - Hash the specified node ID and return the hash bucket for |
| 158 | /// the specified ID. |
| 159 | static void **GetBucketFor(const FoldingSetImpl::NodeID &ID, |
| 160 | void **Buckets, unsigned NumBuckets) { |
| 161 | // NumBuckets is always a power of 2. |
| 162 | unsigned BucketNum = ID.ComputeHash() & (NumBuckets-1); |
| 163 | return Buckets + BucketNum; |
| 164 | } |
| 165 | |
| 166 | //===----------------------------------------------------------------------===// |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 167 | // FoldingSetImpl Implementation |
| 168 | |
Jim Laskey | 1f67a99 | 2006-11-02 14:21:26 +0000 | [diff] [blame] | 169 | FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) : NumNodes(0) { |
| 170 | assert(5 < Log2InitSize && Log2InitSize < 32 && |
| 171 | "Initial hash table size out of range"); |
| 172 | NumBuckets = 1 << Log2InitSize; |
Chris Lattner | 9a7288b | 2007-10-03 20:45:43 +0000 | [diff] [blame^] | 173 | Buckets = new void*[NumBuckets+1]; |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 174 | memset(Buckets, 0, NumBuckets*sizeof(void*)); |
Chris Lattner | 9a7288b | 2007-10-03 20:45:43 +0000 | [diff] [blame^] | 175 | |
| 176 | // Set the very last bucket to be a non-null "pointer". |
| 177 | Buckets[NumBuckets] = reinterpret_cast<void*>(-2); |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 178 | } |
| 179 | FoldingSetImpl::~FoldingSetImpl() { |
| 180 | delete [] Buckets; |
| 181 | } |
| 182 | |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 183 | /// GrowHashTable - Double the size of the hash table and rehash everything. |
| 184 | /// |
| 185 | void FoldingSetImpl::GrowHashTable() { |
| 186 | void **OldBuckets = Buckets; |
| 187 | unsigned OldNumBuckets = NumBuckets; |
| 188 | NumBuckets <<= 1; |
| 189 | |
| 190 | // Reset the node count to zero: we're going to reinsert everything. |
| 191 | NumNodes = 0; |
| 192 | |
| 193 | // Clear out new buckets. |
Chris Lattner | 9a7288b | 2007-10-03 20:45:43 +0000 | [diff] [blame^] | 194 | Buckets = new void*[NumBuckets+1]; |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 195 | memset(Buckets, 0, NumBuckets*sizeof(void*)); |
| 196 | |
Chris Lattner | 9a7288b | 2007-10-03 20:45:43 +0000 | [diff] [blame^] | 197 | // Set the very last bucket to be a non-null "pointer". |
| 198 | Buckets[NumBuckets] = reinterpret_cast<void*>(-1); |
| 199 | |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 200 | // Walk the old buckets, rehashing nodes into their new place. |
| 201 | for (unsigned i = 0; i != OldNumBuckets; ++i) { |
| 202 | void *Probe = OldBuckets[i]; |
| 203 | if (!Probe) continue; |
Chris Lattner | 9a7288b | 2007-10-03 20:45:43 +0000 | [diff] [blame^] | 204 | while (Node *NodeInBucket = GetNextPtr(Probe)) { |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 205 | // Figure out the next link, remove NodeInBucket from the old link. |
| 206 | Probe = NodeInBucket->getNextInBucket(); |
| 207 | NodeInBucket->SetNextInBucket(0); |
| 208 | |
| 209 | // Insert the node into the new bucket, after recomputing the hash. |
| 210 | NodeID ID; |
| 211 | GetNodeProfile(ID, NodeInBucket); |
Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 212 | InsertNode(NodeInBucket, GetBucketFor(ID, Buckets, NumBuckets)); |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 213 | } |
| 214 | } |
| 215 | |
| 216 | delete[] OldBuckets; |
| 217 | } |
| 218 | |
| 219 | /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, |
| 220 | /// return it. If not, return the insertion token that will make insertion |
| 221 | /// faster. |
| 222 | FoldingSetImpl::Node *FoldingSetImpl::FindNodeOrInsertPos(const NodeID &ID, |
| 223 | void *&InsertPos) { |
Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 224 | void **Bucket = GetBucketFor(ID, Buckets, NumBuckets); |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 225 | void *Probe = *Bucket; |
| 226 | |
| 227 | InsertPos = 0; |
| 228 | |
Chris Lattner | 9a7288b | 2007-10-03 20:45:43 +0000 | [diff] [blame^] | 229 | while (Node *NodeInBucket = GetNextPtr(Probe)) { |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 230 | NodeID OtherID; |
| 231 | GetNodeProfile(OtherID, NodeInBucket); |
| 232 | if (OtherID == ID) |
| 233 | return NodeInBucket; |
| 234 | |
| 235 | Probe = NodeInBucket->getNextInBucket(); |
| 236 | } |
| 237 | |
| 238 | // Didn't find the node, return null with the bucket as the InsertPos. |
| 239 | InsertPos = Bucket; |
| 240 | return 0; |
| 241 | } |
| 242 | |
| 243 | /// InsertNode - Insert the specified node into the folding set, knowing that it |
| 244 | /// is not already in the map. InsertPos must be obtained from |
| 245 | /// FindNodeOrInsertPos. |
| 246 | void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) { |
Chris Lattner | 0de4439 | 2007-02-01 05:33:21 +0000 | [diff] [blame] | 247 | assert(N->getNextInBucket() == 0); |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 248 | // Do we need to grow the hashtable? |
Chris Lattner | b85210f | 2007-01-31 06:04:41 +0000 | [diff] [blame] | 249 | if (NumNodes+1 > NumBuckets*2) { |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 250 | GrowHashTable(); |
| 251 | NodeID ID; |
| 252 | GetNodeProfile(ID, N); |
Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 253 | InsertPos = GetBucketFor(ID, Buckets, NumBuckets); |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 254 | } |
Chris Lattner | b85210f | 2007-01-31 06:04:41 +0000 | [diff] [blame] | 255 | |
| 256 | ++NumNodes; |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 257 | |
| 258 | /// The insert position is actually a bucket pointer. |
| 259 | void **Bucket = static_cast<void**>(InsertPos); |
| 260 | |
| 261 | void *Next = *Bucket; |
| 262 | |
| 263 | // If this is the first insertion into this bucket, its next pointer will be |
Chris Lattner | 9a7288b | 2007-10-03 20:45:43 +0000 | [diff] [blame^] | 264 | // null. Pretend as if it pointed to itself, setting the low bit to indicate |
| 265 | // that it is a pointer to the bucket. |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 266 | if (Next == 0) |
Chris Lattner | 9a7288b | 2007-10-03 20:45:43 +0000 | [diff] [blame^] | 267 | Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1); |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 268 | |
Chris Lattner | b85210f | 2007-01-31 06:04:41 +0000 | [diff] [blame] | 269 | // Set the node's next pointer, and make the bucket point to the node. |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 270 | N->SetNextInBucket(Next); |
| 271 | *Bucket = N; |
| 272 | } |
| 273 | |
| 274 | /// RemoveNode - Remove a node from the folding set, returning true if one was |
| 275 | /// removed or false if the node was not in the folding set. |
| 276 | bool FoldingSetImpl::RemoveNode(Node *N) { |
| 277 | // Because each bucket is a circular list, we don't need to compute N's hash |
Chris Lattner | 0de4439 | 2007-02-01 05:33:21 +0000 | [diff] [blame] | 278 | // to remove it. |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 279 | void *Ptr = N->getNextInBucket(); |
| 280 | if (Ptr == 0) return false; // Not in folding set. |
| 281 | |
| 282 | --NumNodes; |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 283 | N->SetNextInBucket(0); |
Chris Lattner | 0de4439 | 2007-02-01 05:33:21 +0000 | [diff] [blame] | 284 | |
| 285 | // Remember what N originally pointed to, either a bucket or another node. |
| 286 | void *NodeNextPtr = Ptr; |
| 287 | |
| 288 | // Chase around the list until we find the node (or bucket) which points to N. |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 289 | while (true) { |
Chris Lattner | 9a7288b | 2007-10-03 20:45:43 +0000 | [diff] [blame^] | 290 | if (Node *NodeInBucket = GetNextPtr(Ptr)) { |
Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 291 | // Advance pointer. |
| 292 | Ptr = NodeInBucket->getNextInBucket(); |
| 293 | |
| 294 | // We found a node that points to N, change it to point to N's next node, |
| 295 | // removing N from the list. |
| 296 | if (Ptr == N) { |
| 297 | NodeInBucket->SetNextInBucket(NodeNextPtr); |
| 298 | return true; |
| 299 | } |
| 300 | } else { |
| 301 | void **Bucket = GetBucketPtr(Ptr); |
| 302 | Ptr = *Bucket; |
| 303 | |
| 304 | // If we found that the bucket points to N, update the bucket to point to |
| 305 | // whatever is next. |
| 306 | if (Ptr == N) { |
| 307 | *Bucket = NodeNextPtr; |
| 308 | return true; |
| 309 | } |
| 310 | } |
| 311 | } |
| 312 | } |
| 313 | |
| 314 | /// GetOrInsertNode - If there is an existing simple Node exactly |
| 315 | /// equal to the specified node, return it. Otherwise, insert 'N' and it |
| 316 | /// instead. |
| 317 | FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) { |
| 318 | NodeID ID; |
| 319 | GetNodeProfile(ID, N); |
| 320 | void *IP; |
| 321 | if (Node *E = FindNodeOrInsertPos(ID, IP)) |
| 322 | return E; |
| 323 | InsertNode(N, IP); |
| 324 | return N; |
| 325 | } |