| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 1 | //===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===// | 
 | 2 | // | 
 | 3 | //                     The LLVM Compiler Infrastructure | 
 | 4 | // | 
 | 5 | // This file was developed by James M. Laskey and is distributed under | 
 | 6 | // the University of Illinois Open Source License. See LICENSE.TXT for details. | 
 | 7 | // | 
 | 8 | //===----------------------------------------------------------------------===// | 
 | 9 | // | 
 | 10 | // This file implements a hash set that can be used to remove duplication of | 
 | 11 | // nodes in a graph.  This code was originally created by Chris Lattner for use | 
 | 12 | // with SelectionDAGCSEMap, but was isolated to provide use across the llvm code | 
 | 13 | // set.  | 
 | 14 | // | 
 | 15 | //===----------------------------------------------------------------------===// | 
 | 16 |  | 
#include "llvm/ADT/FoldingSet.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstring>
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 20 | using namespace llvm; | 
 | 21 |  | 
 | 22 | //===----------------------------------------------------------------------===// | 
 | 23 | // FoldingSetImpl::NodeID Implementation | 
 | 24 |  | 
 | 25 | /// Add* - Add various data types to Bit data. | 
 | 26 | /// | 
 | 27 | void FoldingSetImpl::NodeID::AddPointer(const void *Ptr) { | 
 | 28 |   // Note: this adds pointers to the hash using sizes and endianness that | 
 | 29 |   // depend on the host.  It doesn't matter however, because hashing on | 
 | 30 |   // pointer values in inherently unstable.  Nothing  should depend on the  | 
 | 31 |   // ordering of nodes in the folding set. | 
 | 32 |   intptr_t PtrI = (intptr_t)Ptr; | 
 | 33 |   Bits.push_back(unsigned(PtrI)); | 
 | 34 |   if (sizeof(intptr_t) > sizeof(unsigned)) | 
 | 35 |     Bits.push_back(unsigned(uint64_t(PtrI) >> 32)); | 
 | 36 | } | 
 | 37 | void FoldingSetImpl::NodeID::AddInteger(signed I) { | 
 | 38 |   Bits.push_back(I); | 
 | 39 | } | 
 | 40 | void FoldingSetImpl::NodeID::AddInteger(unsigned I) { | 
 | 41 |   Bits.push_back(I); | 
 | 42 | } | 
| Dan Gohman | f82e1e6 | 2007-09-14 20:48:42 +0000 | [diff] [blame] | 43 | void FoldingSetImpl::NodeID::AddInteger(int64_t I) { | 
 | 44 |   AddInteger((uint64_t)I); | 
 | 45 | } | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 46 | void FoldingSetImpl::NodeID::AddInteger(uint64_t I) { | 
 | 47 |   Bits.push_back(unsigned(I)); | 
| Chris Lattner | e4116f8 | 2007-02-04 01:48:10 +0000 | [diff] [blame] | 48 |    | 
 | 49 |   // If the integer is small, encode it just as 32-bits. | 
 | 50 |   if ((uint64_t)(int)I != I) | 
 | 51 |     Bits.push_back(unsigned(I >> 32)); | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 52 | } | 
 | 53 | void FoldingSetImpl::NodeID::AddFloat(float F) { | 
 | 54 |   Bits.push_back(FloatToBits(F)); | 
 | 55 | } | 
 | 56 | void FoldingSetImpl::NodeID::AddDouble(double D) { | 
| Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 57 |  AddInteger(DoubleToBits(D)); | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 58 | } | 
| Dale Johannesen | 9e3d3ab | 2007-09-14 22:26:36 +0000 | [diff] [blame^] | 59 | void FoldingSetImpl::NodeID::AddAPFloat(const APFloat& apf) { | 
 | 60 |   APInt api = apf.convertToAPInt(); | 
 | 61 |   const uint64_t *p = api.getRawData(); | 
 | 62 |   for (int i=0; i<api.getNumWords(); i++) | 
 | 63 |     AddInteger(*p++); | 
 | 64 | } | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 65 | void FoldingSetImpl::NodeID::AddString(const std::string &String) { | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 66 |   unsigned Size = String.size(); | 
| Jim Laskey | a97c67c | 2006-10-29 09:19:59 +0000 | [diff] [blame] | 67 |   Bits.push_back(Size); | 
 | 68 |   if (!Size) return; | 
 | 69 |  | 
| Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 70 |   unsigned Units = Size / 4; | 
 | 71 |   unsigned Pos = 0; | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 72 |   const unsigned *Base = (const unsigned *)String.data(); | 
| Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 73 |    | 
 | 74 |   // If the string is aligned do a bulk transfer. | 
 | 75 |   if (!((intptr_t)Base & 3)) { | 
| Jim Laskey | 2ac33c4 | 2006-10-27 19:38:32 +0000 | [diff] [blame] | 76 |     Bits.append(Base, Base + Units); | 
| Jim Laskey | a97c67c | 2006-10-29 09:19:59 +0000 | [diff] [blame] | 77 |     Pos = (Units + 1) * 4; | 
| Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 78 |   } else { | 
 | 79 |     // Otherwise do it the hard way. | 
| Jim Laskey | d8cb446 | 2006-10-29 08:27:07 +0000 | [diff] [blame] | 80 |     for ( Pos += 4; Pos <= Size; Pos += 4) { | 
| Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 81 |       unsigned V = ((unsigned char)String[Pos - 4] << 24) | | 
 | 82 |                    ((unsigned char)String[Pos - 3] << 16) | | 
 | 83 |                    ((unsigned char)String[Pos - 2] << 8) | | 
 | 84 |                     (unsigned char)String[Pos - 1]; | 
 | 85 |       Bits.push_back(V); | 
 | 86 |     } | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 87 |   } | 
| Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 88 |    | 
 | 89 |   // With the leftover bits. | 
 | 90 |   unsigned V = 0; | 
 | 91 |   // Pos will have overshot size by 4 - #bytes left over.  | 
 | 92 |   switch (Pos - Size) { | 
 | 93 |   case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru. | 
 | 94 |   case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru. | 
 | 95 |   case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break; | 
| Jim Laskey | d8cb446 | 2006-10-29 08:27:07 +0000 | [diff] [blame] | 96 |   default: return; // Nothing left. | 
| Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 97 |   } | 
 | 98 |  | 
 | 99 |   Bits.push_back(V); | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 100 | } | 
 | 101 |  | 
 | 102 | /// ComputeHash - Compute a strong hash value for this NodeID, used to  | 
 | 103 | /// lookup the node in the FoldingSetImpl. | 
 | 104 | unsigned FoldingSetImpl::NodeID::ComputeHash() const { | 
 | 105 |   // This is adapted from SuperFastHash by Paul Hsieh. | 
 | 106 |   unsigned Hash = Bits.size(); | 
 | 107 |   for (const unsigned *BP = &Bits[0], *E = BP+Bits.size(); BP != E; ++BP) { | 
 | 108 |     unsigned Data = *BP; | 
 | 109 |     Hash         += Data & 0xFFFF; | 
 | 110 |     unsigned Tmp  = ((Data >> 16) << 11) ^ Hash; | 
 | 111 |     Hash          = (Hash << 16) ^ Tmp; | 
 | 112 |     Hash         += Hash >> 11; | 
 | 113 |   } | 
 | 114 |    | 
 | 115 |   // Force "avalanching" of final 127 bits. | 
 | 116 |   Hash ^= Hash << 3; | 
 | 117 |   Hash += Hash >> 5; | 
 | 118 |   Hash ^= Hash << 4; | 
 | 119 |   Hash += Hash >> 17; | 
 | 120 |   Hash ^= Hash << 25; | 
 | 121 |   Hash += Hash >> 6; | 
 | 122 |   return Hash; | 
 | 123 | } | 
 | 124 |  | 
 | 125 | /// operator== - Used to compare two nodes to each other. | 
 | 126 | /// | 
 | 127 | bool FoldingSetImpl::NodeID::operator==(const FoldingSetImpl::NodeID &RHS)const{ | 
 | 128 |   if (Bits.size() != RHS.Bits.size()) return false; | 
 | 129 |   return memcmp(&Bits[0], &RHS.Bits[0], Bits.size()*sizeof(Bits[0])) == 0; | 
 | 130 | } | 
 | 131 |  | 
 | 132 |  | 
 | 133 | //===----------------------------------------------------------------------===// | 
| Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 134 | /// Helper functions for FoldingSetImpl. | 
 | 135 |  | 
 | 136 | /// GetNextPtr - In order to save space, each bucket is a | 
 | 137 | /// singly-linked-list. In order to make deletion more efficient, we make | 
 | 138 | /// the list circular, so we can delete a node without computing its hash. | 
 | 139 | /// The problem with this is that the start of the hash buckets are not | 
| Chris Lattner | 3cab071 | 2007-01-30 23:16:22 +0000 | [diff] [blame] | 140 | /// Nodes.  If NextInBucketPtr is a bucket pointer, this method returns null: | 
 | 141 | /// use GetBucketPtr when this happens. | 
| Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 142 | static FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr, | 
 | 143 |                                         void **Buckets, unsigned NumBuckets) { | 
 | 144 |   if (NextInBucketPtr >= Buckets && NextInBucketPtr < Buckets + NumBuckets) | 
 | 145 |     return 0; | 
 | 146 |   return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr); | 
 | 147 | } | 
 | 148 |  | 
 | 149 | /// GetBucketPtr - Provides a casting of a bucket pointer for isNode | 
 | 150 | /// testing. | 
 | 151 | static void **GetBucketPtr(void *NextInBucketPtr) { | 
 | 152 |   return static_cast<void**>(NextInBucketPtr); | 
 | 153 | } | 
 | 154 |  | 
 | 155 | /// GetBucketFor - Hash the specified node ID and return the hash bucket for | 
 | 156 | /// the specified ID. | 
 | 157 | static void **GetBucketFor(const FoldingSetImpl::NodeID &ID, | 
 | 158 |                            void **Buckets, unsigned NumBuckets) { | 
 | 159 |   // NumBuckets is always a power of 2. | 
 | 160 |   unsigned BucketNum = ID.ComputeHash() & (NumBuckets-1); | 
 | 161 |   return Buckets + BucketNum; | 
 | 162 | } | 
 | 163 |  | 
 | 164 | //===----------------------------------------------------------------------===// | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 165 | // FoldingSetImpl Implementation | 
 | 166 |  | 
| Jim Laskey | 1f67a99 | 2006-11-02 14:21:26 +0000 | [diff] [blame] | 167 | FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) : NumNodes(0) { | 
 | 168 |   assert(5 < Log2InitSize && Log2InitSize < 32 && | 
 | 169 |          "Initial hash table size out of range"); | 
 | 170 |   NumBuckets = 1 << Log2InitSize; | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 171 |   Buckets = new void*[NumBuckets]; | 
 | 172 |   memset(Buckets, 0, NumBuckets*sizeof(void*)); | 
 | 173 | } | 
 | 174 | FoldingSetImpl::~FoldingSetImpl() { | 
 | 175 |   delete [] Buckets; | 
 | 176 | } | 
 | 177 |  | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 178 | /// GrowHashTable - Double the size of the hash table and rehash everything. | 
 | 179 | /// | 
 | 180 | void FoldingSetImpl::GrowHashTable() { | 
 | 181 |   void **OldBuckets = Buckets; | 
 | 182 |   unsigned OldNumBuckets = NumBuckets; | 
 | 183 |   NumBuckets <<= 1; | 
 | 184 |    | 
 | 185 |   // Reset the node count to zero: we're going to reinsert everything. | 
 | 186 |   NumNodes = 0; | 
 | 187 |    | 
 | 188 |   // Clear out new buckets. | 
 | 189 |   Buckets = new void*[NumBuckets]; | 
 | 190 |   memset(Buckets, 0, NumBuckets*sizeof(void*)); | 
 | 191 |  | 
 | 192 |   // Walk the old buckets, rehashing nodes into their new place. | 
 | 193 |   for (unsigned i = 0; i != OldNumBuckets; ++i) { | 
 | 194 |     void *Probe = OldBuckets[i]; | 
 | 195 |     if (!Probe) continue; | 
| Chris Lattner | b85210f | 2007-01-31 06:04:41 +0000 | [diff] [blame] | 196 |     while (Node *NodeInBucket = GetNextPtr(Probe, OldBuckets, OldNumBuckets)) { | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 197 |       // Figure out the next link, remove NodeInBucket from the old link. | 
 | 198 |       Probe = NodeInBucket->getNextInBucket(); | 
 | 199 |       NodeInBucket->SetNextInBucket(0); | 
 | 200 |  | 
 | 201 |       // Insert the node into the new bucket, after recomputing the hash. | 
 | 202 |       NodeID ID; | 
 | 203 |       GetNodeProfile(ID, NodeInBucket); | 
| Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 204 |       InsertNode(NodeInBucket, GetBucketFor(ID, Buckets, NumBuckets)); | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 205 |     } | 
 | 206 |   } | 
 | 207 |    | 
 | 208 |   delete[] OldBuckets; | 
 | 209 | } | 
 | 210 |  | 
 | 211 | /// FindNodeOrInsertPos - Look up the node specified by ID.  If it exists, | 
 | 212 | /// return it.  If not, return the insertion token that will make insertion | 
 | 213 | /// faster. | 
 | 214 | FoldingSetImpl::Node *FoldingSetImpl::FindNodeOrInsertPos(const NodeID &ID, | 
 | 215 |                                                           void *&InsertPos) { | 
| Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 216 |   void **Bucket = GetBucketFor(ID, Buckets, NumBuckets); | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 217 |   void *Probe = *Bucket; | 
 | 218 |    | 
 | 219 |   InsertPos = 0; | 
 | 220 |    | 
| Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 221 |   while (Node *NodeInBucket = GetNextPtr(Probe, Buckets, NumBuckets)) { | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 222 |     NodeID OtherID; | 
 | 223 |     GetNodeProfile(OtherID, NodeInBucket); | 
 | 224 |     if (OtherID == ID) | 
 | 225 |       return NodeInBucket; | 
 | 226 |  | 
 | 227 |     Probe = NodeInBucket->getNextInBucket(); | 
 | 228 |   } | 
 | 229 |    | 
 | 230 |   // Didn't find the node, return null with the bucket as the InsertPos. | 
 | 231 |   InsertPos = Bucket; | 
 | 232 |   return 0; | 
 | 233 | } | 
 | 234 |  | 
 | 235 | /// InsertNode - Insert the specified node into the folding set, knowing that it | 
 | 236 | /// is not already in the map.  InsertPos must be obtained from  | 
 | 237 | /// FindNodeOrInsertPos. | 
 | 238 | void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) { | 
| Chris Lattner | 0de4439 | 2007-02-01 05:33:21 +0000 | [diff] [blame] | 239 |   assert(N->getNextInBucket() == 0); | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 240 |   // Do we need to grow the hashtable? | 
| Chris Lattner | b85210f | 2007-01-31 06:04:41 +0000 | [diff] [blame] | 241 |   if (NumNodes+1 > NumBuckets*2) { | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 242 |     GrowHashTable(); | 
 | 243 |     NodeID ID; | 
 | 244 |     GetNodeProfile(ID, N); | 
| Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 245 |     InsertPos = GetBucketFor(ID, Buckets, NumBuckets); | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 246 |   } | 
| Chris Lattner | b85210f | 2007-01-31 06:04:41 +0000 | [diff] [blame] | 247 |  | 
 | 248 |   ++NumNodes; | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 249 |    | 
 | 250 |   /// The insert position is actually a bucket pointer. | 
 | 251 |   void **Bucket = static_cast<void**>(InsertPos); | 
 | 252 |    | 
 | 253 |   void *Next = *Bucket; | 
 | 254 |    | 
 | 255 |   // If this is the first insertion into this bucket, its next pointer will be | 
 | 256 |   // null.  Pretend as if it pointed to itself. | 
 | 257 |   if (Next == 0) | 
 | 258 |     Next = Bucket; | 
 | 259 |  | 
| Chris Lattner | b85210f | 2007-01-31 06:04:41 +0000 | [diff] [blame] | 260 |   // Set the node's next pointer, and make the bucket point to the node. | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 261 |   N->SetNextInBucket(Next); | 
 | 262 |   *Bucket = N; | 
 | 263 | } | 
 | 264 |  | 
 | 265 | /// RemoveNode - Remove a node from the folding set, returning true if one was | 
 | 266 | /// removed or false if the node was not in the folding set. | 
 | 267 | bool FoldingSetImpl::RemoveNode(Node *N) { | 
 | 268 |   // Because each bucket is a circular list, we don't need to compute N's hash | 
| Chris Lattner | 0de4439 | 2007-02-01 05:33:21 +0000 | [diff] [blame] | 269 |   // to remove it. | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 270 |   void *Ptr = N->getNextInBucket(); | 
 | 271 |   if (Ptr == 0) return false;  // Not in folding set. | 
 | 272 |  | 
 | 273 |   --NumNodes; | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 274 |   N->SetNextInBucket(0); | 
| Chris Lattner | 0de4439 | 2007-02-01 05:33:21 +0000 | [diff] [blame] | 275 |  | 
 | 276 |   // Remember what N originally pointed to, either a bucket or another node. | 
 | 277 |   void *NodeNextPtr = Ptr; | 
 | 278 |    | 
 | 279 |   // Chase around the list until we find the node (or bucket) which points to N. | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 280 |   while (true) { | 
| Jim Laskey | 18529f3 | 2006-10-27 18:05:12 +0000 | [diff] [blame] | 281 |     if (Node *NodeInBucket = GetNextPtr(Ptr, Buckets, NumBuckets)) { | 
| Jim Laskey | 0e5af19 | 2006-10-27 16:16:16 +0000 | [diff] [blame] | 282 |       // Advance pointer. | 
 | 283 |       Ptr = NodeInBucket->getNextInBucket(); | 
 | 284 |        | 
 | 285 |       // We found a node that points to N, change it to point to N's next node, | 
 | 286 |       // removing N from the list. | 
 | 287 |       if (Ptr == N) { | 
 | 288 |         NodeInBucket->SetNextInBucket(NodeNextPtr); | 
 | 289 |         return true; | 
 | 290 |       } | 
 | 291 |     } else { | 
 | 292 |       void **Bucket = GetBucketPtr(Ptr); | 
 | 293 |       Ptr = *Bucket; | 
 | 294 |        | 
 | 295 |       // If we found that the bucket points to N, update the bucket to point to | 
 | 296 |       // whatever is next. | 
 | 297 |       if (Ptr == N) { | 
 | 298 |         *Bucket = NodeNextPtr; | 
 | 299 |         return true; | 
 | 300 |       } | 
 | 301 |     } | 
 | 302 |   } | 
 | 303 | } | 
 | 304 |  | 
 | 305 | /// GetOrInsertNode - If there is an existing simple Node exactly | 
 | 306 | /// equal to the specified node, return it.  Otherwise, insert 'N' and it | 
 | 307 | /// instead. | 
 | 308 | FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) { | 
 | 309 |   NodeID ID; | 
 | 310 |   GetNodeProfile(ID, N); | 
 | 311 |   void *IP; | 
 | 312 |   if (Node *E = FindNodeOrInsertPos(ID, IP)) | 
 | 313 |     return E; | 
 | 314 |   InsertNode(N, IP); | 
 | 315 |   return N; | 
 | 316 | } |