Track the full hash value (before it is reduced modulo the hash table size) for each token.
This lets us inspect interesting properties of the hash distribution.

llvm-svn: 39056
diff --git a/clang/Lex/IdentifierTable.cpp b/clang/Lex/IdentifierTable.cpp
index dffaa2f..734e82f 100644
--- a/clang/Lex/IdentifierTable.cpp
+++ b/clang/Lex/IdentifierTable.cpp
@@ -179,7 +179,8 @@
                                      const char *NameEnd) {
   IdentifierBucket **TableArray = (IdentifierBucket**)TheTable;
 
-  unsigned Hash = HashString(NameStart, NameEnd) & (HASH_TABLE_SIZE-1);
+  unsigned FullHash = HashString(NameStart, NameEnd);
+  unsigned Hash = FullHash & (HASH_TABLE_SIZE-1);
   unsigned Length = NameEnd-NameStart;
   
   IdentifierBucket *IdentHead = TableArray[Hash];
@@ -218,6 +219,7 @@
   Identifier->TokInfo.IsPoisoned = false;
   Identifier->TokInfo.IsOtherTargetMacro = false;
   Identifier->TokInfo.FETokenInfo = 0;
+  Identifier->TokInfo.HashValue = FullHash;
 
   // Copy the string information.
   char *StrBuffer = (char*)(Identifier+1);
@@ -350,8 +352,19 @@
         MaxIdentifierLength = Id->TokInfo.getNameLength();
       ++NumIdentifiersInBucket;
     }
-    if (NumIdentifiersInBucket > MaxBucketLength) 
+    if (NumIdentifiersInBucket > MaxBucketLength) {
       MaxBucketLength = NumIdentifiersInBucket;
+      
+#if 0 // This code can be enabled to see (with -stats) a sample of some of the
+      // longest buckets in the hash table.  Useful for inspecting density of
+      // buckets etc.
+      std::cerr << "Bucket length " << MaxBucketLength << ":\n";
+      for (IdentifierBucket *Id = TableArray[i]; Id; Id = Id->Next) {
+        std::cerr << "  " << Id->TokInfo.getName() << " hash = "
+                  << Id->TokInfo.HashValue << "\n";
+      }
+#endif
+    }
     if (NumIdentifiersInBucket == 0)
       ++NumEmptyBuckets;