IdentifierInfo:
- IdentifierInfo can now (optionally) have its string data not be
  co-located with itself.  This is for use with PTH.  This aspect is a
  little gross, as getName() and getLength() now make assumptions
  about a possible alternate representation of IdentifierInfo.
  Perhaps we should make IdentifierInfo have virtual methods?

IdentifierTable:
- Added class "IdentifierInfoLookup" that can be used by
  IdentifierTable to perform "string -> IdentifierInfo" lookups using
  an auxilliary data structure.  This is used by PTH.
- Perform tests show that IdentifierTable::get() does not slow down
  because of the extra check for the IdentiferInfoLookup object (the
  regular StringMap lookup does enough work to mitigate the impact of
  an extra null pointer check).
- The upshot is that now that some IdentifierInfo objects might be
  owned by the IdentiferInfoLookup object.  This should be reviewed.

PTH:
- Modified PTHManager::GetIdentifierInfo to *not* insert entries in
  IdentifierTable's string map, and instead create IdentifierInfo
  objects on the fly when mapping from persistent IDs to
  IdentifierInfos.  This saves a ton of work with string copies,
  hashing, and StringMap lookup and resizing.  This change was
  motivated because when processing source files in the PTH cache we
  don't need to do any string -> IdentifierInfo lookups.
- PTHManager now subclasses IdentifierInfoLookup, allowing clients of
  IdentifierTable to transparently use IdentifierInfo objects managed
  by the PTH file.  PTHManager resolves "string -> IdentifierInfo"
  queries by doing a binary search over a sorted table of identifier
  strings in the PTH file (the exact algorithm we use can be changed
  as needed).

These changes lead to the following performance changes when using PTH on Cocoa.h:
- fsyntax-only: 10% performance improvement
- Eonly: 30% performance improvement


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@62273 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp
index 86c6577..a2d66fe 100644
--- a/lib/Lex/PTHLexer.cpp
+++ b/lib/Lex/PTHLexer.cpp
@@ -506,9 +506,10 @@
 
 PTHManager::PTHManager(const llvm::MemoryBuffer* buf, void* fileLookup,
                        const char* idDataTable, IdentifierInfo** perIDCache, 
-                       Preprocessor& pp)
+                       const char* sortedIdTable, unsigned numIds)
 : Buf(buf), PerIDCache(perIDCache), FileLookup(fileLookup),
-  IdDataTable(idDataTable), ITable(pp.getIdentifierTable()), PP(pp) {}
+  IdDataTable(idDataTable), SortedIdTable(sortedIdTable),
+  NumIds(numIds), PP(0) {}
 
 PTHManager::~PTHManager() {
   delete Buf;
@@ -516,7 +517,7 @@
   free(PerIDCache);
 }
 
-PTHManager* PTHManager::Create(const std::string& file, Preprocessor& PP) {
+PTHManager* PTHManager::Create(const std::string& file) {
   
   // Memory map the PTH file.
   llvm::OwningPtr<llvm::MemoryBuffer>
@@ -563,6 +564,14 @@
     return 0; // FIXME: Proper error diagnostic?
   }
   
+  // Get the location of the lexigraphically-sorted table of persistent IDs.
+  const char* SortedIdTableOffset = EndTable + sizeof(uint32_t)*2;
+  const char* SortedIdTable = BufBeg + Read32(SortedIdTableOffset);
+  if (!(SortedIdTable > BufBeg && SortedIdTable < BufEnd)) {
+    assert(false && "Invalid PTH file.");
+    return 0; // FIXME: Proper error diagnostic?
+  }
+  
   // Get the number of IdentifierInfos and pre-allocate the identifier cache.
   uint32_t NumIds = Read32(IData);
 
@@ -577,14 +586,15 @@
     return 0;
   }
   
-  // Create the new lexer.
-  return new PTHManager(File.take(), FL.take(), IData, PerIDCache, PP);
+  // Create the new PTHManager.
+  return new PTHManager(File.take(), FL.take(), IData, PerIDCache,
+                        SortedIdTable, NumIds);
 }
 
 IdentifierInfo* PTHManager::GetIdentifierInfo(unsigned persistentID) {
     
   // Check if the IdentifierInfo has already been resolved.
-  IdentifierInfo*& II = PerIDCache[persistentID];
+  IdentifierInfo* II = PerIDCache[persistentID];
   if (II) return II;
   
   // Look in the PTH file for the string data for the IdentifierInfo object.
@@ -592,14 +602,66 @@
   const char* IDData = Buf->getBufferStart() + Read32(TableEntry);
   assert(IDData < Buf->getBufferEnd());
   
-  // Read the length of the string.
-  uint32_t len = Read32(IDData);  
+  // Allocate the object.
+  std::pair<IdentifierInfo,const char*> *Mem =
+    Alloc.Allocate<std::pair<IdentifierInfo,const char*> >();
+
+  Mem->second = IDData;
+  II = new ((void*) Mem) IdentifierInfo(true);
   
-  // Get the IdentifierInfo* with the specified string.
-  II = &ITable.get(IDData, IDData+len);
+  // Store the new IdentifierInfo in the cache.
+  PerIDCache[persistentID] = II;
   return II;
 }
 
+IdentifierInfo* PTHManager::get(const char *NameStart, const char *NameEnd) {
+  unsigned min = 0;
+  unsigned max = NumIds;
+  unsigned len = NameEnd - NameStart;
+  
+  do {
+    unsigned i = (max - min) / 2 + min;
+    const char* p = SortedIdTable + (i * 4);
+    
+    // Read the persistentID.
+    unsigned perID = 
+      ((unsigned) ((uint8_t) p[0]))
+      | (((unsigned) ((uint8_t) p[1])) << 8)
+      | (((unsigned) ((uint8_t) p[2])) << 16)
+      | (((unsigned) ((uint8_t) p[3])) << 24);
+    
+    // Get the IdentifierInfo.
+    IdentifierInfo* II = GetIdentifierInfo(perID);
+    
+    // First compare the lengths.
+    unsigned IILen = II->getLength();
+    if (len < IILen) goto IsLess;
+    if (len > IILen) goto IsGreater;
+    
+    // Now compare the strings!
+    {
+      signed comp = strncmp(NameStart, II->getName(), len);
+      if (comp < 0) goto IsLess;
+      if (comp > 0) goto IsGreater;
+    }    
+    // We found a match!
+    return II;
+    
+  IsGreater:
+    if (i == min) break;
+    min = i;
+    continue;
+    
+  IsLess:
+    max = i;
+    assert(!(max == min) || (min == i));
+  }
+  while (1);
+  
+  return 0;
+}
+
+
 PTHLexer* PTHManager::CreateLexer(unsigned FileID, const FileEntry* FE) {
   
   if (!FE)
@@ -634,6 +696,7 @@
   PTHSpellingSearch* ss = new PTHSpellingSearch(*this, len, spellingTable);
   SpellingMap[FileID] = ss;
   
-  return new PTHLexer(PP, SourceLocation::getFileLoc(FileID, 0), data, ppcond,
+  assert(PP && "No preprocessor set yet!");
+  return new PTHLexer(*PP, SourceLocation::getFileLoc(FileID, 0), data, ppcond,
                       *ss, *this); 
 }
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index e09ce13..cac78fe 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -45,9 +45,10 @@
 
 Preprocessor::Preprocessor(Diagnostic &diags, const LangOptions &opts,
                            TargetInfo &target, SourceManager &SM, 
-                           HeaderSearch &Headers) 
+                           HeaderSearch &Headers,
+                           IdentifierInfoLookup* IILookup)
   : Diags(diags), Features(opts), Target(target), FileMgr(Headers.getFileMgr()),
-    SourceMgr(SM), HeaderInfo(Headers), Identifiers(opts),
+    SourceMgr(SM), HeaderInfo(Headers), Identifiers(opts, IILookup),
     CurPPLexer(0), CurDirLookup(0), Callbacks(0) {
   ScratchBuf = new ScratchBuffer(SourceMgr);