PTH: Fix the remaining cases where the spelling cache in the PTH file was missed when it should have been hit.  This shaves another 7% off PTH time for -Eonly on Cocoa.h.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@62186 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp
index a9eb88a..4a84f9b 100644
--- a/lib/Lex/PTHLexer.cpp
+++ b/lib/Lex/PTHLexer.cpp
@@ -23,7 +23,6 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/OwningPtr.h"
-#include "llvm/Support/Streams.h"
 
 using namespace clang;
 
@@ -311,8 +310,8 @@
 
   if (I == SpellingMap.end())
       return 0;
-    
-  return I->second->getSpellingBinarySearch(fpos, Buffer);  
+
+  return I->second->getSpellingBinarySearch(fpos, Buffer);
 }
 
 unsigned PTHManager::getSpellingAtPTHOffset(unsigned PTHOffset,
@@ -335,7 +334,7 @@
   const char* p = LinearItr;
   unsigned len = 0;
   
-  if (!SpellingsLeft)
+  if (p == TableEnd)
     return getSpellingBinarySearch(fpos, Buffer);
   
   do {
@@ -348,8 +347,6 @@
     if (TokOffset > fpos)
       return getSpellingBinarySearch(fpos, Buffer);
     
-    --SpellingsLeft;
-    
     // Did we find a matching token offset for this spelling?
     if (TokOffset == fpos) {
       uint32_t SpellingPTHOffset = 
@@ -358,14 +355,15 @@
         | (((uint32_t) ((uint8_t) p[6])) << 16)
         | (((uint32_t) ((uint8_t) p[7])) << 24);
       
+      p += SpellingEntrySize;
       len = PTHMgr.getSpellingAtPTHOffset(SpellingPTHOffset, Buffer);
       break;
     }
 
     // No match.  Keep on looking.
-    p += sizeof(uint32_t)*2;
+    p += SpellingEntrySize;
   }
-  while (SpellingsLeft);
+  while (p != TableEnd);
 
   LinearItr = p;
   return len;
@@ -374,13 +372,18 @@
 unsigned PTHSpellingSearch::getSpellingBinarySearch(unsigned fpos,
                                                     const char *& Buffer) {
   
-  assert ((TableEnd - TableBeg) % SpellingEntrySize == 0);
+  assert((TableEnd - TableBeg) % SpellingEntrySize == 0);
+  
+  if (TableEnd == TableBeg)
+    return 0;
+  
+  assert(TableEnd > TableBeg);
   
   unsigned min = 0;
   const char* tb = TableBeg;
-  unsigned max = (TableEnd - tb) / SpellingEntrySize;
+  unsigned max = NumSpellings;
 
-  while (min != max) {
+  do {
     unsigned i = (max - min) / 2 + min;
     const char* p = tb + (i * SpellingEntrySize);
     
@@ -392,6 +395,7 @@
     
     if (TokOffset > fpos) {
       max = i;
+      assert(!(max == min) || (min == i));
       continue;
     }
     
@@ -408,6 +412,7 @@
     
     return PTHMgr.getSpellingAtPTHOffset(SpellingPTHOffset, Buffer);
   }
+  while (min != max);
   
   return 0;
 }
@@ -415,13 +420,11 @@
 unsigned PTHLexer::getSpelling(SourceLocation sloc, const char *&Buffer) {
   SourceManager& SM = PP->getSourceManager();
   sloc = SM.getPhysicalLoc(sloc);
-  unsigned fid = SM.getCanonicalFileID(sloc);
+  unsigned fid = sloc.getFileID();
   unsigned fpos = SM.getFullFilePos(sloc);
   
-  if (fid == FileID)
-    return MySpellingSrch.getSpellingLinearSearch(fpos, Buffer);
-
-  return PTHMgr.getSpelling(fid, fpos, Buffer);
+  return (fid == FileID ) ? MySpellingSrch.getSpellingLinearSearch(fpos, Buffer)
+                          : PTHMgr.getSpelling(fid, fpos, Buffer);  
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index ee6b0f8..a815265 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -195,9 +195,20 @@
 /// UCNs, etc.
 std::string Preprocessor::getSpelling(const Token &Tok) const {
   assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
+  const char* TokStart;
+  
+  if (PTH) {
+    SourceLocation sloc = SourceMgr.getPhysicalLoc(Tok.getLocation());
+    unsigned fid = sloc.getFileID();
+    unsigned fpos = SourceMgr.getFullFilePos(sloc);
+    if (unsigned len = PTH->getSpelling(fid, fpos, TokStart)) {
+      assert(!Tok.needsCleaning());
+      return std::string(TokStart, TokStart+len);
+    }
+  }
   
   // If this token contains nothing interesting, return it directly.
-  const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation());
+  TokStart = SourceMgr.getCharacterData(Tok.getLocation());
   if (!Tok.needsCleaning())
     return std::string(TokStart, TokStart+Tok.getLength());
   
@@ -238,21 +249,32 @@
   }
 
   // If using PTH, try and get the spelling from the PTH file.
-  if (CurPTHLexer) {
-    // We perform the const_cast<> here because we will only have a PTHLexer 
-    // when grabbing a stream of tokens from the PTH file (and thus the
-    // Preprocessor state is allowed to change).  The PTHLexer can assume we are
-    // getting token spellings in the order of tokens, and thus can update
-    // its internal state so that it can quickly fetch spellings from the PTH
-    // file.
-    unsigned len =
-      const_cast<PTHLexer*>(CurPTHLexer.get())->getSpelling(Tok.getLocation(),
-                                                            Buffer);
+  if (PTH) {
+    unsigned len;
     
+    if (CurPTHLexer) {
+      // We perform the const_cast<> here because we will only have a PTHLexer 
+      // when grabbing a stream of tokens from the PTH file (and thus the
+      // Preprocessor state is allowed to change).  The PTHLexer can assume we are
+      // getting token spellings in the order of tokens, and thus can update
+      // its internal state so that it can quickly fetch spellings from the PTH
+      // file.
+      len =
+        const_cast<PTHLexer*>(CurPTHLexer.get())->getSpelling(Tok.getLocation(),
+                                                              Buffer);      
+    }
+    else {
+      SourceLocation sloc = SourceMgr.getPhysicalLoc(Tok.getLocation());
+      unsigned fid = sloc.getFileID();
+      unsigned fpos = SourceMgr.getFullFilePos(sloc);      
+      len = PTH->getSpelling(fid, fpos, Buffer);      
+    }
+
     // Did we find a spelling?  If so return its length.  Otherwise fall
     // back to the default behavior for getting the spelling by looking at
-    // at the source code.
-    if (len) return len;
+    // the source code.
+    if (len)
+      return len;
   }
 
   // Otherwise, compute the start of the token in the input lexer buffer.