Reimplement SourceLocation.  Instead of holding a fileid/offset pair,
it now contains a bit discriminating between mapped locations and
file locations.  This separates the tables for macros and files in
SourceManager, and allows better separation of concepts in the rest
of the compiler.  It also lets us have *many* macro instantiations
before running out of 'addressing space'.

This is also more efficient, because testing whether something is a
macro expansion is now a bit test instead of a table lookup.  The table
lookup also required having a SourceManager around; the bit test does not.

This is fully functional, but there are several refinements and
optimizations left.


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@40103 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/Lex/Lexer.cpp b/Lex/Lexer.cpp
index 1775b2f..a1db060 100644
--- a/Lex/Lexer.cpp
+++ b/Lex/Lexer.cpp
@@ -27,17 +27,17 @@
 #include "clang/Lex/Lexer.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Basic/Diagnostic.h"
-#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include <cctype>
 using namespace clang;
 
 static void InitCharacterInfo();
 
-Lexer::Lexer(const llvm::MemoryBuffer *File, unsigned fileid, Preprocessor &pp,
-             const char *BufStart, const char *BufEnd)
+Lexer::Lexer(const llvm::MemoryBuffer *File, SourceLocation fileloc,
+             Preprocessor &pp, const char *BufStart, const char *BufEnd)
   : BufferEnd(BufEnd ? BufEnd : File->getBufferEnd()),
-    InputFile(File), CurFileID(fileid), PP(pp), Features(PP.getLangOptions()) {
+    InputFile(File), FileLoc(fileloc), PP(pp), Features(PP.getLangOptions()) {
   Is_PragmaLexer = false;
   IsMainFile = false;
   InitCharacterInfo();
@@ -151,7 +151,24 @@
 SourceLocation Lexer::getSourceLocation(const char *Loc) const {
   assert(Loc >= InputFile->getBufferStart() && Loc <= BufferEnd &&
          "Location out of range for this buffer!");
-  return SourceLocation(CurFileID, Loc-InputFile->getBufferStart());
+
+  // In the normal case, we're just lexing from a simple file buffer, return
+  // the file id from FileLoc with the offset specified.
+  unsigned CharNo = Loc-InputFile->getBufferStart();
+  if (FileLoc.isFileID())
+    return SourceLocation::getFileLoc(FileLoc.getFileID(), CharNo);
+  
+  // Otherwise, we're lexing "mapped tokens".  This is used for things like
+  // _Pragma handling.  Combine the instantiation location of FileLoc with the
+  // physical location.
+  SourceManager &SourceMgr = PP.getSourceManager();
+
+  // Create a new SLoc which is expanded from logical(FileLoc) but whose
+  // characters come from phys(FileLoc)+Offset.
+  SourceLocation VirtLoc = SourceMgr.getLogicalLoc(FileLoc);
+  SourceLocation PhysLoc = SourceMgr.getPhysicalLoc(FileLoc);
+  PhysLoc = SourceLocation::getFileLoc(PhysLoc.getFileID(), CharNo);
+  return SourceMgr.getInstantiationLoc(PhysLoc, VirtLoc);
 }
 
 
diff --git a/Lex/MacroExpander.cpp b/Lex/MacroExpander.cpp
index e474906..9a80ac3 100644
--- a/Lex/MacroExpander.cpp
+++ b/Lex/MacroExpander.cpp
@@ -582,7 +582,8 @@
       assert(FileID && "Could not get FileID for paste?");
       
       // Make a lexer object so that we lex and expand the paste result.
-      Lexer *TL = new Lexer(SourceMgr.getBuffer(FileID), FileID, PP,
+      Lexer *TL = new Lexer(SourceMgr.getBuffer(FileID),
+                            SourceLocation::getFileLoc(FileID, 0), PP,
                             ResultStrData, 
                             ResultStrData+LHSLen+RHSLen /*don't include null*/);
       
diff --git a/Lex/Pragma.cpp b/Lex/Pragma.cpp
index de59934..596b7e7 100644
--- a/Lex/Pragma.cpp
+++ b/Lex/Pragma.cpp
@@ -140,12 +140,12 @@
   SourceLocation TokLoc = CreateString(&StrVal[0], StrVal.size(), StrLoc);
   const char *StrData = SourceMgr.getCharacterData(TokLoc);
 
-  unsigned FileID = TokLoc.getFileID();
+  unsigned FileID = SourceMgr.getPhysicalLoc(TokLoc).getFileID();
   assert(FileID && "Could not get FileID for _Pragma?");
 
   // Make and enter a lexer object so that we lex and expand the tokens just
   // like any others.
-  Lexer *TL = new Lexer(SourceMgr.getBuffer(FileID), FileID, *this,
+  Lexer *TL = new Lexer(SourceMgr.getBuffer(FileID), TokLoc, *this,
                         StrData, StrData+StrVal.size()-1 /* no null */);
   
   // Ensure that the lexer thinks it is inside a directive, so that end \n will
@@ -175,10 +175,10 @@
   }
   
   // Get the current file lexer we're looking at.  Ignore _Pragma 'files' etc.
-  unsigned FileID = getCurrentFileLexer()->getCurFileID();
+  SourceLocation FileLoc = getCurrentFileLexer()->getFileLoc();
   
   // Mark the file as a once-only file now.
-  HeaderInfo.MarkFileIncludeOnce(SourceMgr.getFileEntryForFileID(FileID));
+  HeaderInfo.MarkFileIncludeOnce(SourceMgr.getFileEntryForLoc(FileLoc));
 }
 
 /// HandlePragmaPoison - Handle #pragma GCC poison.  PoisonTok is the 'poison'.
@@ -233,8 +233,7 @@
   Lexer *TheLexer = getCurrentFileLexer();
   
   // Mark the file as a system header.
-  const FileEntry *File = 
-    SourceMgr.getFileEntryForFileID(TheLexer->getCurFileID());
+  const FileEntry *File = SourceMgr.getFileEntryForLoc(TheLexer->getFileLoc());
   HeaderInfo.MarkFileSystemHeader(File);
   
   // Notify the client, if desired, that we are in a new source file.
@@ -274,8 +273,8 @@
     return Diag(FilenameTok, diag::err_pp_file_not_found,
                 std::string(FilenameStart, FilenameEnd));
   
-  unsigned FileID = getCurrentFileLexer()->getCurFileID();
-  const FileEntry *CurFile = SourceMgr.getFileEntryForFileID(FileID);
+  SourceLocation FileLoc = getCurrentFileLexer()->getFileLoc();
+  const FileEntry *CurFile = SourceMgr.getFileEntryForLoc(FileLoc);
 
   // If this file is older than the file it depends on, emit a diagnostic.
   if (CurFile && CurFile->getModificationTime() < File->getModificationTime()) {
diff --git a/Lex/Preprocessor.cpp b/Lex/Preprocessor.cpp
index 7b6c571..c3fd554 100644
--- a/Lex/Preprocessor.cpp
+++ b/Lex/Preprocessor.cpp
@@ -258,34 +258,41 @@
 /// token, return a new location that specifies a character within the token.
 SourceLocation Preprocessor::AdvanceToTokenCharacter(SourceLocation TokStart, 
                                                      unsigned CharNo) {
-  // If they request the first char of the token, we're trivially done.
-  if (CharNo == 0) return TokStart;
+  // If they request the first char of the token, we're trivially done.  If this
+  // is a macro expansion, it doesn't make sense to point to a character within
+  // the instantiation point (the name).  We could point to the source
+  // character, but without also pointing to instantiation info, this is
+  // confusing.
+  if (CharNo == 0 || TokStart.isMacroID()) return TokStart;
   
   // Figure out how many physical characters away the specified logical
   // character is.  This needs to take into consideration newlines and
   // trigraphs.
-  const char *TokStartPtr = SourceMgr.getCharacterData(TokStart);
-  const char *TokPtr = TokStartPtr;
+  const char *TokPtr = SourceMgr.getCharacterData(TokStart);
+  unsigned PhysOffset = 0;
   
   // The usual case is that tokens don't contain anything interesting.  Skip
   // over the uninteresting characters.  If a token only consists of simple
   // chars, this method is extremely fast.
   while (CharNo && Lexer::isObviouslySimpleCharacter(*TokPtr))
-    ++TokPtr, --CharNo;
+    ++TokPtr, --CharNo, ++PhysOffset;
   
   // If we have a character that may be a trigraph or escaped newline, create a
   // lexer to parse it correctly.
-  unsigned FileID = TokStart.getFileID();
-  const llvm::MemoryBuffer *SrcBuf = SourceMgr.getBuffer(FileID);
   if (CharNo != 0) {
     // Create a lexer starting at this token position.
-    Lexer TheLexer(SrcBuf, FileID, *this, TokPtr);
+    const llvm::MemoryBuffer *SrcBuf =SourceMgr.getBuffer(TokStart.getFileID());
+    Lexer TheLexer(SrcBuf, TokStart, *this, TokPtr);
     LexerToken Tok;
     // Skip over characters the remaining characters.
+    const char *TokStartPtr = TokPtr;
     for (; CharNo; --CharNo)
       TheLexer.getAndAdvanceChar(TokPtr, Tok);
+    
+    PhysOffset += TokPtr-TokStartPtr;
   }
-  return SourceLocation(FileID, TokPtr-SrcBuf->getBufferStart());
+  
+  return TokStart.getFileLocWithOffset(PhysOffset);
 }
 
 
@@ -306,8 +313,8 @@
   // info about where the current file is.
   const FileEntry *CurFileEnt = 0;
   if (!FromDir) {
-    unsigned TheFileID = getCurrentFileLexer()->getCurFileID();
-    CurFileEnt = SourceMgr.getFileEntryForFileID(TheFileID);
+    SourceLocation FileLoc = getCurrentFileLexer()->getFileLoc();
+    CurFileEnt = SourceMgr.getFileEntryForLoc(FileLoc);
   }
   
   // Do a standard file entry lookup.
@@ -321,7 +328,7 @@
   // to one of the headers on the #include stack.  Walk the list of the current
   // headers on the #include stack and pass them to HeaderInfo.
   if (CurLexer && !CurLexer->Is_PragmaLexer) {
-    CurFileEnt = SourceMgr.getFileEntryForFileID(CurLexer->getCurFileID());
+    CurFileEnt = SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc());
     if ((FE = HeaderInfo.LookupSubframeworkHeader(FilenameStart, FilenameEnd,
                                                   CurFileEnt)))
       return FE;
@@ -330,8 +337,7 @@
   for (unsigned i = 0, e = IncludeMacroStack.size(); i != e; ++i) {
     IncludeStackInfo &ISEntry = IncludeMacroStack[e-i-1];
     if (ISEntry.TheLexer && !ISEntry.TheLexer->Is_PragmaLexer) {
-      CurFileEnt =
-        SourceMgr.getFileEntryForFileID(ISEntry.TheLexer->getCurFileID());
+      CurFileEnt = SourceMgr.getFileEntryForLoc(ISEntry.TheLexer->getFileLoc());
       if ((FE = HeaderInfo.LookupSubframeworkHeader(FilenameStart, FilenameEnd,
                                                     CurFileEnt)))
         return FE;
@@ -385,7 +391,8 @@
     MaxIncludeStackDepth = IncludeMacroStack.size();
 
   const llvm::MemoryBuffer *Buffer = SourceMgr.getBuffer(FileID);
-  Lexer *TheLexer = new Lexer(Buffer, FileID, *this);
+  Lexer *TheLexer = new Lexer(Buffer, SourceLocation::getFileLoc(FileID, 0),
+                              *this);
   if (isMainFile) TheLexer->setIsMainFile();
   EnterSourceFileWithLexer(TheLexer, CurDir);
 }  
@@ -410,10 +417,10 @@
     
     // Get the file entry for the current file.
     if (const FileEntry *FE = 
-          SourceMgr.getFileEntryForFileID(CurLexer->getCurFileID()))
+           SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc()))
       FileType = HeaderInfo.getFileDirFlavor(FE);
     
-    Callbacks->FileChanged(SourceLocation(CurLexer->getCurFileID(), 0),
+    Callbacks->FileChanged(CurLexer->getFileLoc(),
                            PPCallbacks::EnterFile, FileType);
   }
 }
@@ -878,7 +885,7 @@
   
   if (II == Ident__LINE__) {
     // __LINE__ expands to a simple numeric value.
-    sprintf(TmpBuffer, "%u", SourceMgr.getLineNumber(Tok.getLocation()));
+    sprintf(TmpBuffer, "%u", SourceMgr.getLogicalLineNumber(Tok.getLocation()));
     unsigned Length = strlen(TmpBuffer);
     Tok.setKind(tok::numeric_constant);
     Tok.setLength(Length);
@@ -887,15 +894,15 @@
     SourceLocation Loc = Tok.getLocation();
     if (II == Ident__BASE_FILE__) {
       Diag(Tok, diag::ext_pp_base_file);
-      SourceLocation NextLoc = SourceMgr.getIncludeLoc(Loc.getFileID());
-      while (NextLoc.getFileID() != 0) {
+      SourceLocation NextLoc = SourceMgr.getIncludeLoc(Loc);
+      while (NextLoc.isValid()) {
         Loc = NextLoc;
-        NextLoc = SourceMgr.getIncludeLoc(Loc.getFileID());
+        NextLoc = SourceMgr.getIncludeLoc(Loc);
       }
     }
     
     // Escape this filename.  Turn '\' -> '\\' '"' -> '\"'
-    std::string FN = SourceMgr.getSourceName(Loc);
+    std::string FN = SourceMgr.getSourceName(SourceMgr.getLogicalLoc(Loc));
     FN = '"' + Lexer::Stringify(FN) + '"';
     Tok.setKind(tok::string_literal);
     Tok.setLength(FN.size());
@@ -917,9 +924,9 @@
 
     // Compute the include depth of this token.
     unsigned Depth = 0;
-    SourceLocation Loc = SourceMgr.getIncludeLoc(Tok.getLocation().getFileID());
-    for (; Loc.getFileID() != 0; ++Depth)
-      Loc = SourceMgr.getIncludeLoc(Loc.getFileID());
+    SourceLocation Loc = SourceMgr.getIncludeLoc(Tok.getLocation());
+    for (; Loc.isValid(); ++Depth)
+      Loc = SourceMgr.getIncludeLoc(Loc);
     
     // __INCLUDE_LEVEL__ expands to a simple numeric value.
     sprintf(TmpBuffer, "%u", Depth);
@@ -938,7 +945,7 @@
     Lexer *TheLexer = getCurrentFileLexer();
     
     if (TheLexer)
-      CurFile = SourceMgr.getFileEntryForFileID(TheLexer->getCurFileID());
+      CurFile = SourceMgr.getFileEntryForLoc(TheLexer->getFileLoc());
     
     // If this file is older than the file it depends on, emit a diagnostic.
     const char *Result;
@@ -1061,7 +1068,7 @@
           CurLexer->MIOpt.GetControllingMacroAtEndOfFile()) {
       // Okay, this has a controlling macro, remember in PerFileInfo.
       if (const FileEntry *FE = 
-            SourceMgr.getFileEntryForFileID(CurLexer->getCurFileID()))
+            SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc()))
         HeaderInfo.SetFileControllingMacro(FE, ControllingMacro);
     }
   }
@@ -1078,7 +1085,7 @@
       
       // Get the file entry for the current file.
       if (const FileEntry *FE = 
-            SourceMgr.getFileEntryForFileID(CurLexer->getCurFileID()))
+            SourceMgr.getFileEntryForLoc(CurLexer->getFileLoc()))
         FileType = HeaderInfo.getFileDirFlavor(FE);
 
       Callbacks->FileChanged(CurLexer->getSourceLocation(CurLexer->BufferPtr),
diff --git a/Lex/ScratchBuffer.cpp b/Lex/ScratchBuffer.cpp
index 12cb096..6678cef 100644
--- a/Lex/ScratchBuffer.cpp
+++ b/Lex/ScratchBuffer.cpp
@@ -43,7 +43,7 @@
   assert(BytesUsed-Len < (1 << SourceLocation::FilePosBits) &&
          "Out of range file position!");
   
-  return SourceLocation(FileID, BytesUsed-Len);
+  return SourceLocation::getFileLoc(FileID, BytesUsed-Len);
 }