Make the Preprocessor more memory efficient and improve macro instantiation diagnostics.

When a macro instantiation occurs, reserve a SLocEntry chunk whose length is the
full length of the macro definition source. Set the spelling location of this chunk
to point to the start of the macro definition; any tokens that are lexed directly
from the macro definition will get a location from this chunk with the appropriate offset.

Any tokens that come from argument expansion, the '##' paste operator, etc. have their
instantiation location point at the appropriate place in the instantiated macro definition
(the argument identifier and the '##' token, respectively).
This improves macro instantiation diagnostics:

Before:

t.c:5:9: error: invalid operands to binary expression ('struct S' and 'int')
int y = M(/);
        ^~~~
t.c:5:11: note: instantiated from:
int y = M(/);
          ^

After:

t.c:5:9: error: invalid operands to binary expression ('struct S' and 'int')
int y = M(/);
        ^~~~
t.c:3:20: note: instantiated from:
#define M(op) (foo op 3);
               ~~~ ^  ~
t.c:5:11: note: instantiated from:
int y = M(/);
          ^

The memory savings for a candidate boost library that abuses the preprocessor are:

- 32% fewer SLocEntries (37M -> 25M)
- 30% reduction in PCH file size (900M -> 635M)
- 50% reduction in memory usage for the SLocEntry table (1.6G -> 800M)

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@134587 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 608bd9d..6d25d2b 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -683,7 +683,7 @@
     return SourceLocation();
 
   if (Loc.isMacroID()) {
-    if (Offset > 0 || !SM.isAtEndOfMacroInstantiation(Loc))
+    if (Offset > 0 || !SM.isAtEndOfMacroInstantiation(Loc, Features))
       return SourceLocation(); // Points inside the macro instantiation.
 
     // Continue and find the location just after the macro instantiation.
diff --git a/lib/Lex/MacroArgs.cpp b/lib/Lex/MacroArgs.cpp
index dee7da3..968c15e 100644
--- a/lib/Lex/MacroArgs.cpp
+++ b/lib/Lex/MacroArgs.cpp
@@ -185,7 +185,8 @@
 /// a character literal for the Microsoft charize (#@) extension.
 ///
 Token MacroArgs::StringifyArgument(const Token *ArgToks,
-                                   Preprocessor &PP, bool Charify) {
+                                   Preprocessor &PP, bool Charify,
+                                   SourceLocation hashInstLoc) {
   Token Tok;
   Tok.startToken();
   Tok.setKind(Charify ? tok::char_constant : tok::string_literal);
@@ -273,14 +274,15 @@
     }
   }
 
-  PP.CreateString(&Result[0], Result.size(), Tok);
+  PP.CreateString(&Result[0], Result.size(), Tok, hashInstLoc);
   return Tok;
 }
 
 /// getStringifiedArgument - Compute, cache, and return the specified argument
 /// that has been 'stringified' as required by the # operator.
 const Token &MacroArgs::getStringifiedArgument(unsigned ArgNo,
-                                               Preprocessor &PP) {
+                                               Preprocessor &PP,
+                                               SourceLocation hashInstLoc) {
   assert(ArgNo < NumUnexpArgTokens && "Invalid argument number!");
   if (StringifiedArgs.empty()) {
     StringifiedArgs.resize(getNumArguments());
@@ -288,6 +290,7 @@
            sizeof(StringifiedArgs[0])*getNumArguments());
   }
   if (StringifiedArgs[ArgNo].isNot(tok::string_literal))
-    StringifiedArgs[ArgNo] = StringifyArgument(getUnexpArgument(ArgNo), PP);
+    StringifiedArgs[ArgNo] = StringifyArgument(getUnexpArgument(ArgNo), PP,
+                                               /*Charify=*/false, hashInstLoc);
   return StringifiedArgs[ArgNo];
 }
diff --git a/lib/Lex/MacroArgs.h b/lib/Lex/MacroArgs.h
index 6ff4856..a962dac 100644
--- a/lib/Lex/MacroArgs.h
+++ b/lib/Lex/MacroArgs.h
@@ -20,6 +20,7 @@
   class MacroInfo;
   class Preprocessor;
   class Token;
+  class SourceLocation;
 
 /// MacroArgs - An instance of this class captures information about
 /// the formal arguments specified to a function-like macro invocation.
@@ -86,7 +87,8 @@
 
   /// getStringifiedArgument - Compute, cache, and return the specified argument
   /// that has been 'stringified' as required by the # operator.
-  const Token &getStringifiedArgument(unsigned ArgNo, Preprocessor &PP);
+  const Token &getStringifiedArgument(unsigned ArgNo, Preprocessor &PP,
+                                      SourceLocation hashInstLoc);
 
   /// getNumArguments - Return the number of arguments passed into this macro
   /// invocation.
@@ -106,7 +108,8 @@
   /// a character literal for the Microsoft charize (#@) extension.
   ///
   static Token StringifyArgument(const Token *ArgToks,
-                                 Preprocessor &PP, bool Charify = false);
+                                 Preprocessor &PP, bool Charify,
+                                 SourceLocation hashInstLoc);
   
   
   /// deallocate - This should only be called by the Preprocessor when managing
diff --git a/lib/Lex/MacroInfo.cpp b/lib/Lex/MacroInfo.cpp
index 66d87a1..0a16a25 100644
--- a/lib/Lex/MacroInfo.cpp
+++ b/lib/Lex/MacroInfo.cpp
@@ -25,6 +25,7 @@
   IsUsed = false;
   IsAllowRedefinitionsWithoutWarning = false;
   IsWarnIfUnused = false;
+  IsDefinitionLengthCached = false;
 
   ArgumentList = 0;
   NumArguments = 0;
@@ -43,11 +44,42 @@
   IsUsed = MI.IsUsed;
   IsAllowRedefinitionsWithoutWarning = MI.IsAllowRedefinitionsWithoutWarning;
   IsWarnIfUnused = MI.IsWarnIfUnused;
+  IsDefinitionLengthCached = MI.IsDefinitionLengthCached;
+  DefinitionLength = MI.DefinitionLength;
   ArgumentList = 0;
   NumArguments = 0;
   setArgumentList(MI.ArgumentList, MI.NumArguments, PPAllocator);
 }
 
+unsigned MacroInfo::getDefinitionLengthSlow(SourceManager &SM) const {
+  assert(!IsDefinitionLengthCached);  // Cache-miss path only.
+  IsDefinitionLengthCached = true;  // DefinitionLength is set on every path.
+
+  if (ReplacementTokens.empty())
+    return (DefinitionLength = 0);  // No replacement tokens.
+
+  const Token &firstToken = ReplacementTokens.front();
+  const Token &lastToken = ReplacementTokens.back();
+  SourceLocation macroStart = firstToken.getLocation();
+  SourceLocation macroEnd = lastToken.getLocation();
+  assert(macroStart.isValid() && macroEnd.isValid());
+  assert((macroStart.isFileID() || firstToken.is(tok::comment)) &&
+         "Macro defined in macro?");
+  assert((macroEnd.isFileID() || lastToken.is(tok::comment)) &&
+         "Macro defined in macro?");
+  std::pair<FileID, unsigned>
+      startInfo = SM.getDecomposedInstantiationLoc(macroStart);
+  std::pair<FileID, unsigned>
+      endInfo = SM.getDecomposedInstantiationLoc(macroEnd);
+  assert(startInfo.first == endInfo.first &&
+         "Macro definition spanning multiple FileIDs ?");
+  assert(startInfo.second <= endInfo.second);
+  DefinitionLength = endInfo.second - startInfo.second;  // Offset delta.
+  DefinitionLength += lastToken.getLength();  // Plus the last token itself.
+
+  return DefinitionLength;
+}
+
 /// isIdenticalTo - Return true if the specified macro definition is equal to
 /// this macro in spelling, arguments, and whitespace.  This is used to emit
 /// duplicate definition warnings.  This implements the rules in C99 6.10.3.
diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp
index f30c44e..db37fe1 100644
--- a/lib/Lex/TokenLexer.cpp
+++ b/lib/Lex/TokenLexer.cpp
@@ -40,6 +40,28 @@
   OwnsTokens = false;
   DisableMacroExpansion = false;
   NumTokens = Macro->tokens_end()-Macro->tokens_begin();
+  MacroExpansionStart = SourceLocation();
+
+  SourceManager &SM = PP.getSourceManager();
+  MacroStartSLocOffset = SM.getNextOffset();
+
+  if (NumTokens > 0) {
+    assert(Tokens[0].getLocation().isValid());
+    assert((Tokens[0].getLocation().isFileID() || Tokens[0].is(tok::comment)) &&
+           "Macro defined in macro?");
+    assert(InstantiateLocStart.isValid());
+
+    // Reserve a source location entry chunk for the length of the macro
+    // definition. Tokens that get lexed directly from the definition will
+    // have their locations pointing inside this chunk. This is to avoid
+    // creating separate source location entries for each token.
+    SourceLocation macroStart = SM.getInstantiationLoc(Tokens[0].getLocation());
+    MacroDefStartInfo = SM.getDecomposedLoc(macroStart);
+    MacroExpansionStart = SM.createInstantiationLoc(macroStart,
+                                              InstantiateLocStart,
+                                              InstantiateLocEnd,
+                                              Macro->getDefinitionLength(SM));
+  }
 
   // If this is a function-like macro, expand the arguments and change
   // Tokens to point to the expanded tokens.
@@ -72,6 +94,7 @@
   InstantiateLocStart = InstantiateLocEnd = SourceLocation();
   AtStartOfLine = false;
   HasLeadingSpace = false;
+  MacroExpansionStart = SourceLocation();
 
   // Set HasLeadingSpace/AtStartOfLine so that the first token will be
   // returned unmodified.
@@ -119,13 +142,19 @@
       int ArgNo = Macro->getArgumentNum(Tokens[i+1].getIdentifierInfo());
       assert(ArgNo != -1 && "Token following # is not an argument?");
 
+      SourceLocation hashInstLoc;
+      if(InstantiateLocStart.isValid()) {
+        hashInstLoc = getMacroExpansionLocation(CurTok.getLocation());
+        assert(hashInstLoc.isValid() && "Expected '#' to come from definition");
+      }
+
       Token Res;
       if (CurTok.is(tok::hash))  // Stringify
-        Res = ActualArgs->getStringifiedArgument(ArgNo, PP);
+        Res = ActualArgs->getStringifiedArgument(ArgNo, PP, hashInstLoc);
       else {
         // 'charify': don't bother caching these.
         Res = MacroArgs::StringifyArgument(ActualArgs->getUnexpArgument(ArgNo),
-                                           PP, true);
+                                           PP, true, hashInstLoc);
       }
 
       // The stringified/charified string leading space flag gets set to match
@@ -185,6 +214,20 @@
         unsigned NumToks = MacroArgs::getArgLength(ResultArgToks);
         ResultToks.append(ResultArgToks, ResultArgToks+NumToks);
 
+        if(InstantiateLocStart.isValid()) {
+          SourceManager &SM = PP.getSourceManager();
+          SourceLocation curInst =
+              getMacroExpansionLocation(CurTok.getLocation());
+          assert(curInst.isValid() &&
+                 "Expected arg identifier to come from definition");
+          for (unsigned i = FirstResult, e = ResultToks.size(); i != e; ++i) {
+            Token &Tok = ResultToks[i];
+            Tok.setLocation(SM.createInstantiationLoc(Tok.getLocation(),
+                                                      curInst, curInst,
+                                                      Tok.getLength()));
+          }
+        }
+
         // If any tokens were substituted from the argument, the whitespace
         // before the first token should match the whitespace of the arg
         // identifier.
@@ -220,6 +263,21 @@
 
       ResultToks.append(ArgToks, ArgToks+NumToks);
 
+      if(InstantiateLocStart.isValid()) {
+        SourceManager &SM = PP.getSourceManager();
+        SourceLocation curInst =
+            getMacroExpansionLocation(CurTok.getLocation());
+        assert(curInst.isValid() &&
+               "Expected arg identifier to come from definition");
+        for (unsigned i = ResultToks.size() - NumToks, e = ResultToks.size();
+               i != e; ++i) {
+          Token &Tok = ResultToks[i];
+          Tok.setLocation(SM.createInstantiationLoc(Tok.getLocation(),
+                                                    curInst, curInst,
+                                                    Tok.getLength()));
+        }
+      }
+
       // If this token (the macro argument) was supposed to get leading
       // whitespace, transfer this information onto the first token of the
       // expansion.
@@ -333,17 +391,29 @@
     TokenIsFromPaste = true;
   }
 
+  SourceManager &SM = PP.getSourceManager();
   // The token's current location indicate where the token was lexed from.  We
   // need this information to compute the spelling of the token, but any
   // diagnostics for the expanded token should appear as if they came from
   // InstantiationLoc.  Pull this information together into a new SourceLocation
   // that captures all of this.
-  if (InstantiateLocStart.isValid()) {   // Don't do this for token streams.
-    SourceManager &SM = PP.getSourceManager();
-    Tok.setLocation(SM.createInstantiationLoc(Tok.getLocation(),
-                                              InstantiateLocStart,
-                                              InstantiateLocEnd,
-                                              Tok.getLength()));
+  if (InstantiateLocStart.isValid() &&   // Don't do this for token streams.
+      // Check that the token's location was not already set properly.
+      SM.isBeforeInSourceLocationOffset(Tok.getLocation(),
+                                        MacroStartSLocOffset)) {
+    SourceLocation instLoc;
+    if (Tok.is(tok::comment)) {
+      instLoc = SM.createInstantiationLoc(Tok.getLocation(),
+                                          InstantiateLocStart,
+                                          InstantiateLocEnd,
+                                          Tok.getLength());
+    } else {
+      instLoc = getMacroExpansionLocation(Tok.getLocation());
+      assert(instLoc.isValid() &&
+             "Location for token not coming from definition was not set!");
+    }
+
+    Tok.setLocation(instLoc);
   }
 
   // If this is the first token, set the lexical properties of the token to
@@ -381,9 +451,10 @@
 bool TokenLexer::PasteTokens(Token &Tok) {
   llvm::SmallString<128> Buffer;
   const char *ResultTokStrPtr = 0;
+  SourceLocation PasteOpLoc;
   do {
     // Consume the ## operator.
-    SourceLocation PasteOpLoc = Tokens[CurToken].getLocation();
+    PasteOpLoc = Tokens[CurToken].getLocation();
     ++CurToken;
     assert(!isAtEnd() && "No token on the RHS of a paste operator!");
 
@@ -509,12 +580,30 @@
     // Transfer properties of the LHS over the the Result.
     Result.setFlagValue(Token::StartOfLine , Tok.isAtStartOfLine());
     Result.setFlagValue(Token::LeadingSpace, Tok.hasLeadingSpace());
-
+    
     // Finally, replace LHS with the result, consume the RHS, and iterate.
     ++CurToken;
     Tok = Result;
   } while (!isAtEnd() && Tokens[CurToken].is(tok::hashhash));
 
+  // The token's current location indicate where the token was lexed from.  We
+  // need this information to compute the spelling of the token, but any
+  // diagnostics for the expanded token should appear as if the token was
+  // instantiated from the (##) operator. Pull this information together into
+  // a new SourceLocation that captures all of this.
+  if (InstantiateLocStart.isValid()) {
+    SourceManager &SM = PP.getSourceManager();
+    SourceLocation pasteLocInst =
+        getMacroExpansionLocation(PasteOpLoc);
+    assert(pasteLocInst.isValid() &&
+           "Expected '##' to come from definition");
+
+    Tok.setLocation(SM.createInstantiationLoc(Tok.getLocation(),
+                                              pasteLocInst,
+                                              pasteLocInst,
+                                              Tok.getLength()));
+  }
+
   // Now that we got the result token, it will be subject to expansion.  Since
   // token pasting re-lexes the result token in raw mode, identifier information
   // isn't looked up.  As such, if the result is an identifier, look up id info.
@@ -558,3 +647,23 @@
 
   PP.HandleMicrosoftCommentPaste(Tok);
 }
+
+/// \brief If \arg loc is a file location inside the current macro definition,
+/// returns the matching location inside the reserved macro expansion chunk
+/// (MacroExpansionStart plus the relative offset); else SourceLocation().
+SourceLocation TokenLexer::getMacroExpansionLocation(SourceLocation loc) const {
+  assert(InstantiateLocStart.isValid() && MacroExpansionStart.isValid() &&
+         "Not appropriate for token streams");
+  assert(loc.isValid());
+  
+  SourceManager &SM = PP.getSourceManager();
+  unsigned relativeOffset;  // Out-param of isInFileID below.
+  if (loc.isFileID() &&
+      SM.isInFileID(loc,
+                    MacroDefStartInfo.first, MacroDefStartInfo.second,
+                    Macro->getDefinitionLength(SM), &relativeOffset)) {
+    return MacroExpansionStart.getFileLocWithOffset(relativeOffset);
+  }
+
+  return SourceLocation();  // loc is not a file location in this definition.
+}