Introduce a caching mechanism for macro expanded tokens. Previously macro expanded tokens were added to Preprocessor's bump allocator and never released, even after the TokenLexers that were lexing them had finished, thus wasting memory. A very "useful" boost library was causing clang to eat 1 GB just for the expanded macro tokens. Introduce a special cache that works like a stack; a TokenLexer can add the macro expanded tokens to the cache, and when it finishes, the tokens are removed from the end of the cache. Now the memory consumed by expanded tokens for that library is ~ 1.5 MB. Part of rdar://9327049. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@134105 91177308-0d34-0410-b5e6-96231b3b80d8

commit: 5b3284a9275a27f5c4410e25eb8933be540601d6 [log] [tgz]
author: Argyrios Kyrtzidis <akyrtzi@gmail.com> Wed Jun 29 22:20:11 2011 +0000
committer: Argyrios Kyrtzidis <akyrtzi@gmail.com> Wed Jun 29 22:20:11 2011 +0000
tree: de24f9c31897686380bf3b7feeaecb2e344f5064
parent: 8ccac3de1335f1cfd7cea56ba1cefcf0b724ce3f [diff]
diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp
index bf0a7fb..bf28199 100644
--- a/lib/Lex/PPLexerChange.cpp
+++ b/lib/Lex/PPLexerChange.cpp

@@ -265,6 +265,10 @@
   assert(CurTokenLexer && !CurPPLexer &&
          "Ending a macro when currently in a #include file!");
 
+  if (!MacroExpandingLexersStack.empty() &&
+      MacroExpandingLexersStack.back().first == CurTokenLexer.get())
+    removeCachedMacroExpandedTokensOfLastLexer();
+
   // Delete or cache the now-dead macro expander.
   if (NumCachedTokenLexers == TokenLexerCacheSize)
     CurTokenLexer.reset();

diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp
index 9e8533a..d7c4def 100644
--- a/lib/Lex/PPMacroExpansion.cpp
+++ b/lib/Lex/PPMacroExpansion.cpp

@@ -22,6 +22,7 @@
 #include "clang/Lex/CodeCompletionHandler.h"
 #include "clang/Lex/ExternalPreprocessorSource.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Config/config.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cstdio>
@@ -490,6 +491,46 @@
                            isVarargsElided, *this);
 }
 
+/// \brief Keeps macro expanded tokens for TokenLexers.
+///
+/// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
+/// going to lex to the cache, and when it finishes they are removed from the
+/// end of the cache. Returns the cached copy, or null if \p tokens is empty.
+Token *Preprocessor::cacheMacroExpandedTokens(TokenLexer *tokLexer,
+                                              llvm::ArrayRef<Token> tokens) {
+  assert(tokLexer);
+  if (tokens.empty())
+    return 0;
+
+  size_t newIndex = MacroExpandedTokens.size();
+  bool cacheNeedsToGrow = tokens.size() >
+                      MacroExpandedTokens.capacity()-MacroExpandedTokens.size(); 
+  MacroExpandedTokens.append(tokens.begin(), tokens.end());
+
+  if (cacheNeedsToGrow) {
+    // Go through all the TokenLexers whose 'Tokens' pointer points into the
+    // buffer and update the pointers to the (potentially) new buffer array.
+    for (unsigned i = 0, e = MacroExpandingLexersStack.size(); i != e; ++i) {
+      TokenLexer *prevLexer;
+      size_t tokIndex;
+      llvm::tie(prevLexer, tokIndex) = MacroExpandingLexersStack[i];
+      prevLexer->Tokens = MacroExpandedTokens.data() + tokIndex;
+    }
+  }
+
+  MacroExpandingLexersStack.push_back(std::make_pair(tokLexer, newIndex));
+  return MacroExpandedTokens.data() + newIndex;
+}
+
+void Preprocessor::removeCachedMacroExpandedTokensOfLastLexer() {
+  assert(!MacroExpandingLexersStack.empty());
+  size_t tokIndex = MacroExpandingLexersStack.back().second;
+  assert(tokIndex < MacroExpandedTokens.size());
+  // Pop the last lexer's cached macro expanded tokens from the end.
+  MacroExpandedTokens.resize(tokIndex);
+  MacroExpandingLexersStack.pop_back();
+}
+
 /// ComputeDATE_TIME - Compute the current time, enter it into the specified
 /// scratch buffer, then return DATELoc/TIMELoc locations with the position of
 /// the identifier tokens inserted.

diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index 48a2388..2f43c8e 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp

@@ -118,6 +118,8 @@
 
 Preprocessor::~Preprocessor() {
   assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
+  assert(MacroExpandingLexersStack.empty() && MacroExpandedTokens.empty() &&
+         "Preprocessor::HandleEndOfTokenLexer should have cleared those");
 
   while (!IncludeMacroStack.empty()) {
     delete IncludeMacroStack.back().TheLexer;
@@ -226,7 +228,7 @@
 }
 
 size_t Preprocessor::getTotalMemory() const {
-  return BP.getTotalMemory();
+  return BP.getTotalMemory() + MacroExpandedTokens.capacity()*sizeof(Token);
 }
 
 Preprocessor::macro_iterator

diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp
index e7cff8b..f30c44e 100644
--- a/lib/Lex/TokenLexer.cpp
+++ b/lib/Lex/TokenLexer.cpp

@@ -284,15 +284,11 @@
     assert(!OwnsTokens && "This would leak if we already own the token list");
     // This is deleted in the dtor.
     NumTokens = ResultToks.size();
-    llvm::BumpPtrAllocator &Alloc = PP.getPreprocessorAllocator();
-    Token *Res =
-      static_cast<Token *>(Alloc.Allocate(sizeof(Token)*ResultToks.size(),
-                                          llvm::alignOf<Token>()));
-    if (NumTokens)
-      memcpy(Res, &ResultToks[0], NumTokens*sizeof(Token));
-    Tokens = Res;
+    // The tokens will be added to Preprocessor's cache and will be removed
+    // when this TokenLexer finishes lexing them.
+    Tokens = PP.cacheMacroExpandedTokens(this, ResultToks);
 
-    // The preprocessor bump pointer owns these tokens, not us.
+    // The preprocessor's macro expanded token cache owns these tokens, not us.
     OwnsTokens = false;
   }
 }
commit	5b3284a9275a27f5c4410e25eb8933be540601d6	[log] [tgz]
author	Argyrios Kyrtzidis <akyrtzi@gmail.com>	Wed Jun 29 22:20:11 2011 +0000
committer	Argyrios Kyrtzidis <akyrtzi@gmail.com>	Wed Jun 29 22:20:11 2011 +0000
tree	de24f9c31897686380bf3b7feeaecb2e344f5064
parent	8ccac3de1335f1cfd7cea56ba1cefcf0b724ce3f [diff]