This change refactors some of the low-level lexer interfaces a bit.
Token now has a class of kinds for "literals", which includes
numeric constants, strings, etc.  These tokens can optionally carry
a pointer to the start of the token in the lexer buffer.  This
makes it faster to get a token's spelling and do other gymnastics,
because we don't have to go through source locations.

This change is performance-neutral, but will make other changes
more feasible down the road.



git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@63028 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 9e8d1aa..03d81b3 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -624,7 +624,9 @@
     return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
   
   // Update the location of token as well as BufferPtr.
+  const char *TokStart = BufferPtr;
   FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
+  Result.setLiteralData(TokStart);
 }
 
 /// LexStringLiteral - Lex the remainder of a string literal, after having lexed
@@ -655,8 +657,10 @@
     Diag(NulCharacter, diag::null_in_string);
 
   // Update the location of the token as well as the BufferPtr instance var.
+  const char *TokStart = BufferPtr;
   FormTokenWithChars(Result, CurPtr,
                      Wide ? tok::wide_string_literal : tok::string_literal);
+  Result.setLiteralData(TokStart);
 }
 
 /// LexAngledStringLiteral - Lex the remainder of an angled string literal,
@@ -687,7 +691,9 @@
     Diag(NulCharacter, diag::null_in_string);
   
   // Update the location of token as well as BufferPtr.
+  const char *TokStart = BufferPtr;
   FormTokenWithChars(Result, CurPtr, tok::angle_string_literal);
+  Result.setLiteralData(TokStart);
 }
 
 
@@ -735,7 +741,9 @@
     Diag(NulCharacter, diag::null_in_char);
 
   // Update the location of token as well as BufferPtr.
+  const char *TokStart = BufferPtr;
   FormTokenWithChars(Result, CurPtr, tok::char_constant);
+  Result.setLiteralData(TokStart);
 }
 
 /// SkipWhitespace - Efficiently skip over a series of whitespace characters.
@@ -901,9 +909,8 @@
   Spelling += "*/";    // add suffix.
   
   Result.setKind(tok::comment);
-  Result.setLocation(PP->CreateString(&Spelling[0], Spelling.size(),
-                                      Result.getLocation()));
-  Result.setLength(Spelling.size());
+  PP->CreateString(&Spelling[0], Spelling.size(), Result,
+                   Result.getLocation());
   return true;
 }
 
diff --git a/lib/Lex/MacroArgs.cpp b/lib/Lex/MacroArgs.cpp
index 77c96e0..c3d7738 100644
--- a/lib/Lex/MacroArgs.cpp
+++ b/lib/Lex/MacroArgs.cpp
@@ -225,8 +225,7 @@
     }
   }
   
-  Tok.setLength(Result.size());
-  Tok.setLocation(PP.CreateString(&Result[0], Result.size()));
+  PP.CreateString(&Result[0], Result.size(), Tok);
   return Tok;
 }
 
diff --git a/lib/Lex/PPCaching.cpp b/lib/Lex/PPCaching.cpp
index ed67754..53aa09c 100644
--- a/lib/Lex/PPCaching.cpp
+++ b/lib/Lex/PPCaching.cpp
@@ -89,7 +89,7 @@
 }
 
 void Preprocessor::AnnotatePreviousCachedTokens(const Token &Tok) {
-  assert(Tok.isAnnotationToken() && "Expected annotation token");
+  assert(Tok.isAnnotation() && "Expected annotation token");
   assert(CachedLexPos != 0 && "Expected to have some cached tokens");
   assert(CachedTokens[CachedLexPos-1].getLocation() == Tok.getAnnotationEndLoc()
          && "The annotation should be until the most recent cached token");
diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp
index b14df73..b3e921c 100644
--- a/lib/Lex/PPMacroExpansion.cpp
+++ b/lib/Lex/PPMacroExpansion.cpp
@@ -429,10 +429,15 @@
   char TmpBuffer[100];
   sprintf(TmpBuffer, "\"%s %2d %4d\"", Months[TM->tm_mon], TM->tm_mday, 
           TM->tm_year+1900);
-  DATELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer));
+  
+  Token TmpTok;
+  TmpTok.startToken();
+  PP.CreateString(TmpBuffer, strlen(TmpBuffer), TmpTok);
+  DATELoc = TmpTok.getLocation();
 
   sprintf(TmpBuffer, "\"%02d:%02d:%02d\"", TM->tm_hour, TM->tm_min, TM->tm_sec);
-  TIMELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer));
+  PP.CreateString(TmpBuffer, strlen(TmpBuffer), TmpTok);
+  TIMELoc = TmpTok.getLocation();
 }
 
 /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
@@ -463,8 +468,8 @@
             SourceMgr.getInstantiationLineNumber(Tok.getLocation()));
     unsigned Length = strlen(TmpBuffer)-1;
     Tok.setKind(tok::numeric_constant);
-    Tok.setLength(Length);
-    Tok.setLocation(CreateString(TmpBuffer, Length+1, Tok.getLocation()));
+    CreateString(TmpBuffer, Length+1, Tok, Tok.getLocation());
+    Tok.setLength(Length);  // Trim off space.
   } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__) {
     SourceLocation Loc = Tok.getLocation();
     if (II == Ident__BASE_FILE__) {
@@ -480,8 +485,7 @@
     std::string FN =SourceMgr.getSourceName(SourceMgr.getInstantiationLoc(Loc));
     FN = '"' + Lexer::Stringify(FN) + '"';
     Tok.setKind(tok::string_literal);
-    Tok.setLength(FN.size());
-    Tok.setLocation(CreateString(&FN[0], FN.size(), Tok.getLocation()));
+    CreateString(&FN[0], FN.size(), Tok, Tok.getLocation());
   } else if (II == Ident__DATE__) {
     if (!DATELoc.isValid())
       ComputeDATE_TIME(DATELoc, TIMELoc, *this);
@@ -511,8 +515,8 @@
     sprintf(TmpBuffer, "%u ", Depth);
     unsigned Length = strlen(TmpBuffer)-1;
     Tok.setKind(tok::numeric_constant);
-    Tok.setLength(Length);
-    Tok.setLocation(CreateString(TmpBuffer, Length, Tok.getLocation()));
+    CreateString(TmpBuffer, Length, Tok, Tok.getLocation());
+    Tok.setLength(Length);  // Trim off space.
   } else if (II == Ident__TIMESTAMP__) {
     // MSVC, ICC, GCC, VisualAge C++ extension.  The generated string should be
     // of the form "Ddd Mmm dd hh::mm::ss yyyy", which is returned by asctime.
@@ -540,8 +544,8 @@
     unsigned Len = strlen(TmpBuffer);
     TmpBuffer[Len-1] = '"';  // Replace the newline with a quote.
     Tok.setKind(tok::string_literal);
-    Tok.setLength(Len);
-    Tok.setLocation(CreateString(TmpBuffer, Len+1, Tok.getLocation()));
+    CreateString(TmpBuffer, Len+1, Tok, Tok.getLocation());
+    Tok.setLength(Len);  // Trim off space.
   } else {
     assert(0 && "Unknown identifier!");
   }
diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp
index 922af09..a80ba6b 100644
--- a/lib/Lex/Pragma.cpp
+++ b/lib/Lex/Pragma.cpp
@@ -156,7 +156,10 @@
   
   // Plop the string (including the newline and trailing null) into a buffer
   // where we can lex it.
-  SourceLocation TokLoc = CreateString(&StrVal[0], StrVal.size());
+  Token TmpTok;
+  TmpTok.startToken();
+  CreateString(&StrVal[0], StrVal.size(), TmpTok);
+  SourceLocation TokLoc = TmpTok.getLocation();
 
   // Make and enter a lexer object so that we lex and expand the tokens just
   // like any others.
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index d0a15e4..cb0c850 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -266,13 +266,20 @@
   }
 
   // Otherwise, compute the start of the token in the input lexer buffer.
-  const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation());
+  const char *TokStart = 0;
+  
+  if (Tok.isLiteral())
+    TokStart = Tok.getLiteralData();
+  
+  if (TokStart == 0)
+    TokStart = SourceMgr.getCharacterData(Tok.getLocation());
 
   // If this token contains nothing interesting, return it directly.
   if (!Tok.needsCleaning()) {
     Buffer = TokStart;
     return Tok.getLength();
   }
+  
   // Otherwise, hard case, relex the characters into the string.
   char *OutBuf = const_cast<char*>(Buffer);
   for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
@@ -291,11 +298,20 @@
 /// CreateString - Plop the specified string into a scratch buffer and return a
 /// location for it.  If specified, the source location provides a source
 /// location for the token.
-SourceLocation Preprocessor::
-CreateString(const char *Buf, unsigned Len, SourceLocation SLoc) {
-  if (SLoc.isValid())
-    return ScratchBuf->getToken(Buf, Len, SLoc);
-  return ScratchBuf->getToken(Buf, Len);
+void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok,
+                                SourceLocation InstantiationLoc) {
+  Tok.setLength(Len);
+  
+  const char *DestPtr;
+  SourceLocation Loc = ScratchBuf->getToken(Buf, Len, DestPtr);
+  
+  if (InstantiationLoc.isValid())
+    Loc = SourceMgr.createInstantiationLoc(Loc, InstantiationLoc, Len);
+  Tok.setLocation(Loc);
+  
+  // If this is a literal token, set the pointer data.
+  if (Tok.isLiteral())
+    Tok.setLiteralData(DestPtr);
 }
 
 
diff --git a/lib/Lex/ScratchBuffer.cpp b/lib/Lex/ScratchBuffer.cpp
index 695a536..9253bc0 100644
--- a/lib/Lex/ScratchBuffer.cpp
+++ b/lib/Lex/ScratchBuffer.cpp
@@ -30,10 +30,14 @@
 /// return a SourceLocation that refers to the token.  This is just like the
 /// method below, but returns a location that indicates the physloc of the
 /// token.
-SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len) {
+SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
+                                       const char *&DestPtr) {
   if (BytesUsed+Len > ScratchBufSize)
     AllocScratchBuffer(Len);
   
+  // Return a pointer to the character data.
+  DestPtr = CurBuffer+BytesUsed;
+  
   // Copy the token data into the buffer.
   memcpy(CurBuffer+BytesUsed, Buf, Len);
 
@@ -43,16 +47,6 @@
   return BufferStartLoc.getFileLocWithOffset(BytesUsed-Len);
 }
 
-
-/// getToken - Splat the specified text into a temporary MemoryBuffer and
-/// return a SourceLocation that refers to the token.  The SourceLoc value
-/// gives a virtual location that the token will appear to be from.
-SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
-                                       SourceLocation SourceLoc) {
-  // Map the physloc to the specified sourceloc.
-  return SourceMgr.createInstantiationLoc(getToken(Buf, Len), SourceLoc, Len);
-}
-
 void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) {
   // Only pay attention to the requested length if it is larger than our default
   // page size.  If it is, we allocate an entire chunk for it.  This is to
diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp
index 3707ef1..7ae61be 100644
--- a/lib/Lex/TokenLexer.cpp
+++ b/lib/Lex/TokenLexer.cpp
@@ -352,6 +352,7 @@
 /// If this returns true, the caller should immediately return the token.
 bool TokenLexer::PasteTokens(Token &Tok) {
   llvm::SmallVector<char, 128> Buffer;
+  const char *ResultTokStrPtr = 0;
   do {
     // Consume the ## operator.
     SourceLocation PasteOpLoc = Tokens[CurToken].getLocation();
@@ -386,8 +387,16 @@
     
     // Plop the pasted result (including the trailing newline and null) into a
     // scratch buffer where we can lex it.
-    SourceLocation ResultTokLoc = PP.CreateString(&Buffer[0], Buffer.size());
+    Token ResultTokTmp;
+    ResultTokTmp.startToken();
     
+    // Claim that the tmp token is a string_literal so that we can get the
+    // character pointer back from CreateString.
+    ResultTokTmp.setKind(tok::string_literal);
+    PP.CreateString(&Buffer[0], Buffer.size(), ResultTokTmp);
+    SourceLocation ResultTokLoc = ResultTokTmp.getLocation();
+    ResultTokStrPtr = ResultTokTmp.getLiteralData();
+
     // Lex the resultant pasted token into Result.
     Token Result;
     
@@ -405,20 +414,16 @@
       assert(ResultTokLoc.isFileID() &&
              "Should be a raw location into scratch buffer");
       SourceManager &SourceMgr = PP.getSourceManager();
-      std::pair<FileID, unsigned> LocInfo =
-        SourceMgr.getDecomposedLoc(ResultTokLoc);
+      FileID LocFileID = SourceMgr.getFileID(ResultTokLoc);
       
-      const char *ScratchBufStart =SourceMgr.getBufferData(LocInfo.first).first;
+      const char *ScratchBufStart = SourceMgr.getBufferData(LocFileID).first;
       
       // Make a lexer to lex this string from.  Lex just this one token.
-      const char *ResultStrData = ScratchBufStart+LocInfo.second;
-      
       // Make a lexer object so that we lex and expand the paste result.
-      Lexer TL(SourceMgr.getLocForStartOfFile(LocInfo.first),
-               PP.getLangOptions(), 
-               ScratchBufStart,
-               ResultStrData, 
-               ResultStrData+LHSLen+RHSLen /*don't include null*/);
+      Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID),
+               PP.getLangOptions(), ScratchBufStart,
+               ResultTokStrPtr, 
+               ResultTokStrPtr+LHSLen+RHSLen /*don't include null*/);
       
       // Lex a token in raw mode.  This way it won't look up identifiers
       // automatically, lexing off the end will return an eof token, and
@@ -442,12 +447,12 @@
           RHS.is(tok::slash)) {
         HandleMicrosoftCommentPaste(Tok);
         return true;
-      } else {
-        // TODO: If not in assembler language mode.
-        PP.Diag(PasteOpLoc, diag::err_pp_bad_paste)
-          << std::string(Buffer.begin(), Buffer.end()-1);
-        return false;
       }
+      
+      // TODO: If not in assembler language mode.
+      PP.Diag(PasteOpLoc, diag::err_pp_bad_paste)
+        << std::string(Buffer.begin(), Buffer.end()-1);
+      return false;
     }
     
     // Turn ## into 'unknown' to avoid # ## # from looking like a paste
@@ -471,7 +476,7 @@
   if (Tok.is(tok::identifier)) {
     // Look up the identifier info for the token.  We disabled identifier lookup
     // by saying we're skipping contents, so we need to do this manually.
-    Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok));
+    Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok, ResultTokStrPtr));
   }
   return false;
 }
diff --git a/lib/Rewrite/TokenRewriter.cpp b/lib/Rewrite/TokenRewriter.cpp
index aab6fb0..e17e801 100644
--- a/lib/Rewrite/TokenRewriter.cpp
+++ b/lib/Rewrite/TokenRewriter.cpp
@@ -78,14 +78,15 @@
   
 
 TokenRewriter::token_iterator
-TokenRewriter::AddTokenBefore(token_iterator I, const char *Val){
+TokenRewriter::AddTokenBefore(token_iterator I, const char *Val) {
   unsigned Len = strlen(Val);
   
   // Plop the string into the scratch buffer, then create a token for this
   // string.
   Token Tok;
   Tok.startToken();
-  Tok.setLocation(ScratchBuf->getToken(Val, Len));
+  const char *Spelling;
+  Tok.setLocation(ScratchBuf->getToken(Val, Len, Spelling));
   Tok.setLength(Len);
   
   // TODO: Form a whole lexer around this and relex the token!  For now, just