Introduced raw_identifier token kind.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@122394 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index da68495..5d9536f 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -266,21 +266,23 @@
                             const SourceManager &SourceMgr,
                             const LangOptions &Features, bool *Invalid) {
   assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
-  
-  // If this token is an identifier, just return the string from the identifier
-  // table, which is very quick.
-  if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
+
+  const char *TokStart = 0;
+  // NOTE: this has to be checked *before* testing for an IdentifierInfo.
+  if (Tok.is(tok::raw_identifier))
+    TokStart = Tok.getRawIdentifierData();
+  else if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
+    // Just return the string from the identifier table, which is very quick.
     Buffer = II->getNameStart();
     return II->getLength();
   }
-  
-  // Otherwise, compute the start of the token in the input lexer buffer.
-  const char *TokStart = 0;
-  
+
+  // NOTE: this can be checked even after testing for an IdentifierInfo.
   if (Tok.isLiteral())
     TokStart = Tok.getLiteralData();
-  
+
   if (TokStart == 0) {
+    // Compute the start of the token in the input lexer buffer.
     bool CharDataInvalid = false;
     TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid);
     if (Invalid)
@@ -290,13 +292,13 @@
       return 0;
     }
   }
-  
+
   // If this token contains nothing interesting, return it directly.
   if (!Tok.needsCleaning()) {
     Buffer = TokStart;
     return Tok.getLength();
   }
-  
+
   // Otherwise, hard case, relex the characters into the string.
   char *OutBuf = const_cast<char*>(Buffer);
   for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
@@ -307,7 +309,7 @@
   }
   assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
          "NeedsCleaning flag set on something that didn't need cleaning!");
-  
+
   return OutBuf-Buffer;
 }
 
@@ -473,10 +475,9 @@
       // we don't have an identifier table available. Instead, just look at
       // the raw identifier to recognize and categorize preprocessor directives.
       TheLexer.LexFromRawLexer(TheTok);
-      if (TheTok.getKind() == tok::identifier && !TheTok.needsCleaning()) {
-        const char *IdStart = Buffer->getBufferStart() 
-                            + TheTok.getLocation().getRawEncoding() - 1;
-        llvm::StringRef Keyword(IdStart, TheTok.getLength());
+      if (TheTok.getKind() == tok::raw_identifier && !TheTok.needsCleaning()) {
+        llvm::StringRef Keyword(TheTok.getRawIdentifierData(),
+                                TheTok.getLength());
         PreambleDirectiveKind PDK
           = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
               .Case("include", PDK_Skipped)
@@ -1046,19 +1047,17 @@
   if (C != '\\' && C != '?' && (C != '$' || !Features.DollarIdents)) {
 FinishIdentifier:
     const char *IdStart = BufferPtr;
-    FormTokenWithChars(Result, CurPtr, tok::identifier);
+    FormTokenWithChars(Result, CurPtr, tok::raw_identifier);
+    Result.setRawIdentifierData(IdStart);
 
     // If we are in raw mode, return this identifier raw.  There is no need to
     // look up identifier information or attempt to macro expand it.
-    if (LexingRawMode) return;
+    if (LexingRawMode)
+      return;
 
-    // Fill in Result.IdentifierInfo, looking up the identifier in the
-    // identifier table.
-    IdentifierInfo *II = PP->LookUpIdentifierInfo(Result, IdStart);
-
-    // Change the kind of this identifier to the appropriate token kind, e.g.
-    // turning "for" into a keyword.
-    Result.setKind(II->getTokenID());
+    // Fill in Result.IdentifierInfo and update the token kind,
+    // looking up the identifier in the identifier table.
+    IdentifierInfo *II = PP->LookUpIdentifierInfo(Result);
 
     // Finally, now that we know we have an identifier, pass this off to the
     // preprocessor, which may macro expand it or something.
diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp
index 467d485..5b65fd3 100644
--- a/lib/Lex/PPDirectives.cpp
+++ b/lib/Lex/PPDirectives.cpp
@@ -245,7 +245,7 @@
 
     // If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
     // something bogus), skip it.
-    if (Tok.isNot(tok::identifier)) {
+    if (Tok.isNot(tok::raw_identifier)) {
       CurPPLexer->ParsingPreprocessorDirective = false;
       // Restore comment saving mode.
       if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
@@ -257,12 +257,8 @@
     // to spell an i/e in a strange way that is another letter.  Skipping this
     // allows us to avoid looking up the identifier info for #define/#undef and
     // other common directives.
-    bool Invalid = false;
-    const char *RawCharData = SourceMgr.getCharacterData(Tok.getLocation(),
-                                                         &Invalid);
-    if (Invalid)
-      return;
-    
+    const char *RawCharData = Tok.getRawIdentifierData();
+
     char FirstChar = RawCharData[0];
     if (FirstChar >= 'a' && FirstChar <= 'z' &&
         FirstChar != 'i' && FirstChar != 'e') {
diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp
index e6a53a1..da66b50 100644
--- a/lib/Lex/Pragma.cpp
+++ b/lib/Lex/Pragma.cpp
@@ -292,7 +292,7 @@
     if (Tok.is(tok::eom)) return;
 
     // Can only poison identifiers.
-    if (Tok.isNot(tok::identifier)) {
+    if (Tok.isNot(tok::raw_identifier)) {
       Diag(Tok, diag::err_pp_invalid_poison);
       return;
     }
@@ -599,7 +599,7 @@
   // Create a Token from the string.
   Token MacroTok;
   MacroTok.startToken();
-  MacroTok.setKind(tok::identifier);
+  MacroTok.setKind(tok::raw_identifier);
   CreateString(&StrVal[1], StrVal.size() - 2, MacroTok);
 
   // Get the IdentifierInfo of MacroToPushTok.
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index 2d8f1a5..6fe414b 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -285,9 +285,12 @@
 llvm::StringRef Preprocessor::getSpelling(const Token &Tok,
                                           llvm::SmallVectorImpl<char> &Buffer,
                                           bool *Invalid) const {
-  // Try the fast path.
-  if (const IdentifierInfo *II = Tok.getIdentifierInfo())
-    return II->getName();
+  // NOTE: this has to be checked *before* testing for an IdentifierInfo.
+  if (Tok.isNot(tok::raw_identifier)) {
+    // Try the fast path.
+    if (const IdentifierInfo *II = Tok.getIdentifierInfo())
+      return II->getName();
+  }
 
   // Resize the buffer if we need to copy into it.
   if (Tok.needsCleaning())
@@ -313,8 +316,10 @@
                                            InstantiationLoc, Len);
   Tok.setLocation(Loc);
 
-  // If this is a literal token, set the pointer data.
-  if (Tok.isLiteral())
+  // If this is a raw identifier or a literal token, set the pointer data.
+  if (Tok.is(tok::raw_identifier))
+    Tok.setRawIdentifierData(DestPtr);
+  else if (Tok.isLiteral())
     Tok.setLiteralData(DestPtr);
 }
 
@@ -369,25 +374,29 @@
 // Lexer Event Handling.
 //===----------------------------------------------------------------------===//
 
-/// LookUpIdentifierInfo - Given a tok::identifier token, look up the
-/// identifier information for the token and install it into the token.
-IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier,
-                                                   const char *BufPtr) const {
-  assert(Identifier.is(tok::identifier) && "Not an identifier!");
-  assert(Identifier.getIdentifierInfo() == 0 && "Identinfo already exists!");
+/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
+/// identifier information for the token and install it into the token,
+/// updating the token kind accordingly.
+IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
+  assert(Identifier.getRawIdentifierData() != 0 && "No raw identifier data!");
 
   // Look up this token, see if it is a macro, or if it is a language keyword.
   IdentifierInfo *II;
-  if (BufPtr && !Identifier.needsCleaning()) {
+  if (!Identifier.needsCleaning()) {
     // No cleaning needed, just use the characters from the lexed buffer.
-    II = getIdentifierInfo(llvm::StringRef(BufPtr, Identifier.getLength()));
+    II = getIdentifierInfo(llvm::StringRef(Identifier.getRawIdentifierData(),
+                                           Identifier.getLength()));
   } else {
     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
     llvm::SmallString<64> IdentifierBuffer;
     llvm::StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
     II = getIdentifierInfo(CleanedStr);
   }
+
+  // Update the token info (identifier info and appropriate token kind).
   Identifier.setIdentifierInfo(II);
+  Identifier.setKind(II->getTokenID());
+
   return II;
 }
 
diff --git a/lib/Lex/TokenConcatenation.cpp b/lib/Lex/TokenConcatenation.cpp
index fc6db21..3e9e855 100644
--- a/lib/Lex/TokenConcatenation.cpp
+++ b/lib/Lex/TokenConcatenation.cpp
@@ -13,6 +13,7 @@
 
 #include "clang/Lex/TokenConcatenation.h"
 #include "clang/Lex/Preprocessor.h"
+#include "llvm/Support/ErrorHandling.h"
 using namespace clang;
 
 
@@ -165,7 +166,14 @@
   }
 
   switch (PrevKind) {
-  default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
+  default:
+    llvm_unreachable("InitAvoidConcatTokenInfo built wrong");
+    return true;
+
+  case tok::raw_identifier:
+    llvm_unreachable("tok::raw_identifier in non-raw lexing mode!");
+    return true;
+
   case tok::identifier:   // id+id or id+number or id+L"foo".
     // id+'.'... will not append.
     if (Tok.is(tok::numeric_constant))
diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp
index a0e5ae3..ea39b47 100644
--- a/lib/Lex/TokenLexer.cpp
+++ b/lib/Lex/TokenLexer.cpp
@@ -435,12 +435,13 @@
     // Lex the resultant pasted token into Result.
     Token Result;
 
-    if (Tok.is(tok::identifier) && RHS.is(tok::identifier)) {
+    if (Tok.isAnyIdentifier() && RHS.isAnyIdentifier()) {
       // Common paste case: identifier+identifier = identifier.  Avoid creating
       // a lexer and other overhead.
       PP.IncrementPasteCounter(true);
       Result.startToken();
-      Result.setKind(tok::identifier);
+      Result.setKind(tok::raw_identifier);
+      Result.setRawIdentifierData(ResultTokStrPtr);
       Result.setLocation(ResultTokLoc);
       Result.setLength(LHSLen+RHSLen);
     } else {
@@ -524,10 +525,10 @@
   // Now that we got the result token, it will be subject to expansion.  Since
   // token pasting re-lexes the result token in raw mode, identifier information
   // isn't looked up.  As such, if the result is an identifier, look up id info.
-  if (Tok.is(tok::identifier)) {
+  if (Tok.is(tok::raw_identifier)) {
     // Look up the identifier info for the token.  We disabled identifier lookup
     // by saying we're skipping contents, so we need to do this manually.
-    PP.LookUpIdentifierInfo(Tok, ResultTokStrPtr);
+    PP.LookUpIdentifierInfo(Tok);
   }
   return false;
 }