PR37189 Fix incorrect end source location and spelling for a split '>>' token.

When a '>>' token is split into two '>' tokens (in C++11 onwards), or (as an
extension) when we do the same for other tokens starting with a '>', we can't
just use a location pointing to the first '>' as the location of the split
token, because that would result in our miscomputing the length and spelling
for the token. As a consequence, for example, a refactoring replacing 'A<X>'
with something else would sometimes replace one character too many, and
similarly diagnostics highlighting a template-id source range would highlight
one character too many.

Fix this by creating an expansion range covering the first character of the
'>>' token, whose spelling is '>'. For this to work, we generalize the
expansion range of a macro FileID to be either a token range (the common case)
or a character range (used in this new case).

llvm-svn: 331155
diff --git a/clang/lib/Parse/ParseTemplate.cpp b/clang/lib/Parse/ParseTemplate.cpp
index 88a5745..d089b24 100644
--- a/clang/lib/Parse/ParseTemplate.cpp
+++ b/clang/lib/Parse/ParseTemplate.cpp
@@ -775,6 +775,7 @@
   // What will be left once we've consumed the '>'.
   tok::TokenKind RemainingToken;
   const char *ReplacementStr = "> >";
+  bool MergeWithNextToken = false;
 
   switch (Tok.getKind()) {
   default:
@@ -800,6 +801,15 @@
   case tok::greaterequal:
     RemainingToken = tok::equal;
     ReplacementStr = "> =";
+
+    // Join two adjacent '=' tokens into one, for cases like:
+    //   void (*p)() = f<int>;
+    //   return f<int>==p;
+    if (NextToken().is(tok::equal) &&
+        areTokensAdjacent(Tok, NextToken())) {
+      RemainingToken = tok::equalequal;
+      MergeWithNextToken = true;
+    }
     break;
 
   case tok::greatergreaterequal:
@@ -807,22 +817,35 @@
     break;
   }
 
-  // This template-id is terminated by a token which starts with a '>'. Outside
-  // C++11, this is now error recovery, and in C++11, this is error recovery if
-  // the token isn't '>>' or '>>>'.
-  // '>>>' is for CUDA, where this sequence of characters is parsed into
-  // tok::greatergreatergreater, rather than two separate tokens.
+  // This template-id is terminated by a token that starts with a '>'.
+  // Outside C++11 and Objective-C, this is now error recovery.
   //
-  // We always allow this for Objective-C type parameter and type argument
-  // lists.
-  RAngleLoc = Tok.getLocation();
+  // C++11 allows this when the token is '>>', and in CUDA + C++11 mode, we
+  // extend that treatment to also apply to the '>>>' token.
+  //
+  // Objective-C allows this in its type parameter / argument lists.
+
+  SourceLocation TokBeforeGreaterLoc = PrevTokLocation;
+  SourceLocation TokLoc = Tok.getLocation();
   Token Next = NextToken();
+
+  // Whether splitting the current token after the '>' would undesirably result
+  // in the remaining token pasting with the token after it. This excludes the
+  // MergeWithNextToken cases, which we've already handled.
+  bool PreventMergeWithNextToken =
+      (RemainingToken == tok::greater ||
+       RemainingToken == tok::greatergreater) &&
+      (Next.isOneOf(tok::greater, tok::greatergreater,
+                    tok::greatergreatergreater, tok::equal, tok::greaterequal,
+                    tok::greatergreaterequal, tok::equalequal)) &&
+      areTokensAdjacent(Tok, Next);
+
+  // Diagnose this situation as appropriate.
   if (!ObjCGenericList) {
-    // The source range of the '>>' or '>=' at the start of the token.
-    CharSourceRange ReplacementRange =
-        CharSourceRange::getCharRange(RAngleLoc,
-            Lexer::AdvanceToTokenCharacter(RAngleLoc, 2, PP.getSourceManager(),
-                                           getLangOpts()));
+    // The source range of the replaced token(s).
+    CharSourceRange ReplacementRange = CharSourceRange::getCharRange(
+        TokLoc, Lexer::AdvanceToTokenCharacter(TokLoc, 2, PP.getSourceManager(),
+                                               getLangOpts()));
 
     // A hint to put a space between the '>>'s. In order to make the hint as
     // clear as possible, we include the characters either side of the space in
@@ -833,13 +856,7 @@
     // A hint to put another space after the token, if it would otherwise be
     // lexed differently.
     FixItHint Hint2;
-    if ((RemainingToken == tok::greater ||
-         RemainingToken == tok::greatergreater) &&
-        (Next.isOneOf(tok::greater, tok::greatergreater,
-                      tok::greatergreatergreater, tok::equal,
-                      tok::greaterequal, tok::greatergreaterequal,
-                      tok::equalequal)) &&
-        areTokensAdjacent(Tok, Next))
+    if (PreventMergeWithNextToken)
       Hint2 = FixItHint::CreateInsertion(Next.getLocation(), " ");
 
     unsigned DiagId = diag::err_two_right_angle_brackets_need_space;
@@ -848,50 +865,63 @@
       DiagId = diag::warn_cxx98_compat_two_right_angle_brackets;
     else if (Tok.is(tok::greaterequal))
       DiagId = diag::err_right_angle_bracket_equal_needs_space;
-    Diag(Tok.getLocation(), DiagId) << Hint1 << Hint2;
+    Diag(TokLoc, DiagId) << Hint1 << Hint2;
   }
 
+  // Find the "length" of the resulting '>' token. This is not always 1, as it
+  // can contain escaped newlines.
+  unsigned GreaterLength = Lexer::getTokenPrefixLength(
+      TokLoc, 1, PP.getSourceManager(), getLangOpts());
+
+  // Annotate the source buffer to indicate that we split the token after the
+  // '>'. This allows us to properly find the end of, and extract the spelling
+  // of, the '>' token later.
+  RAngleLoc = PP.SplitToken(TokLoc, GreaterLength);
+
   // Strip the initial '>' from the token.
-  Token PrevTok = Tok;
-  if (RemainingToken == tok::equal && Next.is(tok::equal) &&
-      areTokensAdjacent(Tok, Next)) {
-    // Join two adjacent '=' tokens into one, for cases like:
-    //   void (*p)() = f<int>;
-    //   return f<int>==p;
+  bool CachingTokens = PP.IsPreviousCachedToken(Tok);
+
+  Token Greater = Tok;
+  Greater.setLocation(RAngleLoc);
+  Greater.setKind(tok::greater);
+  Greater.setLength(GreaterLength);
+
+  unsigned OldLength = Tok.getLength();
+  if (MergeWithNextToken) {
     ConsumeToken();
-    Tok.setKind(tok::equalequal);
-    Tok.setLength(Tok.getLength() + 1);
-  } else {
-    Tok.setKind(RemainingToken);
-    Tok.setLength(Tok.getLength() - 1);
+    OldLength += Tok.getLength();
   }
-  Tok.setLocation(Lexer::AdvanceToTokenCharacter(RAngleLoc, 1,
-                                                 PP.getSourceManager(),
-                                                 getLangOpts()));
 
-  // The advance from '>>' to '>' in a ObjectiveC template argument list needs
-  // to be properly reflected in the token cache to allow correct interaction
-  // between annotation and backtracking.
-  if (ObjCGenericList && PrevTok.getKind() == tok::greatergreater &&
-      RemainingToken == tok::greater && PP.IsPreviousCachedToken(PrevTok)) {
-    PrevTok.setKind(RemainingToken);
-    PrevTok.setLength(1);
-    // Break tok::greatergreater into two tok::greater but only add the second
-    // one in case the client asks to consume the last token.
+  Tok.setKind(RemainingToken);
+  Tok.setLength(OldLength - GreaterLength);
+
+  // Split the second token if lexing it normally would lex a different token
+  // (eg, the fifth token in 'A<B>>>' should re-lex as '>', not '>>').
+  SourceLocation AfterGreaterLoc = TokLoc.getLocWithOffset(GreaterLength);
+  if (PreventMergeWithNextToken)
+    AfterGreaterLoc = PP.SplitToken(AfterGreaterLoc, Tok.getLength());
+  Tok.setLocation(AfterGreaterLoc);
+
+  // Update the token cache to match what we just did if necessary.
+  if (CachingTokens) {
+    // If the previous cached token is being merged, delete it.
+    if (MergeWithNextToken)
+      PP.ReplacePreviousCachedToken({});
+
     if (ConsumeLastToken)
-      PP.ReplacePreviousCachedToken({PrevTok, Tok});
+      PP.ReplacePreviousCachedToken({Greater, Tok});
     else
-      PP.ReplacePreviousCachedToken({PrevTok});
+      PP.ReplacePreviousCachedToken({Greater});
   }
 
-  if (!ConsumeLastToken) {
-    // Since we're not supposed to consume the '>' token, we need to push
-    // this token and revert the current token back to the '>'.
+  if (ConsumeLastToken) {
+    PrevTokLocation = RAngleLoc;
+  } else {
+    PrevTokLocation = TokBeforeGreaterLoc;
     PP.EnterToken(Tok);
-    Tok.setKind(tok::greater);
-    Tok.setLength(1);
-    Tok.setLocation(RAngleLoc);
+    Tok = Greater;
   }
+
   return false;
 }