PR37189 Fix incorrect end source location and spelling for a split '>>' token.

When a '>>' token is split into two '>' tokens (in C++11 onwards), or (as an
extension) when we do the same for other tokens starting with a '>', we can't
just use a location pointing to the first '>' as the location of the split
token, because that would result in our miscomputing the length and spelling
for the token. As a consequence, for example, a refactoring replacing 'A<X>'
with something else would sometimes replace one character too many, and
similarly diagnostics highlighting a template-id source range would highlight
one character too many.

Fix this by creating an expansion range covering the first character of the
'>>' token, whose spelling is '>'. For this to work, we generalize the
expansion range of a macro FileID to be either a token range (the common case)
or a character range (used in this new case).

llvm-svn: 331155
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index 9179650..e9478d6 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -708,12 +708,9 @@
                         TheTok.isAtStartOfLine());
 }
 
-/// AdvanceToTokenCharacter - Given a location that specifies the start of a
-/// token, return a new location that specifies a character within the token.
-SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,
-                                              unsigned CharNo,
-                                              const SourceManager &SM,
-                                              const LangOptions &LangOpts) {
+unsigned Lexer::getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo,
+                                     const SourceManager &SM,
+                                     const LangOptions &LangOpts) {
   // Figure out how many physical characters away the specified expansion
   // character is.  This needs to take into consideration newlines and
   // trigraphs.
@@ -722,7 +719,7 @@
 
   // If they request the first char of the token, we're trivially done.
   if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
-    return TokStart;
+    return 0;
 
   unsigned PhysOffset = 0;
 
@@ -731,7 +728,7 @@
   // chars, this method is extremely fast.
   while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
     if (CharNo == 0)
-      return TokStart.getLocWithOffset(PhysOffset);
+      return PhysOffset;
     ++TokPtr;
     --CharNo;
     ++PhysOffset;
@@ -753,7 +750,7 @@
   if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
     PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
 
-  return TokStart.getLocWithOffset(PhysOffset);
+  return PhysOffset;
 }
 
 /// \brief Computes the source location just past the end of the
@@ -987,7 +984,7 @@
 
     // Loc points to the argument id of the macro definition, move to the
     // macro expansion.
-    Loc = SM.getImmediateExpansionRange(Loc).first;
+    Loc = SM.getImmediateExpansionRange(Loc).getBegin();
     SourceLocation SpellLoc = Expansion.getSpellingLoc();
     if (SpellLoc.isFileID())
       break; // No inner macro.
@@ -1020,7 +1017,7 @@
   assert(Loc.isMacroID() && "Only reasonable to call this on macros");
   // Walk past macro argument expanions.
   while (SM.isMacroArgExpansion(Loc))
-    Loc = SM.getImmediateExpansionRange(Loc).first;
+    Loc = SM.getImmediateExpansionRange(Loc).getBegin();
 
   // If the macro's spelling has no FileID, then it's actually a token paste
   // or stringization (or similar) and not a macro at all.
@@ -1030,7 +1027,7 @@
   // Find the spelling location of the start of the non-argument expansion
   // range. This is where the macro name was spelled in order to begin
   // expanding this macro.
-  Loc = SM.getSpellingLoc(SM.getImmediateExpansionRange(Loc).first);
+  Loc = SM.getSpellingLoc(SM.getImmediateExpansionRange(Loc).getBegin());
 
   // Dig out the buffer where the macro name was spelled and the extents of the
   // name so that we can render it into the expansion note.
@@ -1112,10 +1109,9 @@
 
   // Figure out the expansion loc range, which is the range covered by the
   // original _Pragma(...) sequence.
-  std::pair<SourceLocation,SourceLocation> II =
-    SM.getImmediateExpansionRange(FileLoc);
+  CharSourceRange II = SM.getImmediateExpansionRange(FileLoc);
 
-  return SM.createExpansionLoc(SpellingLoc, II.first, II.second, TokLen);
+  return SM.createExpansionLoc(SpellingLoc, II.getBegin(), II.getEnd(), TokLen);
 }
 
 /// getSourceLocation - Return a source location identifier for the specified
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index e1a0438..31c8e13 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -2043,7 +2043,7 @@
   // If the filename string was the result of macro expansions, set the include
   // position on the file where it will be included and after the expansions.
   if (IncludePos.isMacroID())
-    IncludePos = SourceMgr.getExpansionRange(IncludePos).second;
+    IncludePos = SourceMgr.getExpansionRange(IncludePos).getEnd();
   FileID FID = SourceMgr.createFileID(File, IncludePos, FileCharacter);
   assert(FID.isValid() && "Expected valid file ID");
 
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index aa8414f..edf508a 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -1690,7 +1690,7 @@
     // can matter for a function-like macro that expands to contain __LINE__.
     // Skip down through expansion points until we find a file loc for the
     // end of the expansion history.
-    Loc = SourceMgr.getExpansionRange(Loc).second;
+    Loc = SourceMgr.getExpansionRange(Loc).getEnd();
     PresumedLoc PLoc = SourceMgr.getPresumedLoc(Loc);
 
     // __LINE__ expands to a simple numeric value.
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 4370bc1..94490bd 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -487,6 +487,22 @@
     Tok.setLiteralData(DestPtr);
 }
 
+SourceLocation Preprocessor::SplitToken(SourceLocation Loc, unsigned Length) {
+  auto &SM = getSourceManager();
+  SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
+  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(SpellingLoc);
+  bool Invalid = false;
+  StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
+  if (Invalid)
+    return SourceLocation();
+
+  // FIXME: We could consider re-using spelling for tokens we see repeatedly.
+  const char *DestPtr;
+  SourceLocation Spelling =
+      ScratchBuf->getToken(Buffer.data() + LocInfo.second, Length, DestPtr);
+  return SM.createTokenSplitLoc(Spelling, Loc, Loc.getLocWithOffset(Length));
+}
+
 Module *Preprocessor::getCurrentModule() {
   if (!getLangOpts().isCompilingModule())
     return nullptr;
diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp
index 6bf0b84..724ccb3 100644
--- a/clang/lib/Lex/TokenLexer.cpp
+++ b/clang/lib/Lex/TokenLexer.cpp
@@ -865,9 +865,9 @@
     EndLoc = getExpansionLocForMacroDefLoc(EndLoc);
   FileID MacroFID = SM.getFileID(MacroExpansionStart);
   while (SM.getFileID(StartLoc) != MacroFID)
-    StartLoc = SM.getImmediateExpansionRange(StartLoc).first;
+    StartLoc = SM.getImmediateExpansionRange(StartLoc).getBegin();
   while (SM.getFileID(EndLoc) != MacroFID)
-    EndLoc = SM.getImmediateExpansionRange(EndLoc).second;
+    EndLoc = SM.getImmediateExpansionRange(EndLoc).getEnd();
     
   LHSTok.setLocation(SM.createExpansionLoc(LHSTok.getLocation(), StartLoc, EndLoc,
                                         LHSTok.getLength()));