[clangd] Implement getBeginning for overloaded operators.

Summary:
This fixes bugs where navigation doesn't work when the cursor is inside an
overloaded operator token, e.g. `std::cout <^< "hello"`.

Reviewers: ilya-biryukov

Subscribers: MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D67695

llvm-svn: 373323
diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp
index ee0b74a..05ca7aa 100644
--- a/clang-tools-extra/clangd/SourceCode.cpp
+++ b/clang-tools-extra/clangd/SourceCode.cpp
@@ -237,6 +237,45 @@
   return halfOpenToRange(SM, CharSourceRange::getCharRange(TokLoc, End));
 }
 
+namespace { // Helpers that classify the raw token at a location.
+
+enum TokenFlavor { Identifier, Operator, Whitespace, Other };
+
+bool isOverloadedOperator(const Token &Tok) {
+  switch (Tok.getKind()) {
+#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemOnly)     \
+  case tok::Token:
+#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemOnly)
+#include "clang/Basic/OperatorKinds.def"
+    return true; // Shared by every case label the .def file expands to.
+
+  default:
+    break;
+  }
+  return false;
+}
+
+TokenFlavor getTokenFlavor(SourceLocation Loc, const SourceManager &SM,
+                           const LangOptions &LangOpts) {
+  Token Tok;
+  Tok.setKind(tok::NUM_TOKENS); // Sentinel to detect an untouched Tok.
+  if (Lexer::getRawToken(Loc, Tok, SM, LangOpts,
+                         /*IgnoreWhiteSpace*/ false))
+    return Other; // getRawToken returns true on failure.
+
+  // getRawToken returns false without setting Tok when Loc is on whitespace,
+  // so a surviving NUM_TOKENS sentinel (set above) identifies whitespace.
+  if (Tok.is(tok::TokenKind::NUM_TOKENS))
+    return Whitespace;
+  if (Tok.is(tok::TokenKind::raw_identifier))
+    return Identifier;
+  if (isOverloadedOperator(Tok))
+    return Operator;
+  return Other;
+}
+
+} // namespace
+
 SourceLocation getBeginningOfIdentifier(const Position &Pos,
                                         const SourceManager &SM,
                                         const LangOptions &LangOpts) {
@@ -247,27 +286,57 @@
     return SourceLocation();
   }
 
-  // GetBeginningOfToken(pos) is almost what we want, but does the wrong thing
-  // if the cursor is at the end of the identifier.
-  // Instead, we lex at GetBeginningOfToken(pos - 1). The cases are:
-  //  1) at the beginning of an identifier, we'll be looking at something
-  //  that isn't an identifier.
-  //  2) at the middle or end of an identifier, we get the identifier.
-  //  3) anywhere outside an identifier, we'll get some non-identifier thing.
-  // We can't actually distinguish cases 1 and 3, but returning the original
-  // location is correct for both!
+  // GetBeginningOfToken(InputLoc) is almost what we want, but does the wrong
+  // thing if the cursor is at the end of the token (identifier or operator).
+  // The cases are:
+  //   1) at the beginning of the token
+  //   2) at the middle of the token
+  //   3) at the end of the token
+  //   4) anywhere outside the identifier or operator
+  // To distinguish all cases, we lex at both
+  // GetBeginningOfToken(InputLoc-1) and GetBeginningOfToken(InputLoc). For
+  // cases 1 and 4, we just return the original location.
   SourceLocation InputLoc = SM.getComposedLoc(FID, *Offset);
-  if (*Offset == 0) // Case 1 or 3.
+  if (*Offset == 0) // Case 1 or 4.
     return InputLoc;
   SourceLocation Before = SM.getComposedLoc(FID, *Offset - 1);
+  SourceLocation BeforeTokBeginning =
+      Lexer::GetBeginningOfToken(Before, SM, LangOpts);
+  TokenFlavor BeforeKind = getTokenFlavor(BeforeTokBeginning, SM, LangOpts);
 
-  Before = Lexer::GetBeginningOfToken(Before, SM, LangOpts);
-  Token Tok;
-  if (Before.isValid() &&
-      !Lexer::getRawToken(Before, Tok, SM, LangOpts, false) &&
-      Tok.is(tok::raw_identifier))
-    return Before; // Case 2.
-  return InputLoc; // Case 1 or 3.
+  SourceLocation CurrentTokBeginning =
+      Lexer::GetBeginningOfToken(InputLoc, SM, LangOpts);
+  TokenFlavor CurrentKind = getTokenFlavor(CurrentTokBeginning, SM, LangOpts);
+
+  // At the middle of the token.
+  if (BeforeTokBeginning == CurrentTokBeginning) {
+    // For an interesting token (identifier or operator), return its beginning.
+    if (CurrentKind == Identifier || CurrentKind == Operator)
+      return CurrentTokBeginning;
+    // Otherwise, we return the original location.
+    return InputLoc;
+  }
+
+  // Whitespace is not interesting.
+  if (BeforeKind == Whitespace)
+    return CurrentTokBeginning;
+  if (CurrentKind == Whitespace)
+    return BeforeTokBeginning;
+
+  // The cursor is at a token boundary, e.g. "Before^Current"; we prefer
+  // identifiers to other tokens.
+  if (CurrentKind == Identifier)
+    return CurrentTokBeginning;
+  if (BeforeKind == Identifier)
+    return BeforeTokBeginning;
+  // Then prefer overloaded operators to other tokens.
+  if (CurrentKind == Operator)
+    return CurrentTokBeginning;
+  if (BeforeKind == Operator)
+    return BeforeTokBeginning;
+
+  // Uninteresting case: we just return the original location.
+  return InputLoc;
 }
 
 bool isValidFileRange(const SourceManager &Mgr, SourceRange R) {