Improve performance during cursor traversal when a region of interest
is present. 

Rather than using clang_getCursorExtent(), which requires
us to lex the token at the ending position to determine its
length. Then, we'd be comparing [a, b) source ranges that cover the
characters in the range rather than the normal behavior for Clang's
source ranges, which covers the tokens in the range. However, relexing
causes us to read the source file (which may come from a precompiled
header), which is rather unfortunate and affects performance.

In the new scheme, we only use Clang-style source ranges that cover
the tokens in the range. At the entry points where this matters
(clang_annotateTokens, clang_getCursor), we make sure to move source
locations to the start of the token.

Addresses most of <rdar://problem/8049381>.



git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@109134 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 2f11c37..b23122c 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -248,6 +248,59 @@
   return TheTok.getLength();
 }
 
+SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc,
+                                          const SourceManager &SM,
+                                          const LangOptions &LangOpts) {
+  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc);
+  bool Invalid = false;
+  llvm::StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
+  if (Invalid)
+    return Loc;
+
+  // Back up from the current location until we hit the beginning of a line
+  // (or the buffer). We'll relex from that point.
+  const char *BufStart = Buffer.data();
+  const char *StrData = BufStart+LocInfo.second;
+  if (StrData[0] == '\n' || StrData[0] == '\r')
+    return Loc;
+
+  const char *LexStart = StrData;
+  while (LexStart != BufStart) {
+    if (LexStart[0] == '\n' || LexStart[0] == '\r') {
+      ++LexStart;
+      break;
+    }
+
+    --LexStart;
+  }
+  
+  // Create a lexer starting at the beginning of this token.
+  SourceLocation LexerStartLoc = Loc.getFileLocWithOffset(-LocInfo.second);
+  Lexer TheLexer(LexerStartLoc, LangOpts, BufStart, LexStart, Buffer.end());
+  TheLexer.SetCommentRetentionState(true);
+  
+  // Lex tokens until we find the token that contains the source location.
+  Token TheTok;
+  do {
+    TheLexer.LexFromRawLexer(TheTok);
+    
+    if (TheLexer.getBufferLocation() > StrData) {
+      // Lexing this token has taken the lexer past the source location we're
+      // looking for. If the current token encompasses our source location,
+      // return the beginning of that token.
+      if (TheLexer.getBufferLocation() - TheTok.getLength() <= StrData)
+        return TheTok.getLocation();
+      
+      // We ended up skipping over the source location entirely, which means
+      // that it points into whitespace. We're done here.
+      break;
+    }
+  } while (TheTok.getKind() != tok::eof);
+  
+  // We've passed our source location; just return the original source location.
+  return Loc;
+}
+
 namespace {
   enum PreambleDirectiveKind {
     PDK_Skipped,