Move token length calculation out of the diagnostics machinery into
the lexer, where it can be shared.
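
For example, a client that previously carried its own relexing logic can now
ask the lexer directly. A minimal caller sketch (illustrative, not part of
this diff; the header paths and the helper name are assumptions):

    #include "clang/Lex/Lexer.h"
    #include "clang/Basic/SourceManager.h"
    using namespace clang;

    // Return a pointer just past the last byte of the token at Loc, the
    // kind of query the diagnostics code needs when underlining tokens.
    static const char *getTokenEnd(SourceLocation Loc,
                                   const SourceManager &SM) {
      const char *TokStart = SM.getCharacterData(SM.getLogicalLoc(Loc));
      // MeasureTokenLength relexes the token and returns its length in
      // bytes, counting any trigraphs or escaped newlines it contains.
      return TokStart + Lexer::MeasureTokenLength(Loc, SM);
    }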
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@43090 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/Lex/Lexer.cpp b/Lex/Lexer.cpp
index c7f54ca..19dcfe2 100644
--- a/Lex/Lexer.cpp
+++ b/Lex/Lexer.cpp
@@ -163,6 +163,39 @@
}
+/// MeasureTokenLength - Relex the token at the specified location and return
+/// its length in bytes in the input file. If the token needs cleaning (e.g.
+/// it contains a trigraph or an escaped newline) then the count includes the
+/// bytes of those sequences.
+unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
+ const SourceManager &SM) {
+ // If this comes from a macro expansion, we really do want the macro name, not
+ // the token this macro expanded to.
+ Loc = SM.getLogicalLoc(Loc);
+
+ const char *StrData = SM.getCharacterData(Loc);
+
+  // TODO: this could be special-cased for common tokens like identifiers and
+  // ')' to make it faster, if it mattered. Looking at StrData[0] would handle
+  // all obviously single-character tokens, and something like
+  // Lexer::isObviouslySimpleCharacter could rule out trigraphs and escaped
+  // newlines when scanning identifiers.
+
+ const char *BufEnd = SM.getBufferData(Loc.getFileID()).second;
+
+  // Create a LangOptions struct and enable trigraphs; this is sufficient for
+  // measuring token lengths.
+ LangOptions LangOpts;
+ LangOpts.Trigraphs = true;
+
+ // Create a lexer starting at the beginning of this token.
+ Lexer TheLexer(Loc, LangOpts, StrData, BufEnd);
+ Token TheTok;
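+  // Lex exactly one token in raw mode; raw lexing never calls back into the
+  // preprocessor and emits no diagnostics, which is all we need here.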
+ TheLexer.LexRawToken(TheTok);
+ return TheTok.getLength();
+}
+
//===----------------------------------------------------------------------===//
// Character information.
//===----------------------------------------------------------------------===//