Move token length calculation out of the diagnostics machinery into 
the lexer, where it can be shared.
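
For illustration, a client that wants the text of the token at a given
location can now do something like the following sketch (getTokenText is a
hypothetical helper, not part of this patch; it assumes Loc does not come
from a macro expansion):

  // Measure the token at Loc, then copy its bytes out of the source buffer.
  static std::string getTokenText(SourceLocation Loc, const SourceManager &SM) {
    unsigned Len = Lexer::MeasureTokenLength(Loc, SM);
    const char *Start = SM.getCharacterData(Loc);
    return std::string(Start, Start + Len);
  }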


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@43090 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/Lex/Lexer.cpp b/Lex/Lexer.cpp
index c7f54ca..19dcfe2 100644
--- a/Lex/Lexer.cpp
+++ b/Lex/Lexer.cpp
@@ -163,6 +163,37 @@
 }
 
 
+/// MeasureTokenLength - Relex the token at the specified location and return
+/// its length in bytes in the input file.  If the token needs cleaning (e.g.,
+/// it contains a trigraph or an escaped newline) then this count includes the
+/// bytes that are part of that cleaning.
+unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
+                                   const SourceManager &SM) {
+  // If this comes from a macro expansion, we really do want the macro name, not
+  // the token this macro expanded to.
+  Loc = SM.getLogicalLoc(Loc);
+  
+  const char *StrData = SM.getCharacterData(Loc);
+  
+  // TODO: This could be special-cased for common tokens like identifiers or
+  // ')' to make this faster, if it mattered.  Checking StrData[0] would handle
+  // all obviously single-character tokens, and something like
+  // Lexer::isObviouslySimpleCharacter could be used to handle identifiers.
+
+  const char *BufEnd = SM.getBufferData(Loc.getFileID()).second;
+  
+  // Create a LangOptions struct and enable trigraphs.  This is sufficient
+  // for measuring token lengths.
+  LangOptions LangOpts;
+  LangOpts.Trigraphs = true;
+  
+  // Create a lexer starting at the beginning of this token.
+  Lexer TheLexer(Loc, LangOpts, StrData, BufEnd);
+  Token TheTok;
+  TheLexer.LexRawToken(TheTok);
+  return TheTok.getLength();
+}
+
 //===----------------------------------------------------------------------===//
 // Character information.
 //===----------------------------------------------------------------------===//