Fixes various problems with accounting for tabs in the original code.
We now compute each token's original column directly while lexing the
tokens, where we already know where every line starts.
Before this patch, formatting:
void f() {
\tg();
\th();
}
would incorrectly count each \t as 1 character if only the line
containing h() was reformatted, and thus indent h() at offset 1
instead of at the next tab stop (a multiple of the style's IndentWidth).
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@189585 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp
index 3982ba6..3c740d9 100644
--- a/lib/Format/Format.cpp
+++ b/lib/Format/Format.cpp
@@ -487,11 +487,11 @@
class FormatTokenLexer {
public:
- FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr,
+ FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, FormatStyle &Style,
encoding::Encoding Encoding)
- : FormatTok(NULL), GreaterStashed(false), TrailingWhitespace(0), Lex(Lex),
- SourceMgr(SourceMgr), IdentTable(getFormattingLangOpts()),
- Encoding(Encoding) {
+ : FormatTok(NULL), GreaterStashed(false), Column(0),
+ TrailingWhitespace(0), Lex(Lex), SourceMgr(SourceMgr), Style(Style),
+ IdentTable(getFormattingLangOpts()), Encoding(Encoding) {
Lex.SetKeepWhitespaceMode(true);
}
@@ -509,6 +509,7 @@
FormatToken *getNextToken() {
if (GreaterStashed) {
// Create a synthesized second '>' token.
+ // FIXME: Increment Column and set OriginalColumn.
Token Greater = FormatTok->Tok;
FormatTok = new (Allocator.Allocate()) FormatToken;
FormatTok->Tok = Greater;
@@ -532,13 +533,29 @@
// Consume and record whitespace until we find a significant token.
unsigned WhitespaceLength = TrailingWhitespace;
while (FormatTok->Tok.is(tok::unknown)) {
- unsigned Newlines = FormatTok->TokenText.count('\n');
- if (Newlines > 0)
- FormatTok->LastNewlineOffset =
- WhitespaceLength + FormatTok->TokenText.rfind('\n') + 1;
- FormatTok->NewlinesBefore += Newlines;
- unsigned EscapedNewlines = FormatTok->TokenText.count("\\\n");
- FormatTok->HasUnescapedNewline |= EscapedNewlines != Newlines;
+ for (int i = 0, e = FormatTok->TokenText.size(); i != e; ++i) {
+ switch (FormatTok->TokenText[i]) {
+ case '\n':
+ ++FormatTok->NewlinesBefore;
+ // FIXME: This is technically incorrect, as it could also
+ // be a literal backslash at the end of the line.
+ if (i == 0 || FormatTok->TokenText[i-1] != '\\')
+ FormatTok->HasUnescapedNewline = true;
+ FormatTok->LastNewlineOffset = WhitespaceLength + i + 1;
+ Column = 0;
+ break;
+ case ' ':
+ ++Column;
+ break;
+ case '\t':
+ Column += Style.IndentWidth - Column % Style.IndentWidth;
+ break;
+ default:
+ ++Column;
+ break;
+ }
+ }
+
WhitespaceLength += FormatTok->Tok.getLength();
readRawToken(*FormatTok);
@@ -554,11 +571,14 @@
FormatTok->TokenText[1] == '\n') {
// FIXME: ++FormatTok->NewlinesBefore is missing...
WhitespaceLength += 2;
+ Column = 0;
FormatTok->TokenText = FormatTok->TokenText.substr(2);
}
+ FormatTok->OriginalColumn = Column;
TrailingWhitespace = 0;
if (FormatTok->Tok.is(tok::comment)) {
+ // FIXME: Add the trimmed whitespace to Column.
StringRef UntrimmedText = FormatTok->TokenText;
FormatTok->TokenText = FormatTok->TokenText.rtrim();
TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size();
@@ -576,6 +596,8 @@
FormatTok->CodePointCount =
encoding::getCodePointCount(FormatTok->TokenText, Encoding);
+ // FIXME: Add the CodePointCount to Column.
+
FormatTok->WhitespaceRange = SourceRange(
WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength));
return FormatTok;
@@ -583,9 +605,11 @@
FormatToken *FormatTok;
bool GreaterStashed;
+ unsigned Column;
unsigned TrailingWhitespace;
Lexer &Lex;
SourceManager &SourceMgr;
+ FormatStyle &Style;
IdentifierTable IdentTable;
encoding::Encoding Encoding;
llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
@@ -595,7 +619,6 @@
Lex.LexFromRawLexer(Tok.Tok);
Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
Tok.Tok.getLength());
-
// For formatting, treat unterminated string literals like normal string
// literals.
if (Tok.is(tok::unknown) && !Tok.TokenText.empty() &&
@@ -622,7 +645,7 @@
virtual ~Formatter() {}
tooling::Replacements format() {
- FormatTokenLexer Tokens(Lex, SourceMgr, Encoding);
+ FormatTokenLexer Tokens(Lex, SourceMgr, Style, Encoding);
UnwrappedLineParser Parser(Style, Tokens.lex(), *this);
bool StructuralError = Parser.parse();
@@ -692,9 +715,7 @@
formatFirstToken(*TheLine.First, PreviousLineLastToken, Indent,
TheLine.InPPDirective);
} else {
- Indent = LevelIndent =
- SourceMgr.getSpellingColumnNumber(FirstTok->Tok.getLocation()) -
- 1;
+ Indent = LevelIndent = FirstTok->OriginalColumn;
}
ContinuationIndenter Indenter(Style, SourceMgr, TheLine, Indent,
Whitespaces, Encoding,
@@ -727,8 +748,7 @@
Tok = Tok->Next) {
if (Tok == TheLine.First &&
(Tok->NewlinesBefore > 0 || Tok->IsFirst)) {
- unsigned LevelIndent =
- SourceMgr.getSpellingColumnNumber(Tok->Tok.getLocation()) - 1;
+ unsigned LevelIndent = Tok->OriginalColumn;
// Remove trailing whitespace of the previous line if it was
// touched.
if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) {