Initial version of formatting library.
This formatting library will be used by a stand-alone clang-format tool
and can also be used when writing other refactorings.
Manuel's original design document:
https://docs.google.com/a/google.com/document/d/1gpckL2U_6QuU9YW2L1ABsc4Fcogn5UngKk7fE5dDOoA/edit
The library can already successfully format itself.
Review: http://llvm-reviews.chandlerc.com/D80
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@169137 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Format/UnwrappedLineParser.cpp b/lib/Format/UnwrappedLineParser.cpp
new file mode 100644
index 0000000..8dc1278
--- /dev/null
+++ b/lib/Format/UnwrappedLineParser.cpp
@@ -0,0 +1,341 @@
+//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the implementation of the UnwrappedLineParser,
+/// which turns a stream of tokens into UnwrappedLines.
+///
+/// This is EXPERIMENTAL code under heavy development. It is not yet in a state
+/// where it can be used to format real code.
+///
+//===----------------------------------------------------------------------===//
+
+#include "UnwrappedLineParser.h"
+
+#include "llvm/Support/raw_ostream.h"
+
+namespace clang {
+namespace format {
+
+/// \brief Sets up a parser that reads raw tokens from \p Lex and reports each
+/// unwrapped line it finds to \p Callback.
+///
+/// \p SourceMgr is used by tokenText() to look up the characters of a token.
+UnwrappedLineParser::UnwrappedLineParser(Lexer &Lex, SourceManager &SourceMgr,
+                                         UnwrappedLineConsumer &Callback)
+    : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr),
+      IdentTable(Lex.getLangOpts()), Callback(Callback) {
+  // Whitespace has to come through as tokens so that parseToken() can record
+  // the whitespace and newlines in front of every significant token.
+  Lex.SetKeepWhitespaceMode(true);
+}
+
+/// \brief Parses the whole token stream and reports every unwrapped line to
+/// the callback.
+void UnwrappedLineParser::parse() {
+  // Prime FormatTok with the first significant token.
+  parseToken();
+  parseLevel();
+  // The parse loops simply stop at the end of the input, so the tokens of an
+  // unterminated final construct (e.g. a statement without ';' or a comment
+  // that is not followed by a newline) can still be pending here. Report
+  // them instead of silently dropping them.
+  if (!Line.Tokens.empty())
+    addUnwrappedLine();
+}
+
+/// \brief Parses a sequence of constructs on one nesting level.
+///
+/// Returns when the current token is a '}' (which is left unconsumed for the
+/// caller) or when the end of the input has been reached.
+void UnwrappedLineParser::parseLevel() {
+  do {
+    if (FormatTok.Tok.is(tok::r_brace))
+      return;
+
+    if (FormatTok.Tok.is(tok::hash)) {
+      parsePPDirective();
+    } else if (FormatTok.Tok.is(tok::comment)) {
+      parseComment();
+    } else if (FormatTok.Tok.is(tok::l_brace)) {
+      parseBlock();
+      // parseBlock() consumes the closing '}' without reporting it.
+      addUnwrappedLine();
+    } else {
+      parseStatement();
+    }
+  } while (!eof());
+}
+
+/// \brief Parses a braced block; called with the current token being the
+/// opening '{'.
+///
+/// The tokens up to and including the '{' are reported as one unwrapped line,
+/// then the contents of the block are parsed. The closing '}' and a directly
+/// following ';' are consumed but not reported here; reporting them is left
+/// to the caller.
+void UnwrappedLineParser::parseBlock() {
+  nextToken();
+
+  // FIXME: Remove this hack to handle namespaces.
+  // The check has to happen before addUnwrappedLine() clears Line.Tokens.
+  const bool IsNamespace = Line.Tokens[0].Tok.is(tok::kw_namespace);
+  // The contents of a namespace stay on the level of the namespace itself;
+  // the contents of every other block go one level deeper.
+  const unsigned ExtraLevels = IsNamespace ? 0 : 1;
+
+  addUnwrappedLine();
+
+  Line.Level += ExtraLevels;
+  parseLevel();
+  Line.Level -= ExtraLevels;
+
+  assert(FormatTok.Tok.is(tok::r_brace) && "expected '}'");
+  nextToken();
+  if (FormatTok.Tok.is(tok::semi))
+    nextToken();
+}
+
+/// \brief Parses a preprocessor directive starting at the current '#'.
+///
+/// Tokens are consumed until the current token is one that is preceded by a
+/// newline; the tokens consumed so far are then reported as one unwrapped
+/// line. If the end of the input is reached first, nothing is reported.
+void UnwrappedLineParser::parsePPDirective() {
+  for (;;) {
+    if (eof())
+      return;
+    nextToken();
+    if (FormatTok.NewlinesBefore > 0)
+      break;
+  }
+  addUnwrappedLine();
+}
+
+/// \brief Parses a comment that starts a new unwrapped line.
+///
+/// Tokens are consumed until the current token is one that is preceded by a
+/// newline; the tokens consumed so far are then reported as one unwrapped
+/// line. If the end of the input is reached first, nothing is reported.
+void UnwrappedLineParser::parseComment() {
+  bool ReachedNextLine = false;
+  while (!ReachedNextLine && !eof()) {
+    nextToken();
+    ReachedNextLine = FormatTok.NewlinesBefore > 0;
+  }
+  if (ReachedNextLine)
+    addUnwrappedLine();
+}
+
+/// \brief Parses one statement starting at the current token.
+///
+/// Access specifiers, enums, control-flow statements and labels are handed to
+/// their dedicated parse functions. Anything else is consumed token by token
+/// until a ';' or a braced block ends it, at which point the unwrapped line is
+/// reported. If the end of the input is reached first, nothing is reported.
+void UnwrappedLineParser::parseStatement() {
+  switch (FormatTok.Tok.getKind()) {
+  case tok::kw_public:
+  case tok::kw_protected:
+  case tok::kw_private:
+    parseAccessSpecifier();
+    return;
+  case tok::kw_enum:
+    parseEnum();
+    return;
+  default:
+    break;
+  }
+
+  // Only true while the first token of the statement is being looked at; a
+  // ':' directly after that token turns the statement into a label.
+  bool IsFirstIteration = true;
+  do {
+    const bool MayStartLabel = IsFirstIteration;
+    IsFirstIteration = false;
+
+    switch (FormatTok.Tok.getKind()) {
+    // Tokens that keep the statement going.
+    case tok::l_paren:
+      parseParens();
+      break;
+    case tok::raw_identifier:
+      nextToken();
+      break;
+
+    // Tokens that end the statement.
+    case tok::semi:
+      nextToken();
+      addUnwrappedLine();
+      return;
+    case tok::l_brace:
+      parseBlock();
+      addUnwrappedLine();
+      return;
+
+    // Constructs with their own parse function.
+    case tok::kw_if:
+      parseIfThenElse();
+      return;
+    case tok::kw_do:
+      parseDoWhile();
+      return;
+    case tok::kw_switch:
+      parseSwitch();
+      return;
+    case tok::kw_case:
+      parseCaseLabel();
+      return;
+    case tok::kw_default:
+      // Step over 'default'; parseLabel() expects to start at the ':'.
+      nextToken();
+      parseLabel();
+      return;
+
+    default:
+      nextToken();
+      if (MayStartLabel && FormatTok.Tok.is(tok::colon)) {
+        parseLabel();
+        return;
+      }
+      break;
+    }
+  } while (!eof());
+}
+
+/// \brief Consumes a parenthesized token sequence, including nested
+/// parentheses; the current token must be the opening '('.
+///
+/// Returns after the matching ')' has been consumed or when the end of the
+/// input has been reached.
+void UnwrappedLineParser::parseParens() {
+  assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected.");
+  nextToken();
+  do {
+    if (FormatTok.Tok.is(tok::l_paren)) {
+      // Nested pair: the recursive call consumes it up to its own ')'.
+      parseParens();
+    } else {
+      const bool IsClosingParen = FormatTok.Tok.is(tok::r_paren);
+      nextToken();
+      if (IsClosingParen)
+        return;
+    }
+  } while (!eof());
+}
+
+/// \brief Parses an if statement with an optional else branch; the current
+/// token must be 'if'.
+///
+/// Each branch is parsed either as a braced block or as a single statement
+/// one level deeper. An 'else' that follows a braced then-branch stays on
+/// the same unwrapped line as the closing '}'; "else if" is handled by
+/// recursion.
+void UnwrappedLineParser::parseIfThenElse() {
+  assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected");
+  nextToken();
+  parseParens();
+
+  const bool ThenIsBlock = FormatTok.Tok.is(tok::l_brace);
+  if (ThenIsBlock) {
+    // The closing '}' is reported further down, once it is known whether an
+    // 'else' follows it.
+    parseBlock();
+  } else {
+    addUnwrappedLine();
+    ++Line.Level;
+    parseStatement();
+    --Line.Level;
+  }
+
+  if (!FormatTok.Tok.is(tok::kw_else)) {
+    if (ThenIsBlock)
+      addUnwrappedLine();
+    return;
+  }
+
+  nextToken();
+  if (FormatTok.Tok.is(tok::kw_if)) {
+    parseIfThenElse();
+    return;
+  }
+  if (FormatTok.Tok.is(tok::l_brace)) {
+    parseBlock();
+    addUnwrappedLine();
+    return;
+  }
+  addUnwrappedLine();
+  ++Line.Level;
+  parseStatement();
+  --Line.Level;
+}
+
+/// \brief Parses a do-while statement; the current token must be 'do'.
+///
+/// The loop body is parsed either as a braced block or as a single statement
+/// one level deeper. The trailing "while (...);" is consumed through
+/// parseStatement(), which reports the line once it reaches the ';'.
+void UnwrappedLineParser::parseDoWhile() {
+  assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected");
+  nextToken();
+  if (!FormatTok.Tok.is(tok::l_brace)) {
+    // Unbraced body: 'do' is reported on its own, the body one level deeper.
+    addUnwrappedLine();
+    ++Line.Level;
+    parseStatement();
+    --Line.Level;
+  } else {
+    parseBlock();
+  }
+
+  assert(FormatTok.Tok.is(tok::kw_while) && "'while' expected");
+  nextToken();
+  parseStatement();
+}
+
+/// \brief Finishes a label; the current token must be the ':' that ends it.
+///
+/// Lines reported from here use a level one lower than the current one (but
+/// never lower than 0). A block that directly follows the ':' is parsed
+/// before the final line is reported. Line.Level is restored on return.
+void UnwrappedLineParser::parseLabel() {
+  // FIXME: remove all asserts.
+  assert(FormatTok.Tok.is(tok::colon) && "':' expected");
+  nextToken();
+
+  const unsigned SavedLevel = Line.Level;
+  if (Line.Level > 0)
+    --Line.Level;
+
+  if (FormatTok.Tok.is(tok::l_brace))
+    parseBlock();
+  addUnwrappedLine();
+
+  Line.Level = SavedLevel;
+}
+
+/// \brief Parses a case label; the current token must be 'case'.
+///
+/// Everything up to the next ':' (or the end of the input) is consumed, then
+/// parseLabel() takes over.
+void UnwrappedLineParser::parseCaseLabel() {
+  assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected");
+  // FIXME: fix handling of complex expressions here.
+  nextToken();
+  while (!eof() && !FormatTok.Tok.is(tok::colon))
+    nextToken();
+  parseLabel();
+}
+
+/// \brief Parses a switch statement; the current token must be 'switch'.
+///
+/// The body is parsed either as a braced block or as a single statement one
+/// level deeper.
+void UnwrappedLineParser::parseSwitch() {
+  assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected");
+  nextToken();
+  parseParens();
+
+  if (FormatTok.Tok.is(tok::l_brace)) {
+    parseBlock();
+    addUnwrappedLine();
+    return;
+  }
+
+  // Unbraced body: report "switch (...)" first, then the body below it.
+  addUnwrappedLine();
+  ++Line.Level;
+  parseStatement();
+  --Line.Level;
+}
+
+/// \brief Parses an access specifier and reports it as one unwrapped line.
+///
+/// Exactly two tokens are consumed: the access keyword the caller found and
+/// the token after it, which is presumably the ':' (it is not checked here).
+void UnwrappedLineParser::parseAccessSpecifier() {
+  for (unsigned Consumed = 0; Consumed != 2; ++Consumed)
+    nextToken();
+  addUnwrappedLine();
+}
+
+/// \brief Parses an enum as a single unwrapped line.
+///
+/// Everything up to and including the next ';' is consumed and reported. If
+/// the end of the input is reached before a ';', nothing is reported.
+void UnwrappedLineParser::parseEnum() {
+  // Step over the current token and everything that follows until a ';'
+  // becomes the current token.
+  do {
+    nextToken();
+  } while (!FormatTok.Tok.is(tok::semi) && !eof());
+
+  if (FormatTok.Tok.is(tok::semi)) {
+    nextToken();
+    addUnwrappedLine();
+  }
+}
+
+/// \brief Reports the line collected so far to the callback and starts a new,
+/// empty one.
+///
+/// Comments that follow on the same source line (no newline in front of
+/// them) are added to the line before it is reported.
+void UnwrappedLineParser::addUnwrappedLine() {
+  for (;;) {
+    const bool IsTrailingComment = !eof() && FormatTok.NewlinesBefore == 0 &&
+                                   FormatTok.Tok.is(tok::comment);
+    if (!IsTrailingComment)
+      break;
+    nextToken();
+  }
+
+  Callback.formatUnwrappedLine(Line);
+  Line.Tokens.clear();
+}
+
+/// \brief Returns true if the current token is the end-of-file token.
+bool UnwrappedLineParser::eof() const {
+  return FormatTok.Tok.getKind() == tok::eof;
+}
+
+/// \brief Appends the current token to the line being built and lexes the
+/// next one. Does nothing once the end of the input has been reached.
+void UnwrappedLineParser::nextToken() {
+  if (!eof()) {
+    Line.Tokens.push_back(FormatTok);
+    parseToken();
+  }
+}
+
+/// \brief Lexes the next significant token into FormatTok.
+///
+/// Whitespace in front of the token is skipped; its start location, its total
+/// length and the number of newlines it contains are recorded in FormatTok.
+/// Raw identifiers are looked up in IdentTable and get the token kind stored
+/// there. A '>>' token is delivered as two consecutive '>' tokens.
+void UnwrappedLineParser::parseToken() {
+  if (GreaterStashed) {
+    // The previous call turned a '>>' into a first '>'. Deliver the second
+    // '>' by reusing FormatTok, with empty whitespace that starts one
+    // character after the token's location.
+    // NOTE(review): the token's own location and length are not adjusted
+    // here; confirm that consumers do not rely on them for the second '>'.
+    FormatTok.NewlinesBefore = 0;
+    FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation().getLocWithOffset(1);
+    FormatTok.WhiteSpaceLength = 0;
+    GreaterStashed = false;
+    return;
+  }
+
+  FormatTok = FormatToken();
+  Lex.LexFromRawLexer(FormatTok.Tok);
+  FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
+
+  // Consume and record whitespace until we find a significant token. The
+  // loop condition already rules out the end-of-file token, so no separate
+  // eof() check is needed inside the loop.
+  while (FormatTok.Tok.is(tok::unknown)) {
+    FormatTok.NewlinesBefore += tokenText().count('\n');
+    FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength();
+    Lex.LexFromRawLexer(FormatTok.Tok);
+  }
+
+  // The raw lexer does not classify identifiers; take the kind from the
+  // identifier table instead.
+  if (FormatTok.Tok.is(tok::raw_identifier)) {
+    const IdentifierInfo &Info = IdentTable.get(tokenText());
+    FormatTok.Tok.setKind(Info.getTokenID());
+  }
+
+  // Split '>>' into two '>' tokens: return the first one now and remember
+  // that the second one is still owed to the next call.
+  if (FormatTok.Tok.is(tok::greatergreater)) {
+    FormatTok.Tok.setKind(tok::greater);
+    GreaterStashed = true;
+  }
+}
+
+/// \brief Returns the source characters of the current token, i.e.
+/// Tok.getLength() characters starting at the token's location.
+StringRef UnwrappedLineParser::tokenText() {
+  const char *Begin = SourceMgr.getCharacterData(FormatTok.Tok.getLocation());
+  return StringRef(Begin, FormatTok.Tok.getLength());
+}
+
+} // end namespace format
+} // end namespace clang