Implement AST classes for comments, a real parser for Doxygen comments, and a
very simple semantic analysis that just builds the AST; also make minor changes
to the lexer to pick up source locations I didn't think about before.
The comment AST is modelled on the ideas of the HTML AST: block and inline
content.
* Block content is a paragraph, a command that takes a paragraph as an
argument, or a verbatim command.
* Inline content is placed within some block. Inline content includes plain
text, inline commands and HTML as tag soup.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@159790 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/unittests/AST/CommentLexer.cpp b/unittests/AST/CommentLexer.cpp
index 5b4712d..0a52364 100644
--- a/unittests/AST/CommentLexer.cpp
+++ b/unittests/AST/CommentLexer.cpp
@@ -359,6 +359,23 @@
ASSERT_EQ(tok::newline, Toks[7].getKind());
}
+TEST_F(CommentLexerTest, DoxygenCommand7) {
+ const char *Source = "// \\c\n";
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(3U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::command, Toks[1].getKind());
+ ASSERT_EQ(StringRef("c"), Toks[1].getCommandName());
+
+ ASSERT_EQ(tok::newline, Toks[2].getKind());
+}
+
// Empty verbatim block.
TEST_F(CommentLexerTest, VerbatimBlock1) {
const char *Sources[] = {
@@ -389,31 +406,45 @@
// Empty verbatim block without an end command.
TEST_F(CommentLexerTest, VerbatimBlock2) {
- const char *Sources[] = {
- "/// \\verbatim\n//",
- "/** \\verbatim*/"
- };
+ const char *Source = "/// \\verbatim";
- for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
- std::vector<Token> Toks;
+ std::vector<Token> Toks;
- lexString(Sources[i], Toks);
+ lexString(Source, Toks);
- ASSERT_EQ(4U, Toks.size());
+ ASSERT_EQ(3U, Toks.size());
- ASSERT_EQ(tok::text, Toks[0].getKind());
- ASSERT_EQ(StringRef(" "), Toks[0].getText());
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
- ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
- ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName());
+ ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
+ ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName());
- ASSERT_EQ(tok::newline, Toks[2].getKind());
- ASSERT_EQ(tok::newline, Toks[3].getKind());
- }
+ ASSERT_EQ(tok::newline, Toks[2].getKind());
+}
+
+// Empty verbatim block without an end command (C-style block comment).
+TEST_F(CommentLexerTest, VerbatimBlock3) {
+ const char *Source = "/** \\verbatim*/";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(4U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
+ ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName());
+
+ ASSERT_EQ(tok::newline, Toks[2].getKind());
+ ASSERT_EQ(tok::newline, Toks[3].getKind());
}
// Single-line verbatim block.
-TEST_F(CommentLexerTest, VerbatimBlock3) {
+TEST_F(CommentLexerTest, VerbatimBlock4) {
const char *Sources[] = {
"/// Meow \\verbatim aaa \\endverbatim\n//",
"/** Meow \\verbatim aaa \\endverbatim*/"
@@ -444,7 +475,7 @@
}
// Single-line verbatim block without an end command.
-TEST_F(CommentLexerTest, VerbatimBlock4) {
+TEST_F(CommentLexerTest, VerbatimBlock5) {
const char *Sources[] = {
"/// Meow \\verbatim aaa \n//",
"/** Meow \\verbatim aaa */"
@@ -471,8 +502,96 @@
}
}
+TEST_F(CommentLexerTest, VerbatimBlock6) {
+ const char *Source =
+ "// \\verbatim\n"
+ "// Aaa\n"
+ "//\n"
+ "// Bbb\n"
+ "// \\endverbatim\n";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(11U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
+ ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName());
+
+ ASSERT_EQ(tok::newline, Toks[2].getKind());
+
+ ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind());
+ ASSERT_EQ(StringRef(" Aaa"), Toks[3].getVerbatimBlockText());
+
+ ASSERT_EQ(tok::newline, Toks[4].getKind());
+
+ ASSERT_EQ(tok::newline, Toks[5].getKind());
+
+ ASSERT_EQ(tok::verbatim_block_line, Toks[6].getKind());
+ ASSERT_EQ(StringRef(" Bbb"), Toks[6].getVerbatimBlockText());
+
+ ASSERT_EQ(tok::newline, Toks[7].getKind());
+
+ ASSERT_EQ(tok::verbatim_block_line, Toks[8].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[8].getVerbatimBlockText());
+
+ ASSERT_EQ(tok::verbatim_block_end, Toks[9].getKind());
+ ASSERT_EQ(StringRef("endverbatim"), Toks[9].getVerbatimBlockName());
+
+ ASSERT_EQ(tok::newline, Toks[10].getKind());
+}
+
+TEST_F(CommentLexerTest, VerbatimBlock7) {
+ const char *Source =
+ "/* \\verbatim\n"
+ " * Aaa\n"
+ " *\n"
+ " * Bbb\n"
+ " * \\endverbatim\n"
+ " */";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(11U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
+ ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName());
+
+ ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind());
+ ASSERT_EQ(StringRef(" Aaa"), Toks[2].getVerbatimBlockText());
+
+ ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind());
+ ASSERT_EQ(StringRef(""), Toks[3].getVerbatimBlockText());
+
+ ASSERT_EQ(tok::verbatim_block_line, Toks[4].getKind());
+ ASSERT_EQ(StringRef(" Bbb"), Toks[4].getVerbatimBlockText());
+
+ ASSERT_EQ(tok::verbatim_block_line, Toks[5].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[5].getVerbatimBlockText());
+
+ ASSERT_EQ(tok::verbatim_block_end, Toks[6].getKind());
+ ASSERT_EQ(StringRef("endverbatim"), Toks[6].getVerbatimBlockName());
+
+ ASSERT_EQ(tok::newline, Toks[7].getKind());
+
+ ASSERT_EQ(tok::text, Toks[8].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[8].getText());
+
+ ASSERT_EQ(tok::newline, Toks[9].getKind());
+ ASSERT_EQ(tok::newline, Toks[10].getKind());
+}
+
// Complex test for verbatim blocks.
-TEST_F(CommentLexerTest, VerbatimBlock5) {
+TEST_F(CommentLexerTest, VerbatimBlock8) {
const char *Source =
"/* Meow \\verbatim aaa\\$\\@\n"
"bbb \\endverbati\r"
@@ -492,13 +611,13 @@
ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName());
ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind());
- ASSERT_EQ(StringRef(" aaa\\$\\@\n"), Toks[2].getVerbatimBlockText());
+ ASSERT_EQ(StringRef(" aaa\\$\\@"), Toks[2].getVerbatimBlockText());
ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind());
- ASSERT_EQ(StringRef("bbb \\endverbati\r"), Toks[3].getVerbatimBlockText());
+ ASSERT_EQ(StringRef("bbb \\endverbati"), Toks[3].getVerbatimBlockText());
ASSERT_EQ(tok::verbatim_block_line, Toks[4].getKind());
- ASSERT_EQ(StringRef("ccc\r\n"), Toks[4].getVerbatimBlockText());
+ ASSERT_EQ(StringRef("ccc"), Toks[4].getVerbatimBlockText());
ASSERT_EQ(tok::verbatim_block_line, Toks[5].getKind());
ASSERT_EQ(StringRef("ddd "), Toks[5].getVerbatimBlockText());
@@ -513,7 +632,7 @@
ASSERT_EQ(StringRef("verbatim"), Toks[8].getVerbatimBlockName());
ASSERT_EQ(tok::verbatim_block_line, Toks[9].getKind());
- ASSERT_EQ(StringRef(" eee\n"), Toks[9].getVerbatimBlockText());
+ ASSERT_EQ(StringRef(" eee"), Toks[9].getVerbatimBlockText());
ASSERT_EQ(tok::verbatim_block_end, Toks[10].getKind());
ASSERT_EQ(StringRef("endverbatim"), Toks[10].getVerbatimBlockName());
@@ -526,7 +645,7 @@
}
// LaTeX verbatim blocks.
-TEST_F(CommentLexerTest, VerbatimBlock6) {
+TEST_F(CommentLexerTest, VerbatimBlock9) {
const char *Source =
"/// \\f$ Aaa \\f$ \\f[ Bbb \\f] \\f{ Ccc \\f}";
std::vector<Token> Toks;
@@ -968,8 +1087,25 @@
}
TEST_F(CommentLexerTest, HTML13) {
+ const char *Source = "// </tag";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(3U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::html_tag_close, Toks[1].getKind());
+ ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagCloseName());
+
+ ASSERT_EQ(tok::newline, Toks[2].getKind());
+}
+
+TEST_F(CommentLexerTest, HTML14) {
const char *Sources[] = {
- "// </tag",
"// </tag>",
"// </ tag>",
"// </ tag >"
@@ -980,7 +1116,7 @@
lexString(Sources[i], Toks);
- ASSERT_EQ(3U, Toks.size());
+ ASSERT_EQ(4U, Toks.size());
ASSERT_EQ(tok::text, Toks[0].getKind());
ASSERT_EQ(StringRef(" "), Toks[0].getText());
@@ -988,7 +1124,9 @@
ASSERT_EQ(tok::html_tag_close, Toks[1].getKind());
ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagCloseName());
- ASSERT_EQ(tok::newline, Toks[2].getKind());
+ ASSERT_EQ(tok::html_greater, Toks[2].getKind());
+
+ ASSERT_EQ(tok::newline, Toks[3].getKind());
}
}