Comment parsing: parse "<blah" as an HTML tag only if "blah" is a known tag
name. This should reduce the number of false-positive warnings about bad HTML
in comments when the comment author intended to refer to a template.
This change will also enable us to parse the comment as intended in these cases.
Fixes part 1 of PR13374.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@162407 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/unittests/AST/CommentLexer.cpp b/unittests/AST/CommentLexer.cpp
index cab0fdd..1168d1d 100644
--- a/unittests/AST/CommentLexer.cpp
+++ b/unittests/AST/CommentLexer.cpp
@@ -822,7 +822,7 @@
TEST_F(CommentLexerTest, HTML3) {
const char *Source =
- "// < tag";
+ "// < img";
std::vector<Token> Toks;
@@ -837,15 +837,15 @@
ASSERT_EQ(StringRef("<"), Toks[1].getText());
ASSERT_EQ(tok::text, Toks[2].getKind());
- ASSERT_EQ(StringRef(" tag"), Toks[2].getText());
+ ASSERT_EQ(StringRef(" img"), Toks[2].getText());
ASSERT_EQ(tok::newline, Toks[3].getKind());
}
TEST_F(CommentLexerTest, HTML4) {
const char *Sources[] = {
- "// <tag",
- "// <tag "
+ "// <img",
+ "// <img "
};
for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
@@ -859,7 +859,7 @@
ASSERT_EQ(StringRef(" "), Toks[0].getText());
ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
- ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagStartName());
+ ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
ASSERT_EQ(tok::newline, Toks[2].getKind());
}
@@ -867,7 +867,7 @@
TEST_F(CommentLexerTest, HTML5) {
const char *Source =
- "// <tag 42";
+ "// <img 42";
std::vector<Token> Toks;
@@ -879,7 +879,7 @@
ASSERT_EQ(StringRef(" "), Toks[0].getText());
ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
- ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagStartName());
+ ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
ASSERT_EQ(tok::text, Toks[2].getKind());
ASSERT_EQ(StringRef("42"), Toks[2].getText());
@@ -888,7 +888,7 @@
}
TEST_F(CommentLexerTest, HTML6) {
- const char *Source = "// <tag> Meow";
+ const char *Source = "// <img> Meow";
std::vector<Token> Toks;
@@ -900,7 +900,7 @@
ASSERT_EQ(StringRef(" "), Toks[0].getText());
ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
- ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagStartName());
+ ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
ASSERT_EQ(tok::html_greater, Toks[2].getKind());
@@ -911,7 +911,7 @@
}
TEST_F(CommentLexerTest, HTML7) {
- const char *Source = "// <tag=";
+ const char *Source = "// <img=";
std::vector<Token> Toks;
@@ -923,7 +923,7 @@
ASSERT_EQ(StringRef(" "), Toks[0].getText());
ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
- ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagStartName());
+ ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
ASSERT_EQ(tok::text, Toks[2].getKind());
ASSERT_EQ(StringRef("="), Toks[2].getText());
@@ -932,7 +932,7 @@
}
TEST_F(CommentLexerTest, HTML8) {
- const char *Source = "// <tag attr=> Meow";
+ const char *Source = "// <img src=> Meow";
std::vector<Token> Toks;
@@ -944,10 +944,10 @@
ASSERT_EQ(StringRef(" "), Toks[0].getText());
ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
- ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagStartName());
+ ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
ASSERT_EQ(tok::html_ident, Toks[2].getKind());
- ASSERT_EQ(StringRef("attr"), Toks[2].getHTMLIdent());
+ ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent());
ASSERT_EQ(tok::html_equals, Toks[3].getKind());
@@ -961,8 +961,8 @@
TEST_F(CommentLexerTest, HTML9) {
const char *Sources[] = {
- "// <tag attr",
- "// <tag attr "
+ "// <img src",
+ "// <img src "
};
for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
@@ -976,10 +976,10 @@
ASSERT_EQ(StringRef(" "), Toks[0].getText());
ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
- ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagStartName());
+ ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
ASSERT_EQ(tok::html_ident, Toks[2].getKind());
- ASSERT_EQ(StringRef("attr"), Toks[2].getHTMLIdent());
+ ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent());
ASSERT_EQ(tok::newline, Toks[3].getKind());
}
@@ -987,8 +987,8 @@
TEST_F(CommentLexerTest, HTML10) {
const char *Sources[] = {
- "// <tag attr=",
- "// <tag attr ="
+ "// <img src=",
+ "// <img src ="
};
for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
@@ -1002,10 +1002,10 @@
ASSERT_EQ(StringRef(" "), Toks[0].getText());
ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
- ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagStartName());
+ ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
ASSERT_EQ(tok::html_ident, Toks[2].getKind());
- ASSERT_EQ(StringRef("attr"), Toks[2].getHTMLIdent());
+ ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent());
ASSERT_EQ(tok::html_equals, Toks[3].getKind());
@@ -1015,10 +1015,10 @@
TEST_F(CommentLexerTest, HTML11) {
const char *Sources[] = {
- "// <tag attr=\"",
- "// <tag attr = \"",
- "// <tag attr=\'",
- "// <tag attr = \'"
+ "// <img src=\"",
+ "// <img src = \"",
+ "// <img src=\'",
+ "// <img src = \'"
};
for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
@@ -1032,10 +1032,10 @@
ASSERT_EQ(StringRef(" "), Toks[0].getText());
ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
- ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagStartName());
+ ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
ASSERT_EQ(tok::html_ident, Toks[2].getKind());
- ASSERT_EQ(StringRef("attr"), Toks[2].getHTMLIdent());
+ ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent());
ASSERT_EQ(tok::html_equals, Toks[3].getKind());
@@ -1047,7 +1047,7 @@
}
TEST_F(CommentLexerTest, HTML12) {
- const char *Source = "// <tag attr=@";
+ const char *Source = "// <img src=@";
std::vector<Token> Toks;
@@ -1059,10 +1059,10 @@
ASSERT_EQ(StringRef(" "), Toks[0].getText());
ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
- ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagStartName());
+ ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
ASSERT_EQ(tok::html_ident, Toks[2].getKind());
- ASSERT_EQ(StringRef("attr"), Toks[2].getHTMLIdent());
+ ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent());
ASSERT_EQ(tok::html_equals, Toks[3].getKind());
@@ -1074,10 +1074,10 @@
TEST_F(CommentLexerTest, HTML13) {
const char *Sources[] = {
- "// <tag attr=\"val\\\"\\'val",
- "// <tag attr=\"val\\\"\\'val\"",
- "// <tag attr=\'val\\\"\\'val",
- "// <tag attr=\'val\\\"\\'val\'"
+ "// <img src=\"val\\\"\\'val",
+ "// <img src=\"val\\\"\\'val\"",
+ "// <img src=\'val\\\"\\'val",
+ "// <img src=\'val\\\"\\'val\'"
};
for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
@@ -1091,10 +1091,10 @@
ASSERT_EQ(StringRef(" "), Toks[0].getText());
ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
- ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagStartName());
+ ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
ASSERT_EQ(tok::html_ident, Toks[2].getKind());
- ASSERT_EQ(StringRef("attr"), Toks[2].getHTMLIdent());
+ ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent());
ASSERT_EQ(tok::html_equals, Toks[3].getKind());
@@ -1107,8 +1107,8 @@
TEST_F(CommentLexerTest, HTML14) {
const char *Sources[] = {
- "// <tag attr=\"val\\\"\\'val\">",
- "// <tag attr=\'val\\\"\\'val\'>"
+ "// <img src=\"val\\\"\\'val\">",
+ "// <img src=\'val\\\"\\'val\'>"
};
for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
@@ -1122,10 +1122,10 @@
ASSERT_EQ(StringRef(" "), Toks[0].getText());
ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
- ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagStartName());
+ ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
ASSERT_EQ(tok::html_ident, Toks[2].getKind());
- ASSERT_EQ(StringRef("attr"), Toks[2].getHTMLIdent());
+ ASSERT_EQ(StringRef("src"), Toks[2].getHTMLIdent());
ASSERT_EQ(tok::html_equals, Toks[3].getKind());
@@ -1140,8 +1140,8 @@
TEST_F(CommentLexerTest, HTML15) {
const char *Sources[] = {
- "// <tag/>",
- "// <tag />"
+ "// <img/>",
+ "// <img />"
};
for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
@@ -1155,7 +1155,7 @@
ASSERT_EQ(StringRef(" "), Toks[0].getText());
ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
- ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagStartName());
+ ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
ASSERT_EQ(tok::html_slash_greater, Toks[2].getKind());
@@ -1165,8 +1165,8 @@
TEST_F(CommentLexerTest, HTML16) {
const char *Sources[] = {
- "// <tag/ Aaa",
- "// <tag / Aaa"
+ "// <img/ Aaa",
+ "// <img / Aaa"
};
for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
@@ -1180,7 +1180,7 @@
ASSERT_EQ(StringRef(" "), Toks[0].getText());
ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
- ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagStartName());
+ ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagStartName());
ASSERT_EQ(tok::text, Toks[2].getKind());
ASSERT_EQ(StringRef("/"), Toks[2].getText());
@@ -1201,13 +1201,13 @@
ASSERT_EQ(3U, Toks.size());
- ASSERT_EQ(tok::text, Toks[0].getKind());
- ASSERT_EQ(StringRef(" "), Toks[0].getText());
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
- ASSERT_EQ(tok::html_end_tag, Toks[1].getKind());
- ASSERT_EQ(StringRef(""), Toks[1].getHTMLTagEndName());
+ ASSERT_EQ(tok::text, Toks[1].getKind());
+ ASSERT_EQ(StringRef("</"), Toks[1].getText());
- ASSERT_EQ(tok::newline, Toks[2].getKind());
+ ASSERT_EQ(tok::newline, Toks[2].getKind());
}
TEST_F(CommentLexerTest, HTML18) {
@@ -1219,20 +1219,20 @@
ASSERT_EQ(4U, Toks.size());
- ASSERT_EQ(tok::text, Toks[0].getKind());
- ASSERT_EQ(StringRef(" "), Toks[0].getText());
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
- ASSERT_EQ(tok::html_end_tag, Toks[1].getKind());
- ASSERT_EQ(StringRef(""), Toks[1].getHTMLTagEndName());
+ ASSERT_EQ(tok::text, Toks[1].getKind());
+ ASSERT_EQ(StringRef("</"), Toks[1].getText());
- ASSERT_EQ(tok::text, Toks[2].getKind());
- ASSERT_EQ(StringRef("@"), Toks[2].getText());
+ ASSERT_EQ(tok::text, Toks[2].getKind());
+ ASSERT_EQ(StringRef("@"), Toks[2].getText());
- ASSERT_EQ(tok::newline, Toks[3].getKind());
+ ASSERT_EQ(tok::newline, Toks[3].getKind());
}
TEST_F(CommentLexerTest, HTML19) {
- const char *Source = "// </tag";
+ const char *Source = "// </img";
std::vector<Token> Toks;
@@ -1244,35 +1244,51 @@
ASSERT_EQ(StringRef(" "), Toks[0].getText());
ASSERT_EQ(tok::html_end_tag, Toks[1].getKind());
- ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagEndName());
+ ASSERT_EQ(StringRef("img"), Toks[1].getHTMLTagEndName());
ASSERT_EQ(tok::newline, Toks[2].getKind());
}
-TEST_F(CommentLexerTest, HTML20) {
- const char *Sources[] = {
- "// </tag>",
- "// </ tag>",
- "// </ tag >"
- };
+TEST_F(CommentLexerTest, NotAKnownHTMLTag1) {
+ const char *Source = "// <tag>";
- for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
- std::vector<Token> Toks;
+ std::vector<Token> Toks;
- lexString(Sources[i], Toks);
+ lexString(Source, Toks);
- ASSERT_EQ(4U, Toks.size());
+ ASSERT_EQ(4U, Toks.size());
- ASSERT_EQ(tok::text, Toks[0].getKind());
- ASSERT_EQ(StringRef(" "), Toks[0].getText());
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
- ASSERT_EQ(tok::html_end_tag, Toks[1].getKind());
- ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagEndName());
+ ASSERT_EQ(tok::text, Toks[1].getKind());
+ ASSERT_EQ(StringRef("<tag"), Toks[1].getText());
- ASSERT_EQ(tok::html_greater, Toks[2].getKind());
+ ASSERT_EQ(tok::text, Toks[2].getKind());
+ ASSERT_EQ(StringRef(">"), Toks[2].getText());
- ASSERT_EQ(tok::newline, Toks[3].getKind());
- }
+ ASSERT_EQ(tok::newline, Toks[3].getKind());
+}
+
+TEST_F(CommentLexerTest, NotAKnownHTMLTag2) {
+ const char *Source = "// </tag>";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(4U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::text, Toks[1].getKind());
+ ASSERT_EQ(StringRef("</tag"), Toks[1].getText());
+
+ ASSERT_EQ(tok::text, Toks[2].getKind());
+ ASSERT_EQ(StringRef(">"), Toks[2].getText());
+
+ ASSERT_EQ(tok::newline, Toks[3].getKind());
}
TEST_F(CommentLexerTest, HTMLCharacterReferences1) {