Enable comment parsing and semantic analysis to emit diagnostics.  A few
diagnostics are implemented -- see the test cases.

I created a new TableGen file for comment diagnostics,
DiagnosticCommentKinds.td, because comment diagnostics don't logically
fit into the AST diagnostics file.  But I don't feel strongly about it.

This also implements support for self-closing HTML tags in the comment
lexer and parser (for example, <br />).

In order to issue precise diagnostics, CommentSema needs to know the
declaration the comment is attached to.  There is no easy way to find a decl
by comment, so we match comments and decls in lockstep: after parsing one
declgroup we check whether we have any new, not yet attached comments.  If we
do, we run the usual comment-finding process.

It is interesting that this automatically handles trailing comments.
We pick up not only comments that precede the declaration, but also
comments that *follow* the declaration -- thanks to the lookahead in
the lexer: after parsing the declgroup we've consumed the semicolon
and looked ahead through comments.

Added the -Wdocumentation-html flag for semantic HTML errors to allow the user
to disable only HTML warnings (but not HTML parse errors, which we emit as
warnings in -Wdocumentation).



git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@160078 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp
index a5b624f..27e4de9 100644
--- a/lib/AST/ASTContext.cpp
+++ b/lib/AST/ASTContext.cpp
@@ -56,7 +56,7 @@
   HalfRank, FloatRank, DoubleRank, LongDoubleRank
 };
 
-const RawComment *ASTContext::getRawCommentForDeclNoCache(const Decl *D) const {
+RawComment *ASTContext::getRawCommentForDeclNoCache(const Decl *D) const {
   if (!CommentsLoaded && ExternalSource) {
     ExternalSource->ReadComments();
     CommentsLoaded = true;
@@ -160,11 +160,13 @@
     return C.first;
   }
 
-  const RawComment *RC = getRawCommentForDeclNoCache(D);
+  RawComment *RC = getRawCommentForDeclNoCache(D);
   // If we found a comment, it should be a documentation comment.
   assert(!RC || RC->isDocumentation());
   DeclComments[D] =
       RawAndParsedComment(RC, static_cast<comments::FullComment *>(NULL));
+  if (RC)
+    RC->setAttached();
   return RC;
 }
 
@@ -187,8 +189,10 @@
   comments::Lexer L(RC->getSourceRange().getBegin(), comments::CommentOptions(),
                     RawText.begin(), RawText.end());
 
-  comments::Sema S(this->BumpAlloc);
-  comments::Parser P(L, S, this->BumpAlloc);
+  comments::Sema S(getAllocator(), getSourceManager(), getDiagnostics());
+  S.setDecl(D);
+  comments::Parser P(L, S, getAllocator(), getSourceManager(),
+                     getDiagnostics());
 
   comments::FullComment *FC = P.parseFullComment();
   DeclComments[D].second = FC;
diff --git a/lib/AST/CMakeLists.txt b/lib/AST/CMakeLists.txt
index c45f721..5f6a099 100644
--- a/lib/AST/CMakeLists.txt
+++ b/lib/AST/CMakeLists.txt
@@ -64,6 +64,7 @@
   ClangAttrList
   ClangAttrImpl
   ClangDiagnosticAST
+  ClangDiagnosticComment
   ClangCommentNodes
   ClangDeclNodes
   ClangStmtNodes
diff --git a/lib/AST/Comment.cpp b/lib/AST/Comment.cpp
index 4681d5a..1520d13 100644
--- a/lib/AST/Comment.cpp
+++ b/lib/AST/Comment.cpp
@@ -86,6 +86,38 @@
   llvm_unreachable("Unknown comment kind!");
 }
 
+bool TextComment::isWhitespace() const {
+  for (StringRef::const_iterator I = Text.begin(), E = Text.end();
+       I != E; ++I) {
+    const char C = *I;
+    if (C != ' ' && C != '\n' && C != '\r' &&
+        C != '\t' && C != '\f' && C != '\v')
+      return false;
+  }
+  return true;
+}
+
+bool ParagraphComment::isWhitespace() const {
+  for (child_iterator I = child_begin(), E = child_end(); I != E; ++I) {
+    if (const TextComment *TC = dyn_cast<TextComment>(*I)) {
+      if (!TC->isWhitespace())
+        return false;
+    }
+  }
+  return true;
+}
+
+const char *ParamCommandComment::getDirectionAsString(PassDirection D) {
+  switch (D) {
+  case ParamCommandComment::In:
+    return "[in]";
+  case ParamCommandComment::Out:
+    return "[out]";
+  case ParamCommandComment::InOut:
+    return "[in,out]";
+  }
+  llvm_unreachable("unknown PassDirection");
+}
 
 } // end namespace comments
 } // end namespace clang
diff --git a/lib/AST/CommentDumper.cpp b/lib/AST/CommentDumper.cpp
index fd7a394..267657b 100644
--- a/lib/AST/CommentDumper.cpp
+++ b/lib/AST/CommentDumper.cpp
@@ -121,6 +121,8 @@
       OS << " \"" << Attr.Name << "=\"" << Attr.Value << "\"";
     }
   }
+  if (C->isSelfClosing())
+    OS << " SelfClosing";
 }
 
 void CommentDumper::visitHTMLCloseTagComment(const HTMLCloseTagComment *C) {
@@ -142,17 +144,7 @@
 void CommentDumper::visitParamCommandComment(const ParamCommandComment *C) {
   dumpComment(C);
 
-  switch (C->getDirection()) {
-  case ParamCommandComment::In:
-    OS << " [in]";
-    break;
-  case ParamCommandComment::Out:
-    OS << " [out]";
-    break;
-  case ParamCommandComment::InOut:
-    OS << " [in,out]";
-    break;
-  }
+  OS << " " << ParamCommandComment::getDirectionAsString(C->getDirection());
 
   if (C->isDirectionExplicit())
     OS << " explicitly";
diff --git a/lib/AST/CommentLexer.cpp b/lib/AST/CommentLexer.cpp
index 55cd409..1f4955d 100644
--- a/lib/AST/CommentLexer.cpp
+++ b/lib/AST/CommentLexer.cpp
@@ -509,7 +509,7 @@
 
   const char C = *BufferPtr;
   if (BufferPtr != CommentEnd &&
-      (C == '>' || isHTMLIdentifierStartingCharacter(C)))
+      (C == '>' || C == '/' || isHTMLIdentifierStartingCharacter(C)))
     State = LS_HTMLOpenTag;
 }
 
@@ -546,6 +546,18 @@
       formTokenWithChars(T, TokenPtr, tok::html_greater);
       State = LS_Normal;
       return;
+    case '/':
+      TokenPtr++;
+      if (TokenPtr != CommentEnd && *TokenPtr == '>') {
+        TokenPtr++;
+        formTokenWithChars(T, TokenPtr, tok::html_slash_greater);
+      } else {
+        StringRef Text(BufferPtr, TokenPtr - BufferPtr);
+        formTokenWithChars(T, TokenPtr, tok::text);
+        T.setText(Text);
+      }
+      State = LS_Normal;
+      return;
     }
   }
 
diff --git a/lib/AST/CommentParser.cpp b/lib/AST/CommentParser.cpp
index 2df3759..eabe61c 100644
--- a/lib/AST/CommentParser.cpp
+++ b/lib/AST/CommentParser.cpp
@@ -9,13 +9,16 @@
 
 #include "clang/AST/CommentParser.h"
 #include "clang/AST/CommentSema.h"
+#include "clang/AST/CommentDiagnostic.h"
+#include "clang/Basic/SourceManager.h"
 #include "llvm/Support/ErrorHandling.h"
 
 namespace clang {
 namespace comments {
 
-Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator):
-    L(L), S(S), Allocator(Allocator) {
+Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
+               const SourceManager &SourceMgr, DiagnosticsEngine &Diags):
+    L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags) {
   consumeToken();
 }
 
@@ -26,18 +29,16 @@
   // Check if argument looks like direction specification: [dir]
   // e.g., [in], [out], [in,out]
   if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
-    PC = S.actOnParamCommandArg(PC,
-                                Arg.getLocation(),
-                                Arg.getEndLocation(),
-                                Arg.getText(),
-                                /* IsDirection = */ true);
+    PC = S.actOnParamCommandDirectionArg(PC,
+                                         Arg.getLocation(),
+                                         Arg.getEndLocation(),
+                                         Arg.getText());
 
   if (Retokenizer.lexWord(Arg))
-    PC = S.actOnParamCommandArg(PC,
-                                Arg.getLocation(),
-                                Arg.getEndLocation(),
-                                Arg.getText(),
-                                /* IsDirection = */ false);
+    PC = S.actOnParamCommandParamNameArg(PC,
+                                         Arg.getLocation(),
+                                         Arg.getEndLocation(),
+                                         Arg.getText());
 
   return PC;
 }
@@ -84,7 +85,6 @@
   if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) {
     // Block command ahead.  We can't nest block commands, so pretend that this
     // command has an empty argument.
-    // TODO: Diag() Warn empty arg to block command
     ParagraphComment *PC = S.actOnParagraphComment(
                                 ArrayRef<InlineContentComment *>());
     return S.actOnBlockCommandFinish(BC, PC);
@@ -164,7 +164,8 @@
 
   SmallVector<HTMLOpenTagComment::Attribute, 2> Attrs;
   while (true) {
-    if (Tok.is(tok::html_ident)) {
+    switch (Tok.getKind()) {
+    case tok::html_ident: {
       Token Ident = Tok;
       consumeToken();
       if (Tok.isNot(tok::html_equals)) {
@@ -175,9 +176,14 @@
       Token Equals = Tok;
       consumeToken();
       if (Tok.isNot(tok::html_quoted_string)) {
-        // TODO: Diag() expected quoted string
+        Diag(Tok.getLocation(),
+             diag::warn_doc_html_open_tag_expected_quoted_string)
+          << SourceRange(Equals.getLocation());
         Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(),
                                                       Ident.getHTMLIdent()));
+        while (Tok.is(tok::html_equals) ||
+               Tok.is(tok::html_quoted_string))
+          consumeToken();
         continue;
       }
       Attrs.push_back(HTMLOpenTagComment::Attribute(
@@ -189,24 +195,66 @@
                               Tok.getHTMLQuotedString()));
       consumeToken();
       continue;
-    } else if (Tok.is(tok::html_greater)) {
+    }
+
+    case tok::html_greater:
       HOT = S.actOnHTMLOpenTagFinish(HOT,
                                      copyArray(llvm::makeArrayRef(Attrs)),
-                                     Tok.getLocation());
+                                     Tok.getLocation(),
+                                     /* IsSelfClosing = */ false);
       consumeToken();
       return HOT;
-    } else if (Tok.is(tok::html_equals) ||
-               Tok.is(tok::html_quoted_string)) {
-      // TODO: Diag() Err expected ident
+
+    case tok::html_slash_greater:
+      HOT = S.actOnHTMLOpenTagFinish(HOT,
+                                     copyArray(llvm::makeArrayRef(Attrs)),
+                                     Tok.getLocation(),
+                                     /* IsSelfClosing = */ true);
+      consumeToken();
+      return HOT;
+
+    case tok::html_equals:
+    case tok::html_quoted_string:
+      Diag(Tok.getLocation(),
+           diag::warn_doc_html_open_tag_expected_ident_or_greater);
       while (Tok.is(tok::html_equals) ||
              Tok.is(tok::html_quoted_string))
         consumeToken();
-    } else {
-      // Not a token from HTML open tag.  Thus HTML tag prematurely ended.
-      // TODO: Diag() Err HTML tag prematurely ended
+      if (Tok.is(tok::html_ident) ||
+          Tok.is(tok::html_greater) ||
+          Tok.is(tok::html_slash_greater))
+        continue;
+
       return S.actOnHTMLOpenTagFinish(HOT,
                                       copyArray(llvm::makeArrayRef(Attrs)),
-                                      SourceLocation());
+                                      SourceLocation(),
+                                      /* IsSelfClosing = */ false);
+
+    default:
+      // Not a token from an HTML open tag.  Thus HTML tag prematurely ended.
+      HOT = S.actOnHTMLOpenTagFinish(HOT,
+                                     copyArray(llvm::makeArrayRef(Attrs)),
+                                     SourceLocation(),
+                                     /* IsSelfClosing = */ false);
+      bool StartLineInvalid;
+      const unsigned StartLine = SourceMgr.getPresumedLineNumber(
+                                                  HOT->getLocation(),
+                                                  &StartLineInvalid);
+      bool EndLineInvalid;
+      const unsigned EndLine = SourceMgr.getPresumedLineNumber(
+                                                  Tok.getLocation(),
+                                                  &EndLineInvalid);
+      if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
+        Diag(Tok.getLocation(),
+             diag::warn_doc_html_open_tag_expected_ident_or_greater)
+          << HOT->getSourceRange();
+      else {
+        Diag(Tok.getLocation(),
+             diag::warn_doc_html_open_tag_expected_ident_or_greater);
+        Diag(HOT->getLocation(), diag::note_doc_html_tag_started_here)
+          << HOT->getSourceRange();
+      }
+      return HOT;
     }
   }
 }
@@ -289,6 +337,7 @@
     case tok::html_equals:
     case tok::html_quoted_string:
     case tok::html_greater:
+    case tok::html_slash_greater:
       llvm_unreachable("should not see this token");
     }
     break;
@@ -388,6 +437,7 @@
   case tok::html_equals:
   case tok::html_quoted_string:
   case tok::html_greater:
+  case tok::html_slash_greater:
     llvm_unreachable("should not see this token");
   }
   llvm_unreachable("bogus token kind");
diff --git a/lib/AST/CommentSema.cpp b/lib/AST/CommentSema.cpp
index 1193e04..fa8001b 100644
--- a/lib/AST/CommentSema.cpp
+++ b/lib/AST/CommentSema.cpp
@@ -8,13 +8,22 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/AST/CommentSema.h"
+#include "clang/AST/CommentDiagnostic.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclObjC.h"
+#include "clang/Basic/SourceManager.h"
 #include "llvm/ADT/StringSwitch.h"
 
 namespace clang {
 namespace comments {
 
-Sema::Sema(llvm::BumpPtrAllocator &Allocator) :
-    Allocator(Allocator) {
+Sema::Sema(llvm::BumpPtrAllocator &Allocator, const SourceManager &SourceMgr,
+           DiagnosticsEngine &Diags) :
+    Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags), ThisDecl(NULL) {
+}
+
+void Sema::setDecl(const Decl *D) {
+  ThisDecl = D;
 }
 
 ParagraphComment *Sema::actOnParagraphComment(
@@ -39,83 +48,153 @@
                               BlockCommandComment *Command,
                               ParagraphComment *Paragraph) {
   Command->setParagraph(Paragraph);
+  checkBlockCommandEmptyParagraph(Command);
   return Command;
 }
 
 ParamCommandComment *Sema::actOnParamCommandStart(SourceLocation LocBegin,
                                                   SourceLocation LocEnd,
                                                   StringRef Name) {
-  return new (Allocator) ParamCommandComment(LocBegin, LocEnd, Name);
+  ParamCommandComment *Command =
+      new (Allocator) ParamCommandComment(LocBegin, LocEnd, Name);
+
+  if (!ThisDecl ||
+      !(isa<FunctionDecl>(ThisDecl) || isa<ObjCMethodDecl>(ThisDecl)))
+    Diag(Command->getLocation(),
+         diag::warn_doc_param_not_attached_to_a_function_decl)
+      << Command->getCommandNameRange();
+
+  return Command;
 }
 
-ParamCommandComment *Sema::actOnParamCommandArg(ParamCommandComment *Command,
+ParamCommandComment *Sema::actOnParamCommandDirectionArg(
+                                                ParamCommandComment *Command,
                                                 SourceLocation ArgLocBegin,
                                                 SourceLocation ArgLocEnd,
-                                                StringRef Arg,
-                                                bool IsDirection) {
-  if (IsDirection) {
-    ParamCommandComment::PassDirection Direction;
-    std::string ArgLower = Arg.lower();
-    // TODO: optimize: lower Name first (need an API in SmallString for that),
-    // after that StringSwitch.
-    if (ArgLower == "[in]")
-      Direction = ParamCommandComment::In;
-    else if (ArgLower == "[out]")
-      Direction = ParamCommandComment::Out;
-    else if (ArgLower == "[in,out]" || ArgLower == "[out,in]")
-      Direction = ParamCommandComment::InOut;
-    else {
-      // Remove spaces.
-      std::string::iterator O = ArgLower.begin();
-      for (std::string::iterator I = ArgLower.begin(), E = ArgLower.end();
-           I != E; ++I) {
-        const char C = *I;
-        if (C != ' ' && C != '\n' && C != '\r' &&
-            C != '\t' && C != '\v' && C != '\f')
-          *O++ = C;
-      }
-      ArgLower.resize(O - ArgLower.begin());
+                                                StringRef Arg) {
+  ParamCommandComment::PassDirection Direction;
+  std::string ArgLower = Arg.lower();
+  // TODO: optimize: lower Name first (need an API in SmallString for that),
+  // after that StringSwitch.
+  if (ArgLower == "[in]")
+    Direction = ParamCommandComment::In;
+  else if (ArgLower == "[out]")
+    Direction = ParamCommandComment::Out;
+  else if (ArgLower == "[in,out]" || ArgLower == "[out,in]")
+    Direction = ParamCommandComment::InOut;
+  else {
+    // Remove spaces.
+    std::string::iterator O = ArgLower.begin();
+    for (std::string::iterator I = ArgLower.begin(), E = ArgLower.end();
+         I != E; ++I) {
+      const char C = *I;
+      if (C != ' ' && C != '\n' && C != '\r' &&
+          C != '\t' && C != '\v' && C != '\f')
+        *O++ = C;
+    }
+    ArgLower.resize(O - ArgLower.begin());
 
-      bool RemovingWhitespaceHelped = false;
-      if (ArgLower == "[in]") {
-        Direction = ParamCommandComment::In;
-        RemovingWhitespaceHelped = true;
-      } else if (ArgLower == "[out]") {
-        Direction = ParamCommandComment::Out;
-        RemovingWhitespaceHelped = true;
-      } else if (ArgLower == "[in,out]" || ArgLower == "[out,in]") {
-        Direction = ParamCommandComment::InOut;
-        RemovingWhitespaceHelped = true;
-      } else {
-        Direction = ParamCommandComment::In;
-        RemovingWhitespaceHelped = false;
-      }
-      // Diag() unrecognized parameter passing direction, valid directions are ...
-      // if (RemovingWhitespaceHelped) FixIt
-    }
-    Command->setDirection(Direction, /* Explicit = */ true);
-  } else {
-    if (Command->getArgCount() == 0) {
-      if (!Command->isDirectionExplicit()) {
-        // User didn't provide a direction argument.
-        Command->setDirection(ParamCommandComment::In, /* Explicit = */ false);
-      }
-      typedef BlockCommandComment::Argument Argument;
-      Argument *A = new (Allocator) Argument(SourceRange(ArgLocBegin,
-                                                         ArgLocEnd),
-                                             Arg);
-      Command->setArgs(llvm::makeArrayRef(A, 1));
-      // if (...) Diag() unrecognized parameter name
+    bool RemovingWhitespaceHelped = false;
+    if (ArgLower == "[in]") {
+      Direction = ParamCommandComment::In;
+      RemovingWhitespaceHelped = true;
+    } else if (ArgLower == "[out]") {
+      Direction = ParamCommandComment::Out;
+      RemovingWhitespaceHelped = true;
+    } else if (ArgLower == "[in,out]" || ArgLower == "[out,in]") {
+      Direction = ParamCommandComment::InOut;
+      RemovingWhitespaceHelped = true;
     } else {
-      // Diag() \\param command requires at most 2 arguments
+      Direction = ParamCommandComment::In;
+      RemovingWhitespaceHelped = false;
     }
+
+    SourceRange ArgRange(ArgLocBegin, ArgLocEnd);
+    if (RemovingWhitespaceHelped)
+      Diag(ArgLocBegin, diag::warn_doc_param_spaces_in_direction)
+        << ArgRange
+        << FixItHint::CreateReplacement(
+                          ArgRange,
+                          ParamCommandComment::getDirectionAsString(Direction));
+    else
+      Diag(ArgLocBegin, diag::warn_doc_param_invalid_direction)
+        << ArgRange;
   }
+  Command->setDirection(Direction, /* Explicit = */ true);
+  return Command;
+}
+
+ParamCommandComment *Sema::actOnParamCommandParamNameArg(
+                                                ParamCommandComment *Command,
+                                                SourceLocation ArgLocBegin,
+                                                SourceLocation ArgLocEnd,
+                                                StringRef Arg) {
+  // Parser will not feed us more arguments than needed.
+  assert(Command->getArgCount() == 0);
+
+  if (!Command->isDirectionExplicit()) {
+    // User didn't provide a direction argument.
+    Command->setDirection(ParamCommandComment::In, /* Explicit = */ false);
+  }
+  typedef BlockCommandComment::Argument Argument;
+  Argument *A = new (Allocator) Argument(SourceRange(ArgLocBegin,
+                                                     ArgLocEnd),
+                                         Arg);
+  Command->setArgs(llvm::makeArrayRef(A, 1));
+
+  if (!ThisDecl)
+    return Command;
+
+  const ParmVarDecl * const *ParamVars;
+  unsigned NumParams;
+  if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ThisDecl)) {
+    ParamVars = FD->param_begin();
+    NumParams = FD->getNumParams();
+  } else if (const ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(ThisDecl)) {
+    ParamVars = MD->param_begin();
+    NumParams = MD->param_size();
+  } else {
+    // We already warned that this \\param is not attached to a function decl.
+    return Command;
+  }
+
+  // Check that referenced parameter name is in the function decl.
+  const unsigned ResolvedParamIndex = resolveParmVarReference(Arg, ParamVars,
+                                                              NumParams);
+  if (ResolvedParamIndex != ParamCommandComment::InvalidParamIndex) {
+    Command->setParamIndex(ResolvedParamIndex);
+    return Command;
+  }
+
+  SourceRange ArgRange(ArgLocBegin, ArgLocEnd);
+  Diag(ArgLocBegin, diag::warn_doc_param_not_found)
+    << Arg << ArgRange;
+
+  unsigned CorrectedParamIndex = ParamCommandComment::InvalidParamIndex;
+  if (NumParams == 1) {
+    // If function has only one parameter then only that parameter
+    // can be documented.
+    CorrectedParamIndex = 0;
+  } else {
+    // Do typo correction.
+    CorrectedParamIndex = correctTypoInParmVarReference(Arg, ParamVars,
+                                                        NumParams);
+  }
+  if (CorrectedParamIndex != ParamCommandComment::InvalidParamIndex) {
+    const ParmVarDecl *CorrectedPVD = ParamVars[CorrectedParamIndex];
+    if (const IdentifierInfo *CorrectedII = CorrectedPVD->getIdentifier())
+      Diag(ArgLocBegin, diag::note_doc_param_name_suggestion)
+        << CorrectedII->getName()
+        << FixItHint::CreateReplacement(ArgRange, CorrectedII->getName());
+  }
+
   return Command;
 }
 
 ParamCommandComment *Sema::actOnParamCommandFinish(ParamCommandComment *Command,
                                                    ParagraphComment *Paragraph) {
   Command->setParagraph(Paragraph);
+  checkBlockCommandEmptyParagraph(Command);
   return Command;
 }
 
@@ -196,22 +275,78 @@
 
 HTMLOpenTagComment *Sema::actOnHTMLOpenTagStart(SourceLocation LocBegin,
                                                 StringRef TagName) {
-  return new (Allocator) HTMLOpenTagComment(LocBegin, TagName);
+  HTMLOpenTagComment *HOT =
+      new (Allocator) HTMLOpenTagComment(LocBegin, TagName);
+  return HOT;
 }
 
 HTMLOpenTagComment *Sema::actOnHTMLOpenTagFinish(
                               HTMLOpenTagComment *Tag,
                               ArrayRef<HTMLOpenTagComment::Attribute> Attrs,
-                              SourceLocation GreaterLoc) {
+                              SourceLocation GreaterLoc,
+                              bool IsSelfClosing) {
   Tag->setAttrs(Attrs);
   Tag->setGreaterLoc(GreaterLoc);
+  if (IsSelfClosing)
+    Tag->setSelfClosing();
+  else
+    HTMLOpenTags.push_back(Tag);
   return Tag;
 }
 
 HTMLCloseTagComment *Sema::actOnHTMLCloseTag(SourceLocation LocBegin,
                                              SourceLocation LocEnd,
                                              StringRef TagName) {
-  return new (Allocator) HTMLCloseTagComment(LocBegin, LocEnd, TagName);
+  HTMLCloseTagComment *HCT =
+      new (Allocator) HTMLCloseTagComment(LocBegin, LocEnd, TagName);
+  bool FoundOpen = false;
+  for (SmallVectorImpl<HTMLOpenTagComment *>::const_reverse_iterator
+       I = HTMLOpenTags.rbegin(), E = HTMLOpenTags.rend();
+       I != E; ++I) {
+    if ((*I)->getTagName() == TagName) {
+      FoundOpen = true;
+      break;
+    }
+  }
+  if (!FoundOpen) {
+    Diag(HCT->getLocation(), diag::warn_doc_html_close_unbalanced)
+      << HCT->getSourceRange();
+    return HCT;
+  }
+
+  while (!HTMLOpenTags.empty()) {
+    const HTMLOpenTagComment *HOT = HTMLOpenTags.back();
+    HTMLOpenTags.pop_back();
+    StringRef LastNotClosedTagName = HOT->getTagName();
+    if (LastNotClosedTagName == TagName)
+      break;
+
+    if (!HTMLOpenTagNeedsClosing(LastNotClosedTagName))
+      continue;
+
+    bool OpenLineInvalid;
+    const unsigned OpenLine = SourceMgr.getPresumedLineNumber(
+                                                HOT->getLocation(),
+                                                &OpenLineInvalid);
+    bool CloseLineInvalid;
+    const unsigned CloseLine = SourceMgr.getPresumedLineNumber(
+                                                HCT->getLocation(),
+                                                &CloseLineInvalid);
+
+    if (OpenLineInvalid || CloseLineInvalid || OpenLine == CloseLine)
+      Diag(HOT->getLocation(), diag::warn_doc_html_open_close_mismatch)
+        << HOT->getTagName() << HCT->getTagName()
+        << HOT->getSourceRange() << HCT->getSourceRange();
+    else {
+      Diag(HOT->getLocation(), diag::warn_doc_html_open_close_mismatch)
+        << HOT->getTagName() << HCT->getTagName()
+        << HOT->getSourceRange();
+      Diag(HCT->getLocation(), diag::note_doc_html_closing_tag)
+        << HCT->getSourceRange();
+    }
+  }
+
+  return HCT;
 }
 
 FullComment *Sema::actOnFullComment(
@@ -219,6 +354,61 @@
   return new (Allocator) FullComment(Blocks);
 }
 
+void Sema::checkBlockCommandEmptyParagraph(BlockCommandComment *Command) {
+  ParagraphComment *Paragraph = Command->getParagraph();
+  if (Paragraph->isWhitespace()) {
+    SourceLocation DiagLoc;
+    if (Command->getArgCount() > 0)
+      DiagLoc = Command->getArgRange(Command->getArgCount() - 1).getEnd();
+    if (!DiagLoc.isValid())
+      DiagLoc = Command->getCommandNameRange().getEnd();
+    Diag(DiagLoc, diag::warn_doc_block_command_empty_paragraph)
+      << Command->getCommandName()
+      << Command->getSourceRange();
+  }
+}
+
+unsigned Sema::resolveParmVarReference(StringRef Name,
+                                       const ParmVarDecl * const *ParamVars,
+                                       unsigned NumParams) {
+  for (unsigned i = 0; i != NumParams; ++i) {
+    const IdentifierInfo *II = ParamVars[i]->getIdentifier();
+    if (II && II->getName() == Name)
+      return i;
+  }
+  return ParamCommandComment::InvalidParamIndex;
+}
+
+// Returns the index of the named parameter closest to Typo by edit distance,
+// or ParamCommandComment::InvalidParamIndex when none is close enough.
+unsigned Sema::correctTypoInParmVarReference(
+                                    StringRef Typo,
+                                    const ParmVarDecl * const *ParamVars,
+                                    unsigned NumParams) {
+  const unsigned MaxEditDistance = (Typo.size() + 2) / 3;
+  unsigned BestPVDIndex = ParamCommandComment::InvalidParamIndex;
+  unsigned BestEditDistance = MaxEditDistance + 1;
+  for (unsigned i = 0; i != NumParams; ++i) {
+    const IdentifierInfo *II = ParamVars[i]->getIdentifier();
+    if (!II)
+      continue;
+    StringRef Name = II->getName();
+    // The length difference bounds the edit distance from below; compute it
+    // without letting the unsigned subtraction wrap around.
+    const size_t MinPossibleEditDistance = Name.size() > Typo.size()
+        ? Name.size() - Typo.size() : Typo.size() - Name.size();
+    if (MinPossibleEditDistance > 0 &&
+        Typo.size() / MinPossibleEditDistance < 3)
+      continue;
+    unsigned EditDistance = Typo.edit_distance(Name, true, MaxEditDistance);
+    if (EditDistance < BestEditDistance) {
+      BestEditDistance = EditDistance;
+      BestPVDIndex = i;
+    }
+  }
+  return BestPVDIndex;
+}
+
 // TODO: tablegen
 bool Sema::isBlockCommand(StringRef Name) {
   return llvm::StringSwitch<bool>(Name)
@@ -259,7 +449,9 @@
 
 bool Sema::HTMLOpenTagNeedsClosing(StringRef Name) {
   return llvm::StringSwitch<bool>(Name)
-      .Case("br", true)
+      .Case("br", false)
+      .Case("hr", false)
+      .Case("li", false)
       .Default(true);
 }
 
diff --git a/lib/AST/RawCommentList.cpp b/lib/AST/RawCommentList.cpp
index d67eb08..7e183e2 100644
--- a/lib/AST/RawCommentList.cpp
+++ b/lib/AST/RawCommentList.cpp
@@ -61,7 +61,7 @@
 RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR,
                        bool Merged) :
     Range(SR), RawTextValid(false), BriefTextValid(false),
-    IsAlmostTrailingComment(false),
+    IsAttached(false), IsAlmostTrailingComment(false),
     BeginLineValid(false), EndLineValid(false) {
   // Extract raw comment text, if possible.
   if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) {