Enable comment parsing and semantic analysis to emit diagnostics.  A few
diagnostics are implemented -- see the test cases.

I created a new TableGen file for comment diagnostics,
DiagnosticCommentKinds.td, because comment diagnostics don't logically
fit into the AST diagnostics file.  But I don't feel strongly about it.

This also implements support for self-closing HTML tags in the comment
lexer and parser (for example, <br />).

In order to issue precise diagnostics, CommentSema needs to know the
declaration the comment is attached to.  There is no easy way to find a decl
by its comment, so we match comments and decls in lockstep: after parsing one
declgroup we check whether there are any new, not yet attached comments.  If
there are, we run the usual comment-finding process.

It is interesting that this automatically handles trailing comments.
We pick up not only comments that precede the declaration, but also
comments that *follow* the declaration -- thanks to the lookahead in
the lexer: after parsing the declgroup we've consumed the semicolon
and looked ahead through comments.

Added the -Wdocumentation-html flag for semantic HTML errors to allow the
user to disable only HTML warnings (but not HTML parse errors, which we emit
as warnings in -Wdocumentation).



git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@160078 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/AST/CommentSema.cpp b/lib/AST/CommentSema.cpp
index 1193e04..fa8001b 100644
--- a/lib/AST/CommentSema.cpp
+++ b/lib/AST/CommentSema.cpp
@@ -8,13 +8,22 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/AST/CommentSema.h"
+#include "clang/AST/CommentDiagnostic.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclObjC.h"
+#include "clang/Basic/SourceManager.h"
 #include "llvm/ADT/StringSwitch.h"
 
 namespace clang {
 namespace comments {
 
-Sema::Sema(llvm::BumpPtrAllocator &Allocator) :
-    Allocator(Allocator) {
+Sema::Sema(llvm::BumpPtrAllocator &Allocator, const SourceManager &SourceMgr,
+           DiagnosticsEngine &Diags) :
+    Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags), ThisDecl(NULL) {
+}
+
+void Sema::setDecl(const Decl *D) {
+  ThisDecl = D;
 }
 
 ParagraphComment *Sema::actOnParagraphComment(
@@ -39,83 +48,153 @@
                               BlockCommandComment *Command,
                               ParagraphComment *Paragraph) {
   Command->setParagraph(Paragraph);
+  checkBlockCommandEmptyParagraph(Command);
   return Command;
 }
 
 ParamCommandComment *Sema::actOnParamCommandStart(SourceLocation LocBegin,
                                                   SourceLocation LocEnd,
                                                   StringRef Name) {
-  return new (Allocator) ParamCommandComment(LocBegin, LocEnd, Name);
+  ParamCommandComment *Command =
+      new (Allocator) ParamCommandComment(LocBegin, LocEnd, Name);
+
+  if (!ThisDecl ||
+      !(isa<FunctionDecl>(ThisDecl) || isa<ObjCMethodDecl>(ThisDecl)))
+    Diag(Command->getLocation(),
+         diag::warn_doc_param_not_attached_to_a_function_decl)
+      << Command->getCommandNameRange();
+
+  return Command;
 }
 
-ParamCommandComment *Sema::actOnParamCommandArg(ParamCommandComment *Command,
+ParamCommandComment *Sema::actOnParamCommandDirectionArg(
+                                                ParamCommandComment *Command,
                                                 SourceLocation ArgLocBegin,
                                                 SourceLocation ArgLocEnd,
-                                                StringRef Arg,
-                                                bool IsDirection) {
-  if (IsDirection) {
-    ParamCommandComment::PassDirection Direction;
-    std::string ArgLower = Arg.lower();
-    // TODO: optimize: lower Name first (need an API in SmallString for that),
-    // after that StringSwitch.
-    if (ArgLower == "[in]")
-      Direction = ParamCommandComment::In;
-    else if (ArgLower == "[out]")
-      Direction = ParamCommandComment::Out;
-    else if (ArgLower == "[in,out]" || ArgLower == "[out,in]")
-      Direction = ParamCommandComment::InOut;
-    else {
-      // Remove spaces.
-      std::string::iterator O = ArgLower.begin();
-      for (std::string::iterator I = ArgLower.begin(), E = ArgLower.end();
-           I != E; ++I) {
-        const char C = *I;
-        if (C != ' ' && C != '\n' && C != '\r' &&
-            C != '\t' && C != '\v' && C != '\f')
-          *O++ = C;
-      }
-      ArgLower.resize(O - ArgLower.begin());
+                                                StringRef Arg) {
+  ParamCommandComment::PassDirection Direction;
+  std::string ArgLower = Arg.lower();
+  // TODO: optimize: lower Name first (need an API in SmallString for that),
+  // after that StringSwitch.
+  if (ArgLower == "[in]")
+    Direction = ParamCommandComment::In;
+  else if (ArgLower == "[out]")
+    Direction = ParamCommandComment::Out;
+  else if (ArgLower == "[in,out]" || ArgLower == "[out,in]")
+    Direction = ParamCommandComment::InOut;
+  else {
+    // Remove spaces.
+    std::string::iterator O = ArgLower.begin();
+    for (std::string::iterator I = ArgLower.begin(), E = ArgLower.end();
+         I != E; ++I) {
+      const char C = *I;
+      if (C != ' ' && C != '\n' && C != '\r' &&
+          C != '\t' && C != '\v' && C != '\f')
+        *O++ = C;
+    }
+    ArgLower.resize(O - ArgLower.begin());
 
-      bool RemovingWhitespaceHelped = false;
-      if (ArgLower == "[in]") {
-        Direction = ParamCommandComment::In;
-        RemovingWhitespaceHelped = true;
-      } else if (ArgLower == "[out]") {
-        Direction = ParamCommandComment::Out;
-        RemovingWhitespaceHelped = true;
-      } else if (ArgLower == "[in,out]" || ArgLower == "[out,in]") {
-        Direction = ParamCommandComment::InOut;
-        RemovingWhitespaceHelped = true;
-      } else {
-        Direction = ParamCommandComment::In;
-        RemovingWhitespaceHelped = false;
-      }
-      // Diag() unrecognized parameter passing direction, valid directions are ...
-      // if (RemovingWhitespaceHelped) FixIt
-    }
-    Command->setDirection(Direction, /* Explicit = */ true);
-  } else {
-    if (Command->getArgCount() == 0) {
-      if (!Command->isDirectionExplicit()) {
-        // User didn't provide a direction argument.
-        Command->setDirection(ParamCommandComment::In, /* Explicit = */ false);
-      }
-      typedef BlockCommandComment::Argument Argument;
-      Argument *A = new (Allocator) Argument(SourceRange(ArgLocBegin,
-                                                         ArgLocEnd),
-                                             Arg);
-      Command->setArgs(llvm::makeArrayRef(A, 1));
-      // if (...) Diag() unrecognized parameter name
+    bool RemovingWhitespaceHelped = false;
+    if (ArgLower == "[in]") {
+      Direction = ParamCommandComment::In;
+      RemovingWhitespaceHelped = true;
+    } else if (ArgLower == "[out]") {
+      Direction = ParamCommandComment::Out;
+      RemovingWhitespaceHelped = true;
+    } else if (ArgLower == "[in,out]" || ArgLower == "[out,in]") {
+      Direction = ParamCommandComment::InOut;
+      RemovingWhitespaceHelped = true;
     } else {
-      // Diag() \\param command requires at most 2 arguments
+      Direction = ParamCommandComment::In;
+      RemovingWhitespaceHelped = false;
     }
+
+    SourceRange ArgRange(ArgLocBegin, ArgLocEnd);
+    if (RemovingWhitespaceHelped)
+      Diag(ArgLocBegin, diag::warn_doc_param_spaces_in_direction)
+        << ArgRange
+        << FixItHint::CreateReplacement(
+                          ArgRange,
+                          ParamCommandComment::getDirectionAsString(Direction));
+    else
+      Diag(ArgLocBegin, diag::warn_doc_param_invalid_direction)
+        << ArgRange;
   }
+  Command->setDirection(Direction, /* Explicit = */ true);
+  return Command;
+}
+
+ParamCommandComment *Sema::actOnParamCommandParamNameArg(
+                                                ParamCommandComment *Command,
+                                                SourceLocation ArgLocBegin,
+                                                SourceLocation ArgLocEnd,
+                                                StringRef Arg) {
+  // Parser will not feed us more arguments than needed.
+  assert(Command->getArgCount() == 0);
+
+  if (!Command->isDirectionExplicit()) {
+    // User didn't provide a direction argument.
+    Command->setDirection(ParamCommandComment::In, /* Explicit = */ false);
+  }
+  typedef BlockCommandComment::Argument Argument;
+  Argument *A = new (Allocator) Argument(SourceRange(ArgLocBegin,
+                                                     ArgLocEnd),
+                                         Arg);
+  Command->setArgs(llvm::makeArrayRef(A, 1));
+
+  if (!ThisDecl)
+    return Command;
+
+  const ParmVarDecl * const *ParamVars;
+  unsigned NumParams;
+  if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ThisDecl)) {
+    ParamVars = FD->param_begin();
+    NumParams = FD->getNumParams();
+  } else if (const ObjCMethodDecl *MD = dyn_cast<ObjCMethodDecl>(ThisDecl)) {
+    ParamVars = MD->param_begin();
+    NumParams = MD->param_size();
+  } else {
+    // We already warned that this \\param is not attached to a function decl.
+    return Command;
+  }
+
+  // Check that referenced parameter name is in the function decl.
+  const unsigned ResolvedParamIndex = resolveParmVarReference(Arg, ParamVars,
+                                                              NumParams);
+  if (ResolvedParamIndex != ParamCommandComment::InvalidParamIndex) {
+    Command->setParamIndex(ResolvedParamIndex);
+    return Command;
+  }
+
+  SourceRange ArgRange(ArgLocBegin, ArgLocEnd);
+  Diag(ArgLocBegin, diag::warn_doc_param_not_found)
+    << Arg << ArgRange;
+
+  unsigned CorrectedParamIndex = ParamCommandComment::InvalidParamIndex;
+  if (NumParams == 1) {
+    // If function has only one parameter then only that parameter
+    // can be documented.
+    CorrectedParamIndex = 0;
+  } else {
+    // Do typo correction.
+    CorrectedParamIndex = correctTypoInParmVarReference(Arg, ParamVars,
+                                                        NumParams);
+  }
+  if (CorrectedParamIndex != ParamCommandComment::InvalidParamIndex) {
+    const ParmVarDecl *CorrectedPVD = ParamVars[CorrectedParamIndex];
+    if (const IdentifierInfo *CorrectedII = CorrectedPVD->getIdentifier())
+      Diag(ArgLocBegin, diag::note_doc_param_name_suggestion)
+        << CorrectedII->getName()
+        << FixItHint::CreateReplacement(ArgRange, CorrectedII->getName());
+  }
+
   return Command;
 }
 
 ParamCommandComment *Sema::actOnParamCommandFinish(ParamCommandComment *Command,
                                                    ParagraphComment *Paragraph) {
   Command->setParagraph(Paragraph);
+  checkBlockCommandEmptyParagraph(Command);
   return Command;
 }
 
@@ -196,22 +275,78 @@
 
 HTMLOpenTagComment *Sema::actOnHTMLOpenTagStart(SourceLocation LocBegin,
                                                 StringRef TagName) {
-  return new (Allocator) HTMLOpenTagComment(LocBegin, TagName);
+  HTMLOpenTagComment *HOT =
+      new (Allocator) HTMLOpenTagComment(LocBegin, TagName);
+  return HOT;
 }
 
 HTMLOpenTagComment *Sema::actOnHTMLOpenTagFinish(
                               HTMLOpenTagComment *Tag,
                               ArrayRef<HTMLOpenTagComment::Attribute> Attrs,
-                              SourceLocation GreaterLoc) {
+                              SourceLocation GreaterLoc,
+                              bool IsSelfClosing) {
   Tag->setAttrs(Attrs);
   Tag->setGreaterLoc(GreaterLoc);
+  if (IsSelfClosing)
+    Tag->setSelfClosing();
+  else
+    HTMLOpenTags.push_back(Tag);
   return Tag;
 }
 
 HTMLCloseTagComment *Sema::actOnHTMLCloseTag(SourceLocation LocBegin,
                                              SourceLocation LocEnd,
                                              StringRef TagName) {
-  return new (Allocator) HTMLCloseTagComment(LocBegin, LocEnd, TagName);
+  HTMLCloseTagComment *HCT =
+      new (Allocator) HTMLCloseTagComment(LocBegin, LocEnd, TagName);
+  bool FoundOpen = false;
+  for (SmallVectorImpl<HTMLOpenTagComment *>::const_reverse_iterator
+       I = HTMLOpenTags.rbegin(), E = HTMLOpenTags.rend();
+       I != E; ++I) {
+    if ((*I)->getTagName() == TagName) {
+      FoundOpen = true;
+      break;
+    }
+  }
+  if (!FoundOpen) {
+    Diag(HCT->getLocation(), diag::warn_doc_html_close_unbalanced)
+      << HCT->getSourceRange();
+    return HCT;
+  }
+
+  while (!HTMLOpenTags.empty()) {
+    const HTMLOpenTagComment *HOT = HTMLOpenTags.back();
+    HTMLOpenTags.pop_back();
+    StringRef LastNotClosedTagName = HOT->getTagName();
+    if (LastNotClosedTagName == TagName)
+      break;
+
+    if (!HTMLOpenTagNeedsClosing(LastNotClosedTagName))
+      continue;
+
+    bool OpenLineInvalid;
+    const unsigned OpenLine = SourceMgr.getPresumedLineNumber(
+                                                HOT->getLocation(),
+                                                &OpenLineInvalid);
+    bool CloseLineInvalid;
+    const unsigned CloseLine = SourceMgr.getPresumedLineNumber(
+                                                HCT->getLocation(),
+                                                &CloseLineInvalid);
+
+    if (OpenLineInvalid || CloseLineInvalid || OpenLine == CloseLine)
+      Diag(HOT->getLocation(), diag::warn_doc_html_open_close_mismatch)
+        << HOT->getTagName() << HCT->getTagName()
+        << HOT->getSourceRange() << HCT->getSourceRange();
+    else {
+      Diag(HOT->getLocation(), diag::warn_doc_html_open_close_mismatch)
+        << HOT->getTagName() << HCT->getTagName()
+        << HOT->getSourceRange();
+      Diag(HCT->getLocation(), diag::note_doc_html_closing_tag)
+        << HCT->getSourceRange();
+    }
+  }
+
+  return HCT;
 }
 
 FullComment *Sema::actOnFullComment(
@@ -219,6 +354,61 @@
   return new (Allocator) FullComment(Blocks);
 }
 
+void Sema::checkBlockCommandEmptyParagraph(BlockCommandComment *Command) {
+  ParagraphComment *Paragraph = Command->getParagraph();
+  if (Paragraph->isWhitespace()) {
+    SourceLocation DiagLoc;
+    if (Command->getArgCount() > 0)
+      DiagLoc = Command->getArgRange(Command->getArgCount() - 1).getEnd();
+    if (!DiagLoc.isValid())
+      DiagLoc = Command->getCommandNameRange().getEnd();
+    Diag(DiagLoc, diag::warn_doc_block_command_empty_paragraph)
+      << Command->getCommandName()
+      << Command->getSourceRange();
+  }
+}
+
+unsigned Sema::resolveParmVarReference(StringRef Name,
+                                       const ParmVarDecl * const *ParamVars,
+                                       unsigned NumParams) {
+  for (unsigned i = 0; i != NumParams; ++i) {
+    const IdentifierInfo *II = ParamVars[i]->getIdentifier();
+    if (II && II->getName() == Name)
+      return i;
+  }
+  return ParamCommandComment::InvalidParamIndex;
+}
+
+unsigned Sema::correctTypoInParmVarReference(
+                                    StringRef Typo,
+                                    const ParmVarDecl * const *ParamVars,
+                                    unsigned NumParams) {
+  // Returns the index of the best-matching parameter, or InvalidParamIndex.
+  const unsigned MaxEditDistance = (Typo.size() + 2) / 3;
+  unsigned BestPVDIndex = 0;
+  unsigned BestEditDistance = MaxEditDistance + 1;
+  for (unsigned i = 0; i != NumParams; ++i) {
+    const IdentifierInfo *II = ParamVars[i]->getIdentifier();
+    if (!II)
+      continue; // No identifier to compare against.
+    StringRef Name = II->getName();
+    // Sizes are unsigned, so subtract the smaller from the larger.
+    const unsigned MinPossibleEditDistance = Name.size() > Typo.size() ?
+        Name.size() - Typo.size() : Typo.size() - Name.size();
+    if (MinPossibleEditDistance > 0 &&
+        Typo.size() / MinPossibleEditDistance < 3)
+      continue; // Lengths differ too much for Typo to be a typo of Name.
+    unsigned EditDistance = Typo.edit_distance(Name, true, MaxEditDistance);
+    if (EditDistance < BestEditDistance) {
+      BestEditDistance = EditDistance;
+      BestPVDIndex = i;
+    }
+  }
+
+  return BestEditDistance <= MaxEditDistance
+             ? BestPVDIndex : ParamCommandComment::InvalidParamIndex;
+}
+
 // TODO: tablegen
 bool Sema::isBlockCommand(StringRef Name) {
   return llvm::StringSwitch<bool>(Name)
@@ -259,7 +449,9 @@
 
 bool Sema::HTMLOpenTagNeedsClosing(StringRef Name) {
   return llvm::StringSwitch<bool>(Name)
-      .Case("br", true)
+      .Case("br", false)
+      .Case("hr", false)
+      .Case("li", false)
       .Default(true);
 }