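Add an LLVM conventions check that scans Clang AST node classes (those
derived from clang::Type, clang::Stmt, or clang::Decl) for fields, direct or
nested, whose types allocate heap memory (std::vector, std::string,
llvm::SmallVector).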

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@96184 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Checker/LLVMConventionsChecker.cpp b/lib/Checker/LLVMConventionsChecker.cpp
index 17a17a8..242f4de 100644
--- a/lib/Checker/LLVMConventionsChecker.cpp
+++ b/lib/Checker/LLVMConventionsChecker.cpp
@@ -12,12 +12,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "clang/AST/DeclCXX.h"
+#include "clang/AST/DeclTemplate.h"
 #include "clang/AST/StmtVisitor.h"
 #include "clang/Checker/Checkers/LocalCheckers.h"
 #include "clang/Checker/BugReporter/BugReporter.h"
 #include <string>
-#include <llvm/ADT/StringRef.h>
+#include "llvm/ADT/StringRef.h"
 
 using namespace clang;
 
@@ -25,13 +25,25 @@
 // Generic type checking routines.
 //===----------------------------------------------------------------------===//
 
-static bool IsStringRef(QualType T) {
+static bool IsLLVMStringRef(QualType T) {
   const RecordType *RT = T->getAs<RecordType>();
   if (!RT)
     return false;
 
   return llvm::StringRef(QualType(RT, 0).getAsString()) ==
-  "class llvm::StringRef";
+          "class llvm::StringRef";
+}
+
+static bool InStdNamespace(const Decl *D) {
+  const DeclContext *DC = D->getDeclContext();
+  const NamespaceDecl *ND = dyn_cast<NamespaceDecl>(D->getDeclContext());
+  if (!ND)
+    return false;
+  const IdentifierInfo *II = ND->getIdentifier();
+  if (!II || II->getName() != "std")
+    return false;
+  DC = ND->getDeclContext();
+  return isa<TranslationUnitDecl>(DC);
 }
 
 static bool IsStdString(QualType T) {
@@ -43,15 +55,75 @@
     return false;
 
   const TypedefDecl *TD = TT->getDecl();
-  const NamespaceDecl *ND = dyn_cast<NamespaceDecl>(TD->getDeclContext());
+
+  if (!InStdNamespace(TD))
+    return false;
+
+  return TD->getName() == "string";
+}
+
+static bool InClangNamespace(const Decl *D) {
+  const DeclContext *DC = D->getDeclContext();
+  const NamespaceDecl *ND = dyn_cast<NamespaceDecl>(D->getDeclContext());
   if (!ND)
     return false;
   const IdentifierInfo *II = ND->getIdentifier();
-  if (!II || II->getName() != "std")
+  if (!II || II->getName() != "clang")
+    return false;
+  DC = ND->getDeclContext();
+  return isa<TranslationUnitDecl>(DC);
+}
+
+static bool InLLVMNamespace(const Decl *D) {
+  const DeclContext *DC = D->getDeclContext();
+  const NamespaceDecl *ND = dyn_cast<NamespaceDecl>(D->getDeclContext());
+  if (!ND)
+    return false;
+  const IdentifierInfo *II = ND->getIdentifier();
+  if (!II || II->getName() != "llvm")
+    return false;
+  DC = ND->getDeclContext();
+  return isa<TranslationUnitDecl>(DC);
+}
+
+static bool IsClangType(const RecordDecl *RD) {
+  return RD->getName() == "Type" && InClangNamespace(RD);
+}
+
+static bool IsClangDecl(const RecordDecl *RD) {
+  return RD->getName() == "Decl" && InClangNamespace(RD);
+}
+
+static bool IsClangStmt(const RecordDecl *RD) {
+  return RD->getName() == "Stmt" && InClangNamespace(RD);
+}
+
+static bool IsStdVector(QualType T) {
+  const TemplateSpecializationType *TS = T->getAs<TemplateSpecializationType>();
+  if (!TS)
     return false;
 
-  DeclarationName N = TD->getDeclName();
-  return llvm::StringRef(N.getAsString()) == "string";
+  TemplateName TM = TS->getTemplateName();
+  TemplateDecl *TD = TM.getAsTemplateDecl();
+
+  if (!TD || !InStdNamespace(TD))
+    return false;
+
+  return TD->getName() == "vector";
+}
+
+static bool IsSmallVector(QualType T) {
+  const TemplateSpecializationType *TS = T->getAs<TemplateSpecializationType>();
+  if (!TS)
+    return false;
+
+  TemplateName TM = TS->getTemplateName();
+  TemplateDecl *TD = TM.getAsTemplateDecl();
+
+  if (!TD || !InLLVMNamespace(TD))
+    return false;
+
+  return TD->getName() == "SmallVector";
 }
 
 //===----------------------------------------------------------------------===//
@@ -98,7 +170,7 @@
 
   // Pattern match for:
   // llvm::StringRef x = call() (where call returns std::string)
-  if (!IsStringRef(VD->getType()))
+  if (!IsLLVMStringRef(VD->getType()))
     return;
   CXXExprWithTemporaries *Ex1 = dyn_cast<CXXExprWithTemporaries>(Init);
   if (!Ex1)
@@ -120,12 +192,117 @@
     return;
 
   // Okay, badness!  Report an error.
-  BR.EmitBasicReport("StringRef should not be bound to temporary "
-                     "std::string that it outlives", "LLVM Conventions",
+  const char *desc = "StringRef should not be bound to temporary "
+                     "std::string that it outlives";
+
+  BR.EmitBasicReport(desc, "LLVM Conventions", desc,
                      VD->getLocStart(), Init->getSourceRange());
 }
 
 //===----------------------------------------------------------------------===//
+// CHECK: Clang AST nodes should not have fields that can allocate
+//   memory.
+//===----------------------------------------------------------------------===//
+
+static bool AllocatesMemory(QualType T) {
+  return IsStdVector(T) || IsStdString(T) || IsSmallVector(T);
+}
+
+// This type checking could be sped up via dynamic programming.
+static bool IsPartOfAST(const CXXRecordDecl *R) {
+  if (IsClangStmt(R) || IsClangType(R) || IsClangDecl(R))
+    return true;
+
+  for (CXXRecordDecl::base_class_const_iterator I = R->bases_begin(),
+                                                E = R->bases_end(); I!=E; ++I) {
+    CXXBaseSpecifier BS = *I;
+    QualType T = BS.getType();
+    if (const RecordType *baseT = T->getAs<RecordType>()) {
+      CXXRecordDecl *baseD = cast<CXXRecordDecl>(baseT->getDecl());
+      if (IsPartOfAST(baseD))
+        return true;
+    }
+  }
+
+  return false;
+}
+
+namespace {
+class ASTFieldVisitor {
+  llvm::SmallVector<FieldDecl*, 10> FieldChain;
+  CXXRecordDecl *Root;
+  BugReporter &BR;
+public:
+  ASTFieldVisitor(CXXRecordDecl *root, BugReporter &br)
+    : Root(root), BR(br) {}
+
+  void Visit(FieldDecl *D);
+  void ReportError(QualType T);
+};
+} // end anonymous namespace
+
+static void CheckASTMemory(CXXRecordDecl *R, BugReporter &BR) {
+  if (!IsPartOfAST(R))
+    return;
+
+  for (RecordDecl::field_iterator I = R->field_begin(), E = R->field_end();
+       I != E; ++I) {
+    ASTFieldVisitor walker(R, BR);
+    walker.Visit(*I);
+  }
+}
+
+void ASTFieldVisitor::Visit(FieldDecl *D) {
+  FieldChain.push_back(D);
+
+  QualType T = D->getType();
+
+  if (AllocatesMemory(T))
+    ReportError(T);
+
+  if (const RecordType *RT = T->getAs<RecordType>()) {
+    const RecordDecl *RD = RT->getDecl()->getDefinition();
+    for (RecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end();
+         I != E; ++I)
+      Visit(*I);
+  }
+
+  FieldChain.pop_back();
+}
+
+void ASTFieldVisitor::ReportError(QualType T) {
+  llvm::SmallString<1024> buf;
+  llvm::raw_svector_ostream os(buf);
+
+  os << "AST class '" << Root->getName() << "' has a field '"
+     << FieldChain.front()->getName() << "' that allocates heap memory";
+  if (FieldChain.size() > 1) {
+    os << " via the following chain: ";
+    bool isFirst = true;
+    for (llvm::SmallVectorImpl<FieldDecl*>::iterator I=FieldChain.begin(),
+         E=FieldChain.end(); I!=E; ++I) {
+      if (!isFirst)
+        os << '.';
+      else
+        isFirst = false;
+      os << (*I)->getName();
+    }
+  }
+  os << " (type " << FieldChain.back()->getType().getAsString() << ")";
+  os.flush();
+
+  // Note that this will fire for every translation unit that uses this
+  // class.  This is suboptimal, but at least scan-build will merge
+  // duplicate HTML reports.  In the future we need a unified way of merging
+  // duplicate reports across translation units.  For C++ classes we cannot
+  // just report warnings when we see an out-of-line method definition for a
+  // class, as that heuristic doesn't always work (the complete definition of
+  // the class may be in the header file, for example).
+  BR.EmitBasicReport("AST node allocates heap memory", "LLVM Conventions",
+                     os.str(), FieldChain.front()->getLocStart());
+}
+
+//===----------------------------------------------------------------------===//
 // Entry point for all checks.
 //===----------------------------------------------------------------------===//
 
@@ -134,9 +311,13 @@
        I!=E ; ++I) {
 
     Decl *D = *I;
-    if (D->getBody()) {
+
+    if (D->getBody())
       CheckStringRefAssignedTemporary(D, BR);
-    }
+
+    if (CXXRecordDecl *R = dyn_cast<CXXRecordDecl>(D))
+      if (R->isDefinition())
+        CheckASTMemory(R, BR);
 
     if (DeclContext *DC_child = dyn_cast<DeclContext>(D))
       ScanCodeDecls(DC_child, BR);