Improved IdempotentOperationChecker false positives and false negatives.
- Unfinished analysis may still report valid warnings if the path was completely analyzed
- New 'CanVary' heuristic to recursively determine if a subexpression has a varying element
- Updated test cases, including one known bug
- Exposed GRCoreEngine through GRExprEngine

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@110970 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/clang/Checker/PathSensitive/Checker.h b/include/clang/Checker/PathSensitive/Checker.h
index 1c9f85b..d34550a 100644
--- a/include/clang/Checker/PathSensitive/Checker.h
+++ b/include/clang/Checker/PathSensitive/Checker.h
@@ -165,6 +165,10 @@
     Eng.getBugReporter().EmitReport(R);
   }
 
+  AnalysisContext *getCurrentAnalysisContext() const {
+    return Pred->getLocationContext()->getAnalysisContext();
+  }
+
 private:
   ExplodedNode *GenerateNodeImpl(const Stmt* stmt, const GRState *state,
                              bool markAsSink) {
diff --git a/include/clang/Checker/PathSensitive/GRCoreEngine.h b/include/clang/Checker/PathSensitive/GRCoreEngine.h
index 0426194..4f9f2d8 100644
--- a/include/clang/Checker/PathSensitive/GRCoreEngine.h
+++ b/include/clang/Checker/PathSensitive/GRCoreEngine.h
@@ -43,6 +43,11 @@
   friend class GRCallEnterNodeBuilder;
   friend class GRCallExitNodeBuilder;
 
+public:
+  typedef std::vector<std::pair<BlockEdge, const ExplodedNode*> >
+            BlocksAborted;
+private:
+
   GRSubEngine& SubEngine;
 
   /// G - The simulation graph.  Each node is a (location,state) pair.
@@ -57,10 +62,6 @@
   ///   These are used to record for key nodes in the ExplodedGraph the
   ///   number of times different CFGBlocks have been visited along a path.
   GRBlockCounter::Factory BCounterFactory;
-  
-  
-  typedef std::vector<std::pair<BlockEdge, const ExplodedNode*> >
-          BlocksAborted;
 
   /// The locations where we stopped doing work because we visited a location
   ///  too many times.
diff --git a/include/clang/Checker/PathSensitive/GRExprEngine.h b/include/clang/Checker/PathSensitive/GRExprEngine.h
index f7b7d70..691e7f3 100644
--- a/include/clang/Checker/PathSensitive/GRExprEngine.h
+++ b/include/clang/Checker/PathSensitive/GRExprEngine.h
@@ -254,10 +254,10 @@
   // Functions for external checking of whether we have unfinished work
   bool wasBlockAborted() const { return CoreEngine.wasBlockAborted(); }
   bool hasWorkRemaining() const {
-    return wasBlockAborted() || getWorkList()->hasWork();
+    return wasBlockAborted() || CoreEngine.getWorkList()->hasWork();
   }
 
-  GRWorkList *getWorkList() const { return CoreEngine.getWorkList(); }
+  const GRCoreEngine &getCoreEngine() const { return CoreEngine; }
 
 protected:
   const GRState* GetState(ExplodedNode* N) {
diff --git a/lib/Checker/IdempotentOperationChecker.cpp b/lib/Checker/IdempotentOperationChecker.cpp
index 76f493e..9866c60 100644
--- a/lib/Checker/IdempotentOperationChecker.cpp
+++ b/lib/Checker/IdempotentOperationChecker.cpp
@@ -36,26 +36,24 @@
 //  !=      |   0    |        |        |        |        |         |
 //===----------------------------------------------------------------------===//
 //
-// Ways to reduce false positives (that need to be implemented):
-// - Don't flag downsizing casts
-// - Improved handling of static/global variables
-// - Per-block marking of incomplete analysis
-// - Handling ~0 values
-// - False positives involving silencing unused variable warnings
-//
-// Other things TODO:
+// Things TODO:
 // - Improved error messages
 // - Handle mixed assumptions (which assumptions can belong together?)
 // - Finer grained false positive control (levels)
+// - Handling ~0 values
 
 #include "GRExprEngineExperimentalChecks.h"
+#include "clang/Analysis/CFGStmtMap.h"
 #include "clang/Checker/BugReporter/BugType.h"
 #include "clang/Checker/PathSensitive/CheckerHelpers.h"
 #include "clang/Checker/PathSensitive/CheckerVisitor.h"
+#include "clang/Checker/PathSensitive/GRCoreEngine.h"
 #include "clang/Checker/PathSensitive/SVals.h"
 #include "clang/AST/Stmt.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/Support/ErrorHandling.h"
+#include <deque>
 
 using namespace clang;
 
@@ -79,11 +77,15 @@
     static bool isParameterSelfAssign(const Expr *LHS, const Expr *RHS);
     static bool isTruncationExtensionAssignment(const Expr *LHS,
                                                 const Expr *RHS);
-    static bool containsZeroConstant(const Stmt *S);
-    static bool containsOneConstant(const Stmt *S);
+    static bool PathWasCompletelyAnalyzed(const CFG *C,
+                                          const CFGBlock *CB,
+                                          const GRCoreEngine &CE);
+    static bool CanVary(const Expr *Ex, ASTContext &Ctx);
 
     // Hash table
-    typedef llvm::DenseMap<const BinaryOperator *, Assumption> AssumptionMap;
+    typedef llvm::DenseMap<const BinaryOperator *,
+                           std::pair<Assumption, AnalysisContext*> >
+                           AssumptionMap;
     AssumptionMap hash;
 };
 }
@@ -103,23 +105,21 @@
   // Find or create an entry in the hash for this BinaryOperator instance.
   // If we haven't done a lookup before, it will get default initialized to
   // 'Possible'.
-  Assumption &A = hash[B];
+  std::pair<Assumption, AnalysisContext *> &Data = hash[B];
+  Assumption &A = Data.first;
+  Data.second = C.getCurrentAnalysisContext();
 
   // If we already have visited this node on a path that does not contain an
   // idempotent operation, return immediately.
   if (A == Impossible)
     return;
 
-  // Skip binary operators containing common false positives
-  if (containsMacro(B) || containsEnum(B) || containsStmt<SizeOfAlignOfExpr>(B)
-      || containsZeroConstant(B) || containsOneConstant(B)
-      || containsBuiltinOffsetOf(B) || containsStaticLocal(B)) {
-    A = Impossible;
-    return;
-  }
-
+  // Retrieve both sides of the operator and determine if they can vary (which
+  // may mean this is a false positive.
   const Expr *LHS = B->getLHS();
   const Expr *RHS = B->getRHS();
+  bool LHSCanVary = CanVary(LHS, C.getASTContext());
+  bool RHSCanVary = CanVary(RHS, C.getASTContext());
 
   const GRState *state = C.getState();
 
@@ -186,7 +186,7 @@
   case BinaryOperator::Xor:
   case BinaryOperator::LOr:
   case BinaryOperator::LAnd:
-    if (LHSVal != RHSVal)
+    if (LHSVal != RHSVal || !LHSCanVary || !RHSCanVary)
       break;
     UpdateAssumption(A, Equal);
     return;
@@ -204,7 +204,7 @@
    case BinaryOperator::Div:
    case BinaryOperator::LOr:
    case BinaryOperator::LAnd:
-     if (!RHSVal.isConstant(1))
+     if (!RHSVal.isConstant(1) || !RHSCanVary)
        break;
      UpdateAssumption(A, RHSis1);
      return;
@@ -220,7 +220,7 @@
   case BinaryOperator::Mul:
   case BinaryOperator::LOr:
   case BinaryOperator::LAnd:
-    if (!LHSVal.isConstant(1))
+    if (!LHSVal.isConstant(1) || !LHSCanVary)
       break;
     UpdateAssumption(A, LHSis1);
     return;
@@ -248,7 +248,7 @@
   case BinaryOperator::Shr:
   case BinaryOperator::LOr:
   case BinaryOperator::LAnd:
-    if (!RHSVal.isConstant(0))
+    if (!RHSVal.isConstant(0) || !RHSCanVary)
       break;
     UpdateAssumption(A, RHSis0);
     return;
@@ -280,7 +280,7 @@
   case BinaryOperator::Shr:
   case BinaryOperator::LOr:
   case BinaryOperator::LAnd:
-    if (!LHSVal.isConstant(0))
+    if (!LHSVal.isConstant(0) || !LHSCanVary)
       break;
     UpdateAssumption(A, LHSis0);
     return;
@@ -293,52 +293,68 @@
 void IdempotentOperationChecker::VisitEndAnalysis(ExplodedGraph &G,
                                                   BugReporter &BR,
                                                   GRExprEngine &Eng) {
-  // If there is any work remaining we cannot be 100% sure about our warnings
-  if (Eng.hasWorkRemaining())
-    return;
-
   // Iterate over the hash to see if we have any paths with definite
   // idempotent operations.
-  for (AssumptionMap::const_iterator i =
-      hash.begin(); i != hash.end(); ++i) {
-    if (i->second != Impossible) {
-      // Select the error message.
-      const BinaryOperator *B = i->first;
-      llvm::SmallString<128> buf;
-      llvm::raw_svector_ostream os(buf);
+  for (AssumptionMap::const_iterator i = hash.begin(); i != hash.end(); ++i) {
+    // Unpack the hash contents
+    const std::pair<Assumption, AnalysisContext *> &Data = i->second;
+    const Assumption &A = Data.first;
+    AnalysisContext *AC = Data.second;
 
-      switch (i->second) {
-      case Equal:
-        if (B->getOpcode() == BinaryOperator::Assign)
-          os << "Assigned value is always the same as the existing value";
-        else
-          os << "Both operands to '" << B->getOpcodeStr()
-             << "' always have the same value";
-        break;
-      case LHSis1:
-        os << "The left operand to '" << B->getOpcodeStr() << "' is always 1";
-        break;
-      case RHSis1:
-        os << "The right operand to '" << B->getOpcodeStr() << "' is always 1";
-        break;
-      case LHSis0:
-        os << "The left operand to '" << B->getOpcodeStr() << "' is always 0";
-        break;
-      case RHSis0:
-        os << "The right operand to '" << B->getOpcodeStr() << "' is always 0";
-        break;
-      case Possible:
-        llvm_unreachable("Operation was never marked with an assumption");
-      case Impossible:
-        llvm_unreachable(0);
-      }
+    const BinaryOperator *B = i->first;
 
-      // Create the SourceRange Arrays
-      SourceRange S[2] = { i->first->getLHS()->getSourceRange(),
-                           i->first->getRHS()->getSourceRange() };
-      BR.EmitBasicReport("Idempotent operation", "Dead code",
-                         os.str(), i->first->getOperatorLoc(), S, 2);
+    if (A == Impossible)
+      continue;
+
+    // If the analyzer did not finish, check to see if we can still emit this
+    // warning
+    if (Eng.hasWorkRemaining()) {
+      const CFGStmtMap *CBM = CFGStmtMap::Build(AC->getCFG(),
+                                                &AC->getParentMap());
+
+      // If we can trace back
+      if (!PathWasCompletelyAnalyzed(AC->getCFG(),
+                                     CBM->getBlock(B),
+                                     Eng.getCoreEngine()))
+        continue;
+
+      delete CBM;
     }
+
+    // Select the error message.
+    llvm::SmallString<128> buf;
+    llvm::raw_svector_ostream os(buf);
+    switch (A) {
+    case Equal:
+      if (B->getOpcode() == BinaryOperator::Assign)
+        os << "Assigned value is always the same as the existing value";
+      else
+        os << "Both operands to '" << B->getOpcodeStr()
+           << "' always have the same value";
+      break;
+    case LHSis1:
+      os << "The left operand to '" << B->getOpcodeStr() << "' is always 1";
+      break;
+    case RHSis1:
+      os << "The right operand to '" << B->getOpcodeStr() << "' is always 1";
+      break;
+    case LHSis0:
+      os << "The left operand to '" << B->getOpcodeStr() << "' is always 0";
+      break;
+    case RHSis0:
+      os << "The right operand to '" << B->getOpcodeStr() << "' is always 0";
+      break;
+    case Possible:
+      llvm_unreachable("Operation was never marked with an assumption");
+    case Impossible:
+      llvm_unreachable(0);
+    }
+
+    // Create the SourceRange Arrays
+    SourceRange S[2] = { i->first->getLHS()->getSourceRange(),
+                         i->first->getRHS()->getSourceRange() };
+    BR.EmitBasicReport("Idempotent operation", "Dead code",
+                       os.str(), i->first->getOperatorLoc(), S, 2);
   }
 }
 
@@ -410,44 +426,133 @@
   return dyn_cast<DeclRefExpr>(RHS->IgnoreParens()) == NULL;
 }
 
-// Check for a integer or float constant of 0
-bool IdempotentOperationChecker::containsZeroConstant(const Stmt *S) {
-  const IntegerLiteral *IL = dyn_cast<IntegerLiteral>(S);
-  if (IL && IL->getValue() == 0)
-    return true;
+// Returns false if a path to this block was not completely analyzed, or true
+// otherwise.
+bool IdempotentOperationChecker::PathWasCompletelyAnalyzed(
+                                                       const CFG *C,
+                                                       const CFGBlock *CB,
+                                                       const GRCoreEngine &CE) {
+  std::deque<const CFGBlock *> WorkList;
+  llvm::SmallSet<unsigned, 8> Aborted;
+  llvm::SmallSet<unsigned, 128> Visited;
 
-  const FloatingLiteral *FL = dyn_cast<FloatingLiteral>(S);
-  if (FL && FL->getValue().isZero())
-    return true;
+  // Create a set of all aborted blocks
+  typedef GRCoreEngine::BlocksAborted::const_iterator AbortedIterator;
+  for (AbortedIterator I = CE.blocks_aborted_begin(),
+      E = CE.blocks_aborted_end(); I != E; ++I) {
+    const BlockEdge &BE =  I->first;
 
-  for (Stmt::const_child_iterator I = S->child_begin(); I != S->child_end();
-      ++I)
-    if (const Stmt *child = *I)
-      if (containsZeroConstant(child))
-        return true;
-
-  return false;
-}
-
-// Check for an integer or float constant of 1
-bool IdempotentOperationChecker::containsOneConstant(const Stmt *S) {
-  const IntegerLiteral *IL = dyn_cast<IntegerLiteral>(S);
-  if (IL && IL->getValue() == 1)
-    return true;
-
-  if (const FloatingLiteral *FL = dyn_cast<FloatingLiteral>(S)) {
-    const llvm::APFloat &val = FL->getValue();
-    const llvm::APFloat one(val.getSemantics(), 1);
-    if (val.compare(one) == llvm::APFloat::cmpEqual)
-      return true;
+    // The destination block on the BlockEdge is the first block that was not
+    // analyzed.
+    Aborted.insert(BE.getDst()->getBlockID());
   }
 
-  for (Stmt::const_child_iterator I = S->child_begin(); I != S->child_end();
-      ++I)
-    if (const Stmt *child = *I)
-      if (containsOneConstant(child))
-        return true;
+  // Save the entry block ID for early exiting
+  unsigned EntryBlockID = C->getEntry().getBlockID();
 
-  return false;
+  // Create initial node
+  WorkList.push_back(CB);
+
+  while (!WorkList.empty()) {
+    const CFGBlock *Head = WorkList.front();
+    WorkList.pop_front();
+    Visited.insert(Head->getBlockID());
+
+    // If we found the entry block, then there exists a path from the target
+    // node to the entry point of this function -> the path was completely
+    // analyzed.
+    if (Head->getBlockID() == EntryBlockID)
+      return true;
+
+    // If any of the aborted blocks are on the path to the beginning, then all
+    // paths to this block were not analyzed.
+    if (Aborted.count(Head->getBlockID()))
+      return false;
+
+    // Add the predecessors to the worklist unless we have already visited them
+    for (CFGBlock::const_pred_iterator I = Head->pred_begin();
+        I != Head->pred_end(); ++I)
+      if (!Visited.count((*I)->getBlockID()))
+        WorkList.push_back(*I);
+  }
+
+  // If we get to this point, there is no connection to the entry block or an
+  // aborted block. This path is unreachable and we can report the error.
+  return true;
+}
+
+// Recursive function that determines whether an expression contains any element
+// that varies. This could be due to a compile-time constant like sizeof. An
+// expression may also involve a variable that behaves like a constant. The
+// function returns true if the expression varies, and false otherwise.
+bool IdempotentOperationChecker::CanVary(const Expr *Ex, ASTContext &Ctx) {
+  // Parentheses and casts are irrelevant here
+  Ex = Ex->IgnoreParenCasts();
+
+  if (Ex->getLocStart().isMacroID())
+    return false;
+
+  switch (Ex->getStmtClass()) {
+  // Trivially true cases
+  case Stmt::ArraySubscriptExprClass:
+  case Stmt::MemberExprClass:
+  case Stmt::StmtExprClass:
+  case Stmt::CallExprClass:
+  case Stmt::VAArgExprClass:
+  case Stmt::ShuffleVectorExprClass:
+    return true;
+  default:
+    return true;
+
+  // Trivially false cases
+  case Stmt::IntegerLiteralClass:
+  case Stmt::CharacterLiteralClass:
+  case Stmt::FloatingLiteralClass:
+  case Stmt::PredefinedExprClass:
+  case Stmt::ImaginaryLiteralClass:
+  case Stmt::StringLiteralClass:
+  case Stmt::OffsetOfExprClass:
+  case Stmt::CompoundLiteralExprClass:
+  case Stmt::AddrLabelExprClass:
+  case Stmt::TypesCompatibleExprClass:
+  case Stmt::GNUNullExprClass:
+  case Stmt::InitListExprClass:
+  case Stmt::DesignatedInitExprClass:
+  case Stmt::BlockExprClass:
+  case Stmt::BlockDeclRefExprClass:
+    return false;
+
+  // Cases requiring custom logic
+  case Stmt::SizeOfAlignOfExprClass: {
+    const SizeOfAlignOfExpr *SE = cast<const SizeOfAlignOfExpr>(Ex);
+    if (!SE->isSizeOf())
+      return false;
+    return SE->getTypeOfArgument()->isVariableArrayType();
+  }
+  case Stmt::DeclRefExprClass:
+    //    return !IsPseudoConstant(cast<DeclRefExpr>(Ex));
+    return true;
+
+  // The next cases require recursion for subexpressions
+  case Stmt::BinaryOperatorClass: {
+    const BinaryOperator *B = cast<const BinaryOperator>(Ex);
+    return CanVary(B->getRHS(), Ctx) || CanVary(B->getLHS(), Ctx);
+   }
+  case Stmt::UnaryOperatorClass: {
+    const UnaryOperator *U = cast<const UnaryOperator>(Ex);
+    // Handle two trivial cases first
+    switch (U->getOpcode()) {
+    case UnaryOperator::Extension:
+    case UnaryOperator::OffsetOf:
+      return false;
+    default:
+      return CanVary(U->getSubExpr(), Ctx);
+    }
+  }
+  case Stmt::ChooseExprClass:
+    return CanVary(cast<const ChooseExpr>(Ex)->getChosenSubExpr(Ctx), Ctx);
+  case Stmt::ConditionalOperatorClass:
+      return CanVary(cast<const ConditionalOperator>(Ex)->getCond(), Ctx);
+  }
 }
 
diff --git a/test/Analysis/dead-stores.c b/test/Analysis/dead-stores.c
index cc8a3f5..3cd0612 100644
--- a/test/Analysis/dead-stores.c
+++ b/test/Analysis/dead-stores.c
@@ -150,7 +150,7 @@
 
 int f16(int x) {
   x = x * 2;
-  x = sizeof(int [x = (x || x + 1) * 2]) // expected-warning{{Although the value stored to 'x' is used}}
+  x = sizeof(int [x = (x || x + 1) * 2]) // expected-warning{{Although the value stored to 'x' is used}} expected-warning{{The left operand to '*' is always 1}}
       ? 5 : 8;
   return x;
 }
diff --git a/test/Analysis/idempotent-operations.c b/test/Analysis/idempotent-operations.c
index 2543b1a..23401e8 100644
--- a/test/Analysis/idempotent-operations.c
+++ b/test/Analysis/idempotent-operations.c
@@ -53,20 +53,33 @@
 }
 
 void floats(float x) {
-  test_f(x * 1.0); // no-warning
+  test_f(x * 1.0);  // no-warning
   test_f(x * 1.0F); // no-warning
 }
 
-// Ensure that we don't report false poitives on complex loops
+// Ensure that we don't report false poitives in complex loops
 void bailout() {
-  int unused, result = 4;
-  int numbers[5] = { 0, 32, 'x', 128, 255 };
+  int unused = 0, result = 4;
+  result = result; // expected-warning {{Assigned value is always the same as the existing value}}
 
-  for (int bg = 0; bg < 5; bg ++) {
-    result += numbers[bg]; // no-warning
+  for (unsigned bg = 0; bg < 1024; bg ++) {
+    result = bg * result; // no-warning
 
     for (int i = 0; i < 256; i++) {
-      unused = i;
+      unused *= i; // no-warning
     }
   }
 }
+
+// False positive tests
+
+unsigned false1() {
+  return (5 - 2 - 3); // no-warning
+}
+
+enum testenum { enum1 = 0, enum2 };
+unsigned false2() {
+  return enum1; // no-warning
+}
+
+extern unsigned foo();
diff --git a/test/Analysis/null-deref-ps-temp.c b/test/Analysis/null-deref-ps-temp.c
new file mode 100644
index 0000000..90b6ed3
--- /dev/null
+++ b/test/Analysis/null-deref-ps-temp.c
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple i386-apple-darwin10 -analyze -analyzer-experimental-internal-checks -std=gnu99 -analyzer-check-objc-mem -analyzer-store=region -analyzer-constraints=range -analyzer-no-purge-dead -verify %s -Wreturn-type
+
+// This is a temporary file to isolate a test case that would cause a failure
+// only some of the time in null-deref-ps.c. The idempotent operations checker
+// has revealed a bug on line 18 ('=' instead of '==') when the
+// -analyzer-no-purge-dead flag is passed to cc1. Some fundamental design
+// changes are needed to make this work without the -analyzer-no-purge-dead flag
+// and this test will be integrated back into the main file when this happens.
+
+typedef unsigned uintptr_t;
+
+int f4_b() {
+  short array[2];
+  uintptr_t x = array; // expected-warning{{incompatible pointer to integer conversion}}
+  short *p = x; // expected-warning{{incompatible integer to pointer conversion}}
+
+  // The following branch should be infeasible.
+  if (!(p = &array[0])) { // expected-warning{{Assigned value is always the same as the existing value}}
+    p = 0;
+    *p = 1; // no-warning
+  }
+
+  if (p) {
+    *p = 5; // no-warning
+    p = 0;
+  }
+  else return; // expected-warning {{non-void function 'f4_b' should return a value}}
+
+  *p += 10; // expected-warning{{Dereference of null pointer}}
+  return 0;
+}
diff --git a/test/Analysis/null-deref-ps.c b/test/Analysis/null-deref-ps.c
index 9be73a8..1ae94c7 100644
--- a/test/Analysis/null-deref-ps.c
+++ b/test/Analysis/null-deref-ps.c
@@ -60,27 +60,7 @@
   return *q; // expected-warning{{Dereference of null pointer (loaded from variable 'q')}}
 }
 
-int f4_b() {
-  short array[2];
-  uintptr_t x = array; // expected-warning{{incompatible pointer to integer conversion}}
-  short *p = x; // expected-warning{{incompatible integer to pointer conversion}}
-  
-  // The following branch should be infeasible.
-  if (!(p = &array[0])) {
-    p = 0;
-    *p = 1; // no-warning
-  }
-  
-  if (p) {
-    *p = 5; // no-warning
-    p = 0;
-  }
-  else return; // expected-warning {{non-void function 'f4_b' should return a value}}
-
-  *p += 10; // expected-warning{{Dereference of null pointer}}
-  return 0;
-}
-
+// Placeholder for f4_b, temporarily moved to null-deref-ps-temp.c
 
 int f5() {
   
@@ -280,7 +260,7 @@
 // Test handling of translating between integer "pointers" and back.
 void f13() {
   int *x = 0;
-  if (((((int) x) << 2) + 1) >> 1) *x = 1; // expected-warning{{he left operand to '<<' is always 0}}
+  if (((((int) x) << 2) + 1) >> 1) *x = 1; // expected-warning{{The left operand to '<<' is always 0}} expected-warning{{The left operand to '+' is always 0}}
 }
 
 // PR 4759 - Attribute non-null checking by the analyzer was not correctly