[analyzer] pr18953: Split C++ zero-initialization from default initialization.

The bindDefault() API of the ProgramState allows setting a default value
for reads from memory regions that were not preceded by writes.

It was used for implementing C++ zeroing constructors (i.e. default constructors
that boil down to setting all fields of the object to 0).

Because differences between zeroing consturctors and other forms of default
initialization have been piling up (in particular, zeroing constructors can be
called multiple times over the same object, probably even at the same offset,
requiring a careful and potentially slow cleanup of previous bindings in the
RegionStore), we split the API in two: bindDefaultInitial() for modeling
initial values and bindDefaultZero() for modeling zeroing constructors.

This fixes a few assertion failures from which the investigation originated.

The imperfect protection from both inability of the RegionStore to support
binding extents and lack of information in ASTRecordLayout has been loosened
because it's, well, imperfect, and it is unclear if it fixing more than it
was breaking.

Differential Revision: https://reviews.llvm.org/D46368

llvm-svn: 331561
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
index ef40634..facc32e 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
@@ -242,7 +242,18 @@
 
   ProgramStateRef bindLoc(SVal location, SVal V, const LocationContext *LCtx) const;
 
-  ProgramStateRef bindDefault(SVal loc, SVal V, const LocationContext *LCtx) const;
+  /// Initializes the region of memory represented by \p loc with an initial
+  /// value. Once initialized, all values loaded from any sub-regions of that
+  /// region will be equal to \p V, unless overwritten later by the program.
+  /// This method should not be used on regions that are already initialized.
+  /// If you need to indicate that memory contents have suddenly become unknown
+  /// within a certain region of memory, consider invalidateRegions().
+  ProgramStateRef bindDefaultInitial(SVal loc, SVal V,
+                                     const LocationContext *LCtx) const;
+
+  /// Performs C++ zero-initialization procedure on the region of memory
+  /// represented by \p loc.
+  ProgramStateRef bindDefaultZero(SVal loc, const LocationContext *LCtx) const;
 
   ProgramStateRef killBinding(Loc LV) const;
 
diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h
index 18ae3c9..baf691f 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/Store.h
@@ -107,7 +107,15 @@
   ///   by \c val bound to the location given for \c loc.
   virtual StoreRef Bind(Store store, Loc loc, SVal val) = 0;
 
-  virtual StoreRef BindDefault(Store store, const MemRegion *R, SVal V);
+  /// Return a store with the specified value bound to all sub-regions of the
+  /// region. The region must not have previous bindings. If you need to
+  /// invalidate existing bindings, consider invalidateRegions().
+  virtual StoreRef BindDefaultInitial(Store store, const MemRegion *R,
+                                      SVal V) = 0;
+
+  /// Return a store with in which all values within the given region are
+  /// reset to zero. This method is allowed to overwrite previous bindings.
+  virtual StoreRef BindDefaultZero(Store store, const MemRegion *R) = 0;
 
   /// \brief Create a new store with the specified binding removed.
   /// \param ST the original store, that is the basis for the new store.
diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
index 8fb255c..31fd70e 100644
--- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
@@ -1273,7 +1273,7 @@
   State = State->BindExpr(CE, C.getLocationContext(), RetVal);
 
   // Fill the region with the initialization value.
-  State = State->bindDefault(RetVal, Init, LCtx);
+  State = State->bindDefaultInitial(RetVal, Init, LCtx);
 
   // Set the region's extent equal to the Size parameter.
   const SymbolicRegion *R =
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index d0ebb22..6956c6d 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -348,7 +348,9 @@
       break;
     case SubobjectAdjustment::MemberPointerAdjustment:
       // FIXME: Unimplemented.
-      State = State->bindDefault(Reg, UnknownVal(), LC);
+      State = State->invalidateRegions(Reg, InitWithAdjustments,
+                                       currBldrCtx->blockCount(), LC, true,
+                                       nullptr, nullptr, nullptr);
       return State;
     }
   }
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
index 9152b41..90a35f3 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
@@ -375,9 +375,6 @@
          I != E; ++I) {
       ProgramStateRef State = (*I)->getState();
       if (CE->requiresZeroInitialization()) {
-        // Type of the zero doesn't matter.
-        SVal ZeroVal = svalBuilder.makeZeroVal(getContext().CharTy);
-
         // FIXME: Once we properly handle constructors in new-expressions, we'll
         // need to invalidate the region before setting a default value, to make
         // sure there aren't any lingering bindings around. This probably needs
@@ -390,7 +387,7 @@
         // actually make things worse. Placement new makes this tricky as well,
         // since it's then possible to be initializing one part of a multi-
         // dimensional array.
-        State = State->bindDefault(loc::MemRegionVal(Target), ZeroVal, LCtx);
+        State = State->bindDefaultZero(loc::MemRegionVal(Target), LCtx);
       }
 
       State = addAllNecessaryTemporaryInfo(State, CC, LCtx, Target);
diff --git a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
index deb2e4a..141863d 100644
--- a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
@@ -126,16 +126,27 @@
   return newState;
 }
 
-ProgramStateRef ProgramState::bindDefault(SVal loc,
-                                          SVal V,
-                                          const LocationContext *LCtx) const {
+ProgramStateRef
+ProgramState::bindDefaultInitial(SVal loc, SVal V,
+                                 const LocationContext *LCtx) const {
   ProgramStateManager &Mgr = getStateManager();
   const MemRegion *R = loc.castAs<loc::MemRegionVal>().getRegion();
-  const StoreRef &newStore = Mgr.StoreMgr->BindDefault(getStore(), R, V);
+  const StoreRef &newStore = Mgr.StoreMgr->BindDefaultInitial(getStore(), R, V);
   ProgramStateRef new_state = makeWithStore(newStore);
-  return Mgr.getOwningEngine() ?
-           Mgr.getOwningEngine()->processRegionChange(new_state, R, LCtx) :
-           new_state;
+  return Mgr.getOwningEngine()
+             ? Mgr.getOwningEngine()->processRegionChange(new_state, R, LCtx)
+             : new_state;
+}
+
+ProgramStateRef
+ProgramState::bindDefaultZero(SVal loc, const LocationContext *LCtx) const {
+  ProgramStateManager &Mgr = getStateManager();
+  const MemRegion *R = loc.castAs<loc::MemRegionVal>().getRegion();
+  const StoreRef &newStore = Mgr.StoreMgr->BindDefaultZero(getStore(), R);
+  ProgramStateRef new_state = makeWithStore(newStore);
+  return Mgr.getOwningEngine()
+             ? Mgr.getOwningEngine()->processRegionChange(new_state, R, LCtx)
+             : new_state;
 }
 
 typedef ArrayRef<const MemRegion *> RegionList;
diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
index 7f2c1d5..d4624c0 100644
--- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -409,8 +409,22 @@
 
   RegionBindingsRef bind(RegionBindingsConstRef B, Loc LV, SVal V);
 
-  // BindDefault is only used to initialize a region with a default value.
-  StoreRef BindDefault(Store store, const MemRegion *R, SVal V) override {
+  // BindDefaultInitial is only used to initialize a region with
+  // a default value.
+  StoreRef BindDefaultInitial(Store store, const MemRegion *R,
+                              SVal V) override {
+    RegionBindingsRef B = getRegionBindings(store);
+    // Use other APIs when you have to wipe the region that was initialized
+    // earlier.
+    assert(!(B.getDefaultBinding(R) || B.getDirectBinding(R)) &&
+           "Double initialization!");
+    B = B.addBinding(BindingKey::Make(R, BindingKey::Default), V);
+    return StoreRef(B.asImmutableMap().getRootWithoutRetain(), *this);
+  }
+
+  // BindDefaultZero is used for zeroing constructors that may accidentally
+  // overwrite existing bindings.
+  StoreRef BindDefaultZero(Store store, const MemRegion *R) override {
     // FIXME: The offsets of empty bases can be tricky because of
     // of the so called "empty base class optimization".
     // If a base class has been optimized out
@@ -420,24 +434,14 @@
     // and trying to infer them from offsets/alignments
     // seems to be error-prone and non-trivial because of the trailing padding.
     // As a temporary mitigation we don't create bindings for empty bases.
-    if (R->getKind() == MemRegion::CXXBaseObjectRegionKind &&
-        cast<CXXBaseObjectRegion>(R)->getDecl()->isEmpty())
-      return StoreRef(store, *this);
+    if (const auto *BR = dyn_cast<CXXBaseObjectRegion>(R))
+      if (BR->getDecl()->isEmpty())
+        return StoreRef(store, *this);
 
     RegionBindingsRef B = getRegionBindings(store);
-    assert(!B.lookup(R, BindingKey::Direct));
-
-    BindingKey Key = BindingKey::Make(R, BindingKey::Default);
-    if (B.lookup(Key)) {
-      const SubRegion *SR = cast<SubRegion>(R);
-      assert(SR->getAsOffset().getOffset() ==
-             SR->getSuperRegion()->getAsOffset().getOffset() &&
-             "A default value must come from a super-region");
-      B = removeSubRegionBindings(B, SR);
-    } else {
-      B = B.addBinding(Key, V);
-    }
-
+    SVal V = svalBuilder.makeZeroVal(Ctx.CharTy);
+    B = removeSubRegionBindings(B, cast<SubRegion>(R));
+    B = B.addBinding(BindingKey::Make(R, BindingKey::Default), V);
     return StoreRef(B.asImmutableMap().getRootWithoutRetain(), *this);
   }
 
diff --git a/clang/lib/StaticAnalyzer/Core/Store.cpp b/clang/lib/StaticAnalyzer/Core/Store.cpp
index e78ac0f..eeafaf6 100644
--- a/clang/lib/StaticAnalyzer/Core/Store.cpp
+++ b/clang/lib/StaticAnalyzer/Core/Store.cpp
@@ -65,10 +65,6 @@
   return MRMgr.getElementRegion(EleTy, idx, Base, svalBuilder.getContext());
 }
 
-StoreRef StoreManager::BindDefault(Store store, const MemRegion *R, SVal V) {
-  return StoreRef(store, *this);
-}
-
 const ElementRegion *StoreManager::GetElementZeroRegion(const SubRegion *R,
                                                         QualType T) {
   NonLoc idx = svalBuilder.makeZeroArrayIndex();
diff --git a/clang/test/Analysis/ctor.mm b/clang/test/Analysis/ctor.mm
index cfae8ed..751cdb9 100644
--- a/clang/test/Analysis/ctor.mm
+++ b/clang/test/Analysis/ctor.mm
@@ -1,5 +1,7 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -fobjc-arc -analyzer-config c++-inlining=constructors -Wno-null-dereference -std=c++11 -verify %s
-// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -fobjc-arc -analyzer-config c++-inlining=constructors -Wno-null-dereference -std=c++11 -verify -DTEST_INLINABLE_ALLOCATORS %s
+// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 -DI386 -analyzer-checker=core,debug.ExprInspection -fobjc-arc -analyzer-config c++-inlining=constructors -Wno-null-dereference -std=c++11 -verify %s
+// RUN: %clang_analyze_cc1 -triple i386-apple-darwin10 -DI386 -analyzer-checker=core,debug.ExprInspection -fobjc-arc -analyzer-config c++-inlining=constructors -Wno-null-dereference -std=c++11 -verify -DTEST_INLINABLE_ALLOCATORS %s
+// RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin12 -analyzer-checker=core,debug.ExprInspection -fobjc-arc -analyzer-config c++-inlining=constructors -Wno-null-dereference -std=c++11 -verify %s
+// RUN: %clang_analyze_cc1 -triple x86_64-apple-darwin12 -analyzer-checker=core,debug.ExprInspection -fobjc-arc -analyzer-config c++-inlining=constructors -Wno-null-dereference -std=c++11 -verify -DTEST_INLINABLE_ALLOCATORS %s
 
 #include "Inputs/system-header-simulator-cxx.h"
 
@@ -638,12 +640,15 @@
 
   class Empty {
   public:
-    Empty();
+    static int glob;
+    Empty(); // No body.
+    Empty(int x); // Body below.
   };
 
   class PairContainer : public Empty {
-    raw_pair p;
   public:
+    raw_pair p;
+    int q;
     PairContainer() : Empty(), p() {
       // This previously caused a crash because the empty base class looked
       // like an initialization of 'p'.
@@ -651,8 +656,52 @@
     PairContainer(int) : Empty(), p() {
       // Test inlining something else here.
     }
+    PairContainer(double): Empty(1), p() {
+      clang_analyzer_eval(p.p1 == 0); // expected-warning{{TRUE}}
+      clang_analyzer_eval(p.p2 == 0); // expected-warning{{TRUE}}
+
+      clang_analyzer_eval(q == 1); // expected-warning{{TRUE}}
+
+      // This one's indeed UNKNOWN. Definitely not TRUE.
+      clang_analyzer_eval(p.p2 == glob); // expected-warning{{UNKNOWN}}
+    }
   };
 
+  Empty::Empty(int x) {
+    static_cast<PairContainer *>(this)->p.p1 = x;
+    static_cast<PairContainer *>(this)->q = x;
+    // Our static member will store the old garbage values of fields that aren't
+    // yet initialized. It's not certainly garbage though (i.e. the constructor
+    // could have been called on an initialized piece of memory), so no
+    // uninitialized value warning here, and it should be a symbol, not
+    // undefined value, for later comparison.
+    glob = static_cast<PairContainer *>(this)->p.p2;
+  }
+
+	class Empty2 {
+	public:
+		static int glob_p1, glob_p2;
+		Empty2(); // Body below.
+	};
+
+	class PairDoubleEmptyContainer: public Empty, public Empty2 {
+	public:
+    raw_pair p;
+		PairDoubleEmptyContainer(): Empty(), Empty2(), p() {
+      clang_analyzer_eval(p.p1 == 0); // expected-warning{{TRUE}}
+      clang_analyzer_eval(p.p2 == 0); // expected-warning{{TRUE}}
+
+      // This is indeed UNKNOWN.
+      clang_analyzer_eval(p.p1 == glob_p1); // expected-warning{{UNKNOWN}}
+      clang_analyzer_eval(p.p2 == glob_p2); // expected-warning{{UNKNOWN}}
+		}
+	};
+
+	Empty2::Empty2() {
+    glob_p1 = static_cast<PairDoubleEmptyContainer *>(this)->p.p1;
+    glob_p2 = static_cast<PairDoubleEmptyContainer *>(this)->p.p2;
+	}
+
   class PairContainerContainer {
     int padding;
     PairContainer pc;
@@ -749,3 +798,67 @@
   clang_analyzer_eval(d2.x == 1); // expected-warning{{TRUE}}
 }
 }
+
+namespace vbase_zero_init {
+class A {
+  virtual void foo();
+};
+
+class B {
+  virtual void bar();
+public:
+  static int glob_y, glob_z, glob_w;
+  int x;
+  B(); // Body below.
+};
+
+class C : virtual public A {
+public:
+  int y;
+};
+
+class D : public B, public C {
+public:
+  // 'z', unlike 'w', resides in an area that would have been within padding of
+  // base class 'C' if it wasn't part of 'D', but only on 64-bit systems.
+  int z, w;
+  // Initialization order: A(), B(), C().
+  D() : A(), C() {
+    clang_analyzer_eval(x == 1); // expected-warning{{TRUE}}
+    clang_analyzer_eval(y == 0); // expected-warning{{TRUE}}
+#ifdef I386
+    clang_analyzer_eval(z == 3); // expected-warning{{TRUE}}
+#else
+    // FIXME: Should be TRUE. Initialized in B().
+    clang_analyzer_eval(z == 3); // expected-warning{{UNKNOWN}}
+#endif
+    clang_analyzer_eval(w == 4); // expected-warning{{TRUE}}
+
+    // FIXME: Should be UNKNOWN. Changed in B() since glob_y was assigned.
+    clang_analyzer_eval(y == glob_y); // expected-warning{{TRUE}}
+
+#ifdef I386
+    clang_analyzer_eval(z == glob_z); // expected-warning{{UNKNOWN}}
+#else
+    // FIXME: Should be UNKNOWN. Changed in B() since glob_z was assigned.
+    clang_analyzer_eval(z == glob_z); // expected-warning{{TRUE}}
+#endif
+
+    clang_analyzer_eval(w == glob_w); // expected-warning{{UNKNOWN}}
+  } // no-crash
+};
+
+B::B() : x(1) {
+  // Our static members will store the old garbage values of fields that aren't
+  // yet initialized. These aren't certainly garbage though (i.e. the
+  // constructor could have been called on an initialized piece of memory),
+  // so no uninitialized value warning here, and these should be symbols, not
+  // undefined values, for later comparison.
+  glob_y = static_cast<D *>(this)->y;
+  glob_z = static_cast<D *>(this)->z;
+  glob_w = static_cast<D *>(this)->w;
+  static_cast<D *>(this)->y = 2;
+  static_cast<D *>(this)->z = 3;
+  static_cast<D *>(this)->w = 4;
+}
+}