Add a new symbol type, SymbolExtent, to represent the extent of a memory region when that extent may not be known at compile time (such as for regions created by malloc). This replaces the old setExtent/getExtent API on Store, which used the GRState's GDM to store SVals.

Also add a getKnownValue() method to SValuator, which returns the integer value of an SVal if the SVal is known to have exactly one possible value. There are more places in the code that could use this, but in general we want to deal entirely in SVals, so its usefulness is limited.

The only visible functionality change is that extents are now honored for any DeclRegion, such as fields and Objective-C ivars, rather than just variables. This shows up in bounds-checking and cast-size-checking.


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@107577 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Checker/BuiltinFunctionChecker.cpp b/lib/Checker/BuiltinFunctionChecker.cpp
index 9c8b516..057e474 100644
--- a/lib/Checker/BuiltinFunctionChecker.cpp
+++ b/lib/Checker/BuiltinFunctionChecker.cpp
@@ -57,15 +57,24 @@
   case Builtin::BI__builtin_alloca: {
     // FIXME: Refactor into StoreManager itself?
     MemRegionManager& RM = C.getStoreManager().getRegionManager();
-    const MemRegion* R =
+    const AllocaRegion* R =
       RM.getAllocaRegion(CE, C.getNodeBuilder().getCurrentBlockCount(),
                          C.getPredecessor()->getLocationContext());
 
     // Set the extent of the region in bytes. This enables us to use the
     // SVal of the argument directly. If we save the extent in bits, we
     // cannot represent values like symbol*8.
-    SVal Extent = state->getSVal(*(CE->arg_begin()));
-    state = C.getStoreManager().setExtent(state, R, Extent);
+    DefinedOrUnknownSVal Size =
+      cast<DefinedOrUnknownSVal>(state->getSVal(*(CE->arg_begin())));
+
+    ValueManager& ValMgr = C.getValueManager();
+    DefinedOrUnknownSVal Extent = R->getExtent(ValMgr);
+
+    SValuator& SVator = ValMgr.getSValuator();
+    DefinedOrUnknownSVal ExtentMatchesSizeArg =
+      SVator.EvalEQ(state, Extent, Size);
+    state = state->Assume(ExtentMatchesSizeArg, true);
+
     C.GenerateNode(state->BindExpr(CE, loc::MemRegionVal(R)));
     return true;
   }
diff --git a/lib/Checker/CastSizeChecker.cpp b/lib/Checker/CastSizeChecker.cpp
index 59ea9e0..a502c10 100644
--- a/lib/Checker/CastSizeChecker.cpp
+++ b/lib/Checker/CastSizeChecker.cpp
@@ -44,7 +44,8 @@
 
   QualType ToPointeeTy = ToPTy->getPointeeType();
 
-  const MemRegion *R = C.getState()->getSVal(E).getAsRegion();
+  const GRState *state = C.getState();
+  const MemRegion *R = state->getSVal(E).getAsRegion();
   if (R == 0)
     return;
 
@@ -52,19 +53,18 @@
   if (SR == 0)
     return;
 
-  llvm::Optional<SVal> V = 
-                    C.getEngine().getStoreManager().getExtent(C.getState(), SR);
-  if (!V)
+  ValueManager &ValMgr = C.getValueManager();
+  SVal Extent = SR->getExtent(ValMgr);
+
+  SValuator &SVator = ValMgr.getSValuator();
+  const llvm::APSInt *ExtentInt = SVator.getKnownValue(state, Extent);
+  if (!ExtentInt)
     return;
 
-  const nonloc::ConcreteInt *CI = dyn_cast<nonloc::ConcreteInt>(V);
-  if (!CI)
-    return;
-
-  CharUnits RegionSize = CharUnits::fromQuantity(CI->getValue().getSExtValue());
+  CharUnits RegionSize = CharUnits::fromQuantity(ExtentInt->getSExtValue());
   CharUnits TypeSize = C.getASTContext().getTypeSizeInChars(ToPointeeTy);
   
-  // void, and a few other un-sizeable types
+  // Ignore void, and a few other un-sizeable types.
   if (TypeSize.isZero())
     return;
   
diff --git a/lib/Checker/MallocChecker.cpp b/lib/Checker/MallocChecker.cpp
index a5bba1d..dcc21ca 100644
--- a/lib/Checker/MallocChecker.cpp
+++ b/lib/Checker/MallocChecker.cpp
@@ -172,15 +172,23 @@
   unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
   ValueManager &ValMgr = C.getValueManager();
 
+  // Set the return value.
   SVal RetVal = ValMgr.getConjuredSymbolVal(NULL, CE, CE->getType(), Count);
+  state = state->BindExpr(CE, RetVal);
 
-  state = C.getEngine().getStoreManager().setExtent(state, RetVal.getAsRegion(),
-                                                    Size);
-
+  // Fill the region with the initialization value.
   state = state->bindDefault(RetVal, Init);
 
-  state = state->BindExpr(CE, RetVal);
-  
+  // Set the region's extent equal to the Size parameter.
+  const SymbolicRegion *R = cast<SymbolicRegion>(RetVal.getAsRegion());
+  DefinedOrUnknownSVal Extent = R->getExtent(ValMgr);
+  DefinedOrUnknownSVal DefinedSize = cast<DefinedOrUnknownSVal>(Size);
+
+  SValuator &SVator = ValMgr.getSValuator();
+  DefinedOrUnknownSVal ExtentMatchesSize =
+    SVator.EvalEQ(state, Extent, DefinedSize);
+  state = state->Assume(ExtentMatchesSize, true);
+
   SymbolRef Sym = RetVal.getAsLocSymbol();
   assert(Sym);
   // Set the symbol's state to Allocated.
diff --git a/lib/Checker/MemRegion.cpp b/lib/Checker/MemRegion.cpp
index 66d2a41..6a60a61 100644
--- a/lib/Checker/MemRegion.cpp
+++ b/lib/Checker/MemRegion.cpp
@@ -14,6 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Checker/PathSensitive/MemRegion.h"
+#include "clang/Checker/PathSensitive/ValueManager.h"
 #include "clang/Analysis/AnalysisContext.h"
 #include "clang/Analysis/Support/BumpVector.h"
 #include "clang/AST/CharUnits.h"
@@ -171,6 +172,52 @@
 }
 
 //===----------------------------------------------------------------------===//
+// Region extents.
+//===----------------------------------------------------------------------===//
+
+DefinedOrUnknownSVal DeclRegion::getExtent(ValueManager& ValMgr) const {
+  ASTContext& Ctx = ValMgr.getContext();
+  QualType T = getDesugaredValueType(Ctx);
+
+  // FIXME: Handle variable-length arrays.
+  if (isa<VariableArrayType>(T) || isa<IncompleteArrayType>(T))
+    return UnknownVal();
+
+  CharUnits Size = Ctx.getTypeSizeInChars(T);
+  QualType SizeTy = Ctx.getSizeType();
+  return ValMgr.makeIntVal(Size.getQuantity(), SizeTy);
+}
+
+DefinedOrUnknownSVal FieldRegion::getExtent(ValueManager& ValMgr) const {
+  DefinedOrUnknownSVal Extent = DeclRegion::getExtent(ValMgr);
+
+  // A zero-length array at the end of a struct often stands for dynamically-
+  // allocated extra memory.
+  if (Extent.isZeroConstant()) {
+    ASTContext& Ctx = ValMgr.getContext();
+    QualType T = getDesugaredValueType(Ctx);
+
+    if (isa<ConstantArrayType>(T))
+      return UnknownVal();
+  }
+
+  return Extent;
+}
+
+DefinedOrUnknownSVal AllocaRegion::getExtent(ValueManager& ValMgr) const {
+  return nonloc::SymbolVal(ValMgr.getSymbolManager().getExtentSymbol(this));
+}
+
+DefinedOrUnknownSVal SymbolicRegion::getExtent(ValueManager& ValMgr) const {
+  return nonloc::SymbolVal(ValMgr.getSymbolManager().getExtentSymbol(this));
+}
+
+DefinedOrUnknownSVal StringRegion::getExtent(ValueManager& ValMgr) const {
+  QualType SizeTy = ValMgr.getContext().getSizeType();
+  return ValMgr.makeIntVal(getStringLiteral()->getByteLength()+1, SizeTy);
+}
+
+//===----------------------------------------------------------------------===//
 // FoldingSet profiling.
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/Checker/RegionStore.cpp b/lib/Checker/RegionStore.cpp
index 8a64ec8..7164b83 100644
--- a/lib/Checker/RegionStore.cpp
+++ b/lib/Checker/RegionStore.cpp
@@ -118,22 +118,6 @@
 }
 
 //===----------------------------------------------------------------------===//
-// Region "Extents"
-//===----------------------------------------------------------------------===//
-//
-//  MemRegions represent chunks of memory with a size (their "extent").  This
-//  GDM entry tracks the extents for regions.  Extents are in bytes.
-//
-namespace { class RegionExtents {}; }
-static int RegionExtentsIndex = 0;
-namespace clang {
-  template<> struct GRStateTrait<RegionExtents>
-    : public GRStatePartialTrait<llvm::ImmutableMap<const MemRegion*, SVal> > {
-    static void* GDMIndex() { return &RegionExtentsIndex; }
-  };
-}
-
-//===----------------------------------------------------------------------===//
 // Utility functions.
 //===----------------------------------------------------------------------===//
 
@@ -380,18 +364,7 @@
   // Region "extents".
   //===------------------------------------------------------------------===//
 
-  const GRState *setExtent(const GRState *state,const MemRegion* R,SVal Extent){
-    return state->set<RegionExtents>(R, Extent);
-  }
-
-  Optional<SVal> getExtent(const GRState *state, const MemRegion *R) {
-    const SVal *V = state->get<RegionExtents>(R);
-    if (V)
-      return *V;
-    else
-      return Optional<SVal>();
-  }
-
+  // FIXME: This method will soon be eliminated; see the note in Store.h.
   DefinedOrUnknownSVal getSizeInElements(const GRState *state,
                                          const MemRegion* R, QualType EleTy);
 
@@ -772,88 +745,19 @@
 DefinedOrUnknownSVal RegionStoreManager::getSizeInElements(const GRState *state,
                                                            const MemRegion *R,
                                                            QualType EleTy) {
+  SVal Size = cast<SubRegion>(R)->getExtent(ValMgr);
+  SValuator &SVator = ValMgr.getSValuator();
+  const llvm::APSInt *SizeInt = SVator.getKnownValue(state, Size);
+  if (!SizeInt)
+    return UnknownVal();
 
-  switch (R->getKind()) {
-    case MemRegion::CXXThisRegionKind:
-      assert(0 && "Cannot get size of 'this' region");
-    case MemRegion::GenericMemSpaceRegionKind:
-    case MemRegion::StackLocalsSpaceRegionKind:
-    case MemRegion::StackArgumentsSpaceRegionKind:
-    case MemRegion::HeapSpaceRegionKind:
-    case MemRegion::NonStaticGlobalSpaceRegionKind:
-    case MemRegion::StaticGlobalSpaceRegionKind:
-    case MemRegion::UnknownSpaceRegionKind:
-      assert(0 && "Cannot index into a MemSpace");
-      return UnknownVal();
+  CharUnits RegionSize = CharUnits::fromQuantity(SizeInt->getSExtValue());
+  CharUnits EleSize = getContext().getTypeSizeInChars(EleTy);
 
-    case MemRegion::FunctionTextRegionKind:
-    case MemRegion::BlockTextRegionKind:
-    case MemRegion::BlockDataRegionKind:
-      // Technically this can happen if people do funny things with casts.
-      return UnknownVal();
-
-      // Not yet handled.
-    case MemRegion::AllocaRegionKind:
-    case MemRegion::CompoundLiteralRegionKind:
-    case MemRegion::ElementRegionKind:
-    case MemRegion::FieldRegionKind:
-    case MemRegion::ObjCIvarRegionKind:
-    case MemRegion::CXXObjectRegionKind:
-      return UnknownVal();
-
-    case MemRegion::SymbolicRegionKind: {
-      const SVal *Size = state->get<RegionExtents>(R);
-      if (!Size)
-        return UnknownVal();
-      const nonloc::ConcreteInt *CI = dyn_cast<nonloc::ConcreteInt>(Size);
-      if (!CI)
-        return UnknownVal();
-
-      CharUnits RegionSize =
-        CharUnits::fromQuantity(CI->getValue().getSExtValue());
-      CharUnits EleSize = getContext().getTypeSizeInChars(EleTy);
-      assert(RegionSize % EleSize == 0);
-
-      return ValMgr.makeIntVal(RegionSize / EleSize, false);
-    }
-
-    case MemRegion::StringRegionKind: {
-      const StringLiteral* Str = cast<StringRegion>(R)->getStringLiteral();
-      // We intentionally made the size value signed because it participates in
-      // operations with signed indices.
-      return ValMgr.makeIntVal(Str->getByteLength()+1, false);
-    }
-
-    case MemRegion::VarRegionKind: {
-      const VarRegion* VR = cast<VarRegion>(R);
-      ASTContext& Ctx = getContext();
-      // Get the type of the variable.
-      QualType T = VR->getDesugaredValueType(Ctx);
-
-      // FIXME: Handle variable-length arrays.
-      if (isa<VariableArrayType>(T))
-        return UnknownVal();
-
-      CharUnits EleSize = Ctx.getTypeSizeInChars(EleTy);
-
-      if (const ConstantArrayType* CAT = dyn_cast<ConstantArrayType>(T)) {
-        // return the size as signed integer.
-        CharUnits RealEleSize = Ctx.getTypeSizeInChars(CAT->getElementType());
-        CharUnits::QuantityType EleRatio = RealEleSize / EleSize;
-        int64_t Length = CAT->getSize().getSExtValue();
-        return ValMgr.makeIntVal(Length * EleRatio, false);
-      }
-
-      // Clients can reinterpret ordinary variables as arrays, possibly of
-      // another type. The width is rounded down to ensure that an access is
-      // entirely within bounds.
-      CharUnits VarSize = Ctx.getTypeSizeInChars(T);
-      return ValMgr.makeIntVal(VarSize / EleSize, false);
-    }
-  }
-
-  assert(0 && "Unreachable");
-  return UnknownVal();
+  // If the region is reinterpreted as elements whose size does not evenly
+  // divide the region's size, round the element count down.
+  // This is a signed value, since it's used in arithmetic with signed indices.
+  return ValMgr.makeIntVal(RegionSize / EleSize, false);
 }
 
 //===----------------------------------------------------------------------===//
@@ -1954,13 +1858,6 @@
   }
   state.setStore(B.getRoot());
   const GRState *s = StateMgr.getPersistentState(state);
-  // Remove the extents of dead symbolic regions.
-  llvm::ImmutableMap<const MemRegion*,SVal> Extents = s->get<RegionExtents>();
-  for (llvm::ImmutableMap<const MemRegion *, SVal>::iterator I=Extents.begin(),
-         E = Extents.end(); I != E; ++I) {
-    if (!W.isVisited(I->first))
-      s = s->remove<RegionExtents>(I->first);
-  }
   return s;
 }
 
diff --git a/lib/Checker/SimpleSValuator.cpp b/lib/Checker/SimpleSValuator.cpp
index 5b24992..3bc4ee7 100644
--- a/lib/Checker/SimpleSValuator.cpp
+++ b/lib/Checker/SimpleSValuator.cpp
@@ -34,6 +34,10 @@
                            Loc lhs, Loc rhs, QualType resultTy);
   virtual SVal EvalBinOpLN(const GRState *state, BinaryOperator::Opcode op,
                            Loc lhs, NonLoc rhs, QualType resultTy);
+
+  /// getKnownValue - Evaluates a given SVal. If the SVal has only one possible
+  ///  (integer) value, that value is returned. Otherwise, returns NULL.
+  virtual const llvm::APSInt *getKnownValue(const GRState *state, SVal V);
   
   SVal MakeSymIntVal(const SymExpr *LHS, BinaryOperator::Opcode op,
                      const llvm::APSInt &RHS, QualType resultTy);
@@ -819,3 +823,21 @@
   return state->getStateManager().getStoreManager().EvalBinOp(op, lhs,
                                                               rhs, resultTy);
 }
+
+const llvm::APSInt *SimpleSValuator::getKnownValue(const GRState *state,
+                                                   SVal V) {
+  if (V.isUnknownOrUndef())
+    return NULL;
+
+  if (loc::ConcreteInt* X = dyn_cast<loc::ConcreteInt>(&V))
+    return &X->getValue();
+
+  if (nonloc::ConcreteInt* X = dyn_cast<nonloc::ConcreteInt>(&V))
+    return &X->getValue();
+
+  if (SymbolRef Sym = V.getAsSymbol())
+    return state->getSymVal(Sym);
+
+  // FIXME: Add support for SymExprs.
+  return NULL;
+}
diff --git a/lib/Checker/SymbolManager.cpp b/lib/Checker/SymbolManager.cpp
index 0bf51d7..c2b557e 100644
--- a/lib/Checker/SymbolManager.cpp
+++ b/lib/Checker/SymbolManager.cpp
@@ -74,6 +74,10 @@
      << getParentSymbol() << ',' << getRegion() << '}';
 }
 
+void SymbolExtent::dumpToStream(llvm::raw_ostream& os) const {
+  os << "extent_$" << getSymbolID() << '{' << getRegion() << '}';
+}
+
 void SymbolRegionValue::dumpToStream(llvm::raw_ostream& os) const {
   os << "reg_$" << getSymbolID() << "<" << R << ">";
 }
@@ -130,6 +134,22 @@
   return cast<SymbolDerived>(SD);
 }
 
+const SymbolExtent*
+SymbolManager::getExtentSymbol(const SubRegion *R) {
+  llvm::FoldingSetNodeID profile;
+  SymbolExtent::Profile(profile, R);
+  void* InsertPos;
+  SymExpr *SD = DataSet.FindNodeOrInsertPos(profile, InsertPos);
+  if (!SD) {
+    SD = (SymExpr*) BPAlloc.Allocate<SymbolExtent>();
+    new (SD) SymbolExtent(SymbolCounter, R);
+    DataSet.InsertNode(SD, InsertPos);
+    ++SymbolCounter;
+  }
+
+  return cast<SymbolExtent>(SD);
+}
+
 const SymIntExpr *SymbolManager::getSymIntExpr(const SymExpr *lhs,
                                                BinaryOperator::Opcode op,
                                                const llvm::APSInt& v,
@@ -170,11 +190,14 @@
   return T;
 }
 
-
 QualType SymbolDerived::getType(ASTContext& Ctx) const {
   return R->getValueType(Ctx);
 }
 
+QualType SymbolExtent::getType(ASTContext& Ctx) const {
+  return Ctx.getSizeType();
+}
+
 QualType SymbolRegionValue::getType(ASTContext& C) const {
   return R->getValueType(C);
 }
@@ -210,6 +233,15 @@
     return false;
   }
 
+  if (const SymbolExtent *extent = dyn_cast<SymbolExtent>(sym)) {
+    const MemRegion *Base = extent->getRegion()->getBaseRegion();
+    if (const VarRegion *VR = dyn_cast<VarRegion>(Base))
+      return isLive(VR);
+    if (const SymbolicRegion *SR = dyn_cast<SymbolicRegion>(Base))
+      return isLive(SR->getSymbol());
+    return false;
+  }
+
   // Interogate the symbol.  It may derive from an input value to
   // the analyzed function/method.
   return isa<SymbolRegionValue>(sym);