[analyzer] Anti-aliasing: different heap allocations do not alias

Add a concept of symbolic memory region belonging to heap memory space.
When comparing symbolic regions allocated on the heap, assume that they
do not alias. 

Use symbolic heap region to suppress a common false positive pattern in
the malloc checker, in code that relies on malloc not returning the
memory aliased to other malloc allocations, stack.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@158136 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
index 3b9712f..c22c6a2 100644
--- a/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
@@ -477,19 +477,27 @@
                                            const CallExpr *CE,
                                            SVal Size, SVal Init,
                                            ProgramStateRef state) {
-  // Get the return value.
-  SVal retVal = state->getSVal(CE, C.getLocationContext());
+
+  // Bind the return value to the symbolic value from the heap region.
+  // TODO: We could rewrite post visit to eval call; 'malloc' does not have
+  // side effects other than what we model here.
+  unsigned Count = C.getCurrentBlockCount();
+  SValBuilder &svalBuilder = C.getSValBuilder();
+  const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
+  DefinedSVal RetVal =
+    cast<DefinedSVal>(svalBuilder.getConjuredHeapSymbolVal(CE, LCtx, Count));
+  state = state->BindExpr(CE, C.getLocationContext(), RetVal);
 
   // We expect the malloc functions to return a pointer.
-  if (!isa<Loc>(retVal))
+  if (!isa<Loc>(RetVal))
     return 0;
 
   // Fill the region with the initialization value.
-  state = state->bindDefault(retVal, Init);
+  state = state->bindDefault(RetVal, Init);
 
   // Set the region's extent equal to the Size parameter.
   const SymbolicRegion *R =
-      dyn_cast_or_null<SymbolicRegion>(retVal.getAsRegion());
+      dyn_cast_or_null<SymbolicRegion>(RetVal.getAsRegion());
   if (!R)
     return 0;
   if (isa<DefinedOrUnknownSVal>(Size)) {
diff --git a/lib/StaticAnalyzer/Core/MemRegion.cpp b/lib/StaticAnalyzer/Core/MemRegion.cpp
index 96905c0..bcf7c3f 100644
--- a/lib/StaticAnalyzer/Core/MemRegion.cpp
+++ b/lib/StaticAnalyzer/Core/MemRegion.cpp
@@ -849,6 +849,10 @@
   return getSubRegion<SymbolicRegion>(sym, getUnknownRegion());
 }
 
+const SymbolicRegion *MemRegionManager::getSymbolicHeapRegion(SymbolRef Sym) {
+  return getSubRegion<SymbolicRegion>(Sym, getHeapRegion());
+}
+
 const FieldRegion*
 MemRegionManager::getFieldRegion(const FieldDecl *d,
                                  const MemRegion* superRegion){
diff --git a/lib/StaticAnalyzer/Core/SValBuilder.cpp b/lib/StaticAnalyzer/Core/SValBuilder.cpp
index 765ae48..c391489 100644
--- a/lib/StaticAnalyzer/Core/SValBuilder.cpp
+++ b/lib/StaticAnalyzer/Core/SValBuilder.cpp
@@ -148,6 +148,18 @@
   return nonloc::SymbolVal(sym);
 }
 
+DefinedOrUnknownSVal
+SValBuilder::getConjuredHeapSymbolVal(const Expr *E,
+                                      const LocationContext *LCtx,
+                                      unsigned VisitCount) {
+  QualType T = E->getType();
+  assert(Loc::isLocType(T));
+  assert(SymbolManager::canSymbolicate(T));
+
+  SymbolRef sym = SymMgr.getConjuredSymbol(E, LCtx, T, VisitCount);
+  return loc::MemRegionVal(MemMgr.getSymbolicHeapRegion(sym));
+}
+
 DefinedSVal SValBuilder::getMetadataSymbolVal(const void *symbolTag,
                                               const MemRegion *region,
                                               const Expr *expr, QualType type,
diff --git a/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
index 29c65f8..9cbbece 100644
--- a/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
+++ b/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
@@ -673,11 +673,18 @@
       // regions, though.
       return UnknownVal();
 
-    // If both values wrap regions, see if they're from different base regions.
+    const MemSpaceRegion *LeftMS = LeftMR->getMemorySpace();
+    const MemSpaceRegion *RightMS = RightMR->getMemorySpace();
+    const MemSpaceRegion *UnknownMS = MemMgr.getUnknownRegion();
     const MemRegion *LeftBase = LeftMR->getBaseRegion();
     const MemRegion *RightBase = RightMR->getBaseRegion();
-    if (LeftBase != RightBase &&
-        !isa<SymbolicRegion>(LeftBase) && !isa<SymbolicRegion>(RightBase)) {
+
+    // If the two regions are from different known memory spaces they cannot be
+    // equal. Also, assume that no symbolic region (whose memory space is
+    // unknown) is on the stack.
+    if (LeftMS != RightMS &&
+        ((LeftMS != UnknownMS && RightMS != UnknownMS) ||
+         (isa<StackSpaceRegion>(LeftMS) || isa<StackSpaceRegion>(RightMS)))) {
       switch (op) {
       default:
         return UnknownVal();
@@ -688,24 +695,20 @@
       }
     }
 
-    // The two regions are from the same base region. See if they're both a
-    // type of region we know how to compare.
-    const MemSpaceRegion *LeftMS = LeftBase->getMemorySpace();
-    const MemSpaceRegion *RightMS = RightBase->getMemorySpace();
-
-    // Heuristic: assume that no symbolic region (whose memory space is
-    // unknown) is on the stack.
-    // FIXME: we should be able to be more precise once we can do better
-    // aliasing constraints for symbolic regions, but this is a reasonable,
-    // albeit unsound, assumption that holds most of the time.
-    if (isa<StackSpaceRegion>(LeftMS) ^ isa<StackSpaceRegion>(RightMS)) {
+    // If both values wrap regions, see if they're from different base regions.
+    // Note, heap base symbolic regions are assumed to not alias with
+    // each other; for example, we assume that malloc returns different address
+    // on each invocation.
+    if (LeftBase != RightBase &&
+        ((!isa<SymbolicRegion>(LeftBase) && !isa<SymbolicRegion>(RightBase)) ||
+         isa<HeapSpaceRegion>(LeftMS)) ){
       switch (op) {
-        default:
-          break;
-        case BO_EQ:
-          return makeTruthVal(false, resultTy);
-        case BO_NE:
-          return makeTruthVal(true, resultTy);
+      default:
+        return UnknownVal();
+      case BO_EQ:
+        return makeTruthVal(false, resultTy);
+      case BO_NE:
+        return makeTruthVal(true, resultTy);
       }
     }