[analyzer][UninitializedObjectChecker] Fixed dereferencing

iThis patch aims to fix derefencing, which has been debated for months now.

Instead of working with SVals, the function now relies on TypedValueRegion.

Differential Revision: https://reviews.llvm.org/D51057

llvm-svn: 342213
diff --git a/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedPointee.cpp b/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedPointee.cpp
index 72e256b..22e3cbd 100644
--- a/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedPointee.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedPointee.cpp
@@ -95,11 +95,13 @@
 /// known, and thus FD can not be analyzed.
 static bool isVoidPointer(QualType T);
 
-/// Dereferences \p V and returns the value and dynamic type of the pointee, as
-/// well as whether \p FR needs to be casted back to that type. If for whatever
-/// reason dereferencing fails, returns with None.
-static llvm::Optional<std::tuple<SVal, QualType, bool>>
-dereference(ProgramStateRef State, const FieldRegion *FR);
+using DereferenceInfo = std::pair<const TypedValueRegion *, bool>;
+
+/// Dereferences \p FR and returns with the pointee's region, and whether it
+/// needs to be casted back to it's location type. If for whatever reason
+/// dereferencing fails, returns with None.
+static llvm::Optional<DereferenceInfo> dereference(ProgramStateRef State,
+                                                   const FieldRegion *FR);
 
 //===----------------------------------------------------------------------===//
 //                   Methods for FindUninitializedFields.
@@ -110,10 +112,8 @@
 bool FindUninitializedFields::isPointerOrReferenceUninit(
     const FieldRegion *FR, FieldChainInfo LocalChain) {
 
-  assert((FR->getDecl()->getType()->isAnyPointerType() ||
-          FR->getDecl()->getType()->isReferenceType() ||
-          FR->getDecl()->getType()->isBlockPointerType()) &&
-         "This method only checks pointer/reference objects!");
+  assert(isDereferencableType(FR->getDecl()->getType()) &&
+         "This method only checks dereferencable objects!");
 
   SVal V = State->getSVal(FR);
 
@@ -134,54 +134,47 @@
 
   // At this point the pointer itself is initialized and points to a valid
   // location, we'll now check the pointee.
-  llvm::Optional<std::tuple<SVal, QualType, bool>> DerefInfo =
-      dereference(State, FR);
+  llvm::Optional<DereferenceInfo> DerefInfo = dereference(State, FR);
   if (!DerefInfo) {
     IsAnyFieldInitialized = true;
     return false;
   }
 
-  V = std::get<0>(*DerefInfo);
-  QualType DynT = std::get<1>(*DerefInfo);
-  bool NeedsCastBack = std::get<2>(*DerefInfo);
+  const TypedValueRegion *R = DerefInfo->first;
+  const bool NeedsCastBack = DerefInfo->second;
 
-  // If FR is a pointer pointing to a non-primitive type.
-  if (Optional<nonloc::LazyCompoundVal> RecordV =
-          V.getAs<nonloc::LazyCompoundVal>()) {
+  QualType DynT = R->getLocationType();
+  QualType PointeeT = DynT->getPointeeType();
 
-    const TypedValueRegion *R = RecordV->getRegion();
+  if (PointeeT->isStructureOrClassType()) {
+    if (NeedsCastBack)
+      return isNonUnionUninit(R, LocalChain.add(NeedsCastLocField(FR, DynT)));
+    return isNonUnionUninit(R, LocalChain.add(LocField(FR)));
+  }
 
-    if (DynT->getPointeeType()->isStructureOrClassType()) {
+  if (PointeeT->isUnionType()) {
+    if (isUnionUninit(R)) {
       if (NeedsCastBack)
-        return isNonUnionUninit(R, LocalChain.add(NeedsCastLocField(FR, DynT)));
-      return isNonUnionUninit(R, LocalChain.add(LocField(FR)));
-    }
-
-    if (DynT->getPointeeType()->isUnionType()) {
-      if (isUnionUninit(R)) {
-        if (NeedsCastBack)
-          return addFieldToUninits(LocalChain.add(NeedsCastLocField(FR, DynT)));
-        return addFieldToUninits(LocalChain.add(LocField(FR)));
-      } else {
-        IsAnyFieldInitialized = true;
-        return false;
-      }
-    }
-
-    if (DynT->getPointeeType()->isArrayType()) {
+        return addFieldToUninits(LocalChain.add(NeedsCastLocField(FR, DynT)));
+      return addFieldToUninits(LocalChain.add(LocField(FR)));
+    } else {
       IsAnyFieldInitialized = true;
       return false;
     }
-
-    llvm_unreachable("All cases are handled!");
   }
 
-  assert((isPrimitiveType(DynT->getPointeeType()) || DynT->isAnyPointerType() ||
-          DynT->isReferenceType()) &&
+  if (PointeeT->isArrayType()) {
+    IsAnyFieldInitialized = true;
+    return false;
+  }
+
+  assert((isPrimitiveType(PointeeT) || isDereferencableType(PointeeT)) &&
          "At this point FR must either have a primitive dynamic type, or it "
          "must be a null, undefined, unknown or concrete pointer!");
 
-  if (isPrimitiveUninit(V)) {
+  SVal PointeeV = State->getSVal(R);
+
+  if (isPrimitiveUninit(PointeeV)) {
     if (NeedsCastBack)
       return addFieldToUninits(LocalChain.add(NeedsCastLocField(FR, DynT)));
     return addFieldToUninits(LocalChain.add(LocField(FR)));
@@ -204,47 +197,46 @@
   return false;
 }
 
-static llvm::Optional<std::tuple<SVal, QualType, bool>>
-dereference(ProgramStateRef State, const FieldRegion *FR) {
+static llvm::Optional<DereferenceInfo> dereference(ProgramStateRef State,
+                                                   const FieldRegion *FR) {
 
-  DynamicTypeInfo DynTInfo;
-  QualType DynT;
+  llvm::SmallSet<const TypedValueRegion *, 5> VisitedRegions;
 
   // If the static type of the field is a void pointer, we need to cast it back
   // to the dynamic type before dereferencing.
   bool NeedsCastBack = isVoidPointer(FR->getDecl()->getType());
 
   SVal V = State->getSVal(FR);
-  assert(V.getAs<loc::MemRegionVal>() && "V must be loc::MemRegionVal!");
+  assert(V.getAsRegion() && "V must have an underlying region!");
 
-  // If V is multiple pointer value, we'll dereference it again (e.g.: int** ->
-  // int*).
-  // TODO: Dereference according to the dynamic type to avoid infinite loop for
-  // these kind of fields:
-  //   int **ptr = reinterpret_cast<int **>(&ptr);
-  while (auto Tmp = V.getAs<loc::MemRegionVal>()) {
-    // We can't reason about symbolic regions, assume its initialized.
-    // Note that this also avoids a potential infinite recursion, because
-    // constructors for list-like classes are checked without being called, and
-    // the Static Analyzer will construct a symbolic region for Node *next; or
-    // similar code snippets.
-    if (Tmp->getRegion()->getSymbolicBase()) {
+  // The region we'd like to acquire.
+  const auto *R = V.getAsRegion()->getAs<TypedValueRegion>();
+  if (!R)
+    return None;
+
+  VisitedRegions.insert(R);
+
+  // We acquire the dynamic type of R,
+  QualType DynT = R->getLocationType();
+
+  while (const MemRegion *Tmp = State->getSVal(R, DynT).getAsRegion()) {
+
+    R = Tmp->getAs<TypedValueRegion>();
+
+    if (!R)
       return None;
-    }
 
-    DynTInfo = getDynamicTypeInfo(State, Tmp->getRegion());
-    if (!DynTInfo.isValid()) {
+    // We found a cyclic pointer, like int *ptr = (int *)&ptr.
+    // TODO: Report these fields too.
+    if (!VisitedRegions.insert(R).second)
       return None;
-    }
 
-    DynT = DynTInfo.getType();
-
-    if (isVoidPointer(DynT)) {
-      return None;
-    }
-
-    V = State->getSVal(*Tmp, DynT);
+    DynT = R->getLocationType();
+    // In order to ensure that this loop terminates, we're also checking the
+    // dynamic type of R, since type hierarchy is finite.
+    if (isDereferencableType(DynT->getPointeeType()))
+      break;
   }
 
-  return std::make_tuple(V, DynT, NeedsCastBack);
+  return std::make_pair(R, NeedsCastBack);
 }