[analyzer] Be less pessimistic about invalidation of global variables
as a result of a call.

Problem:
Global variables, which come in from system libraries should not be
invalidated by all calls. Also, non-system globals should not be
invalidated by system calls.

Solution:
The following solution to invalidation of globals seems flexible enough
for taint (does not invalidate stdin) and should not lead to too
many false positives. We split globals into 3 classes:

* immutable - values are preserved by calls (unless the specific
global is passed in as a parameter):
     A :  Most system globals and const scalars

* invalidated by functions defined in system headers:
     B: errno

* invalidated by all other functions (note, these functions may in
turn contain system calls):
     B: errno
     C: all other globals (which are not in A nor B)

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@147569 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
index 3c7c907..dcc7ab9 100644
--- a/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
@@ -407,8 +407,7 @@
   default: {
     const MemSpaceRegion *MS = MR->getMemorySpace();
     
-    switch (MS->getKind()) {
-    case MemRegion::StackLocalsSpaceRegionKind: {
+    if (isa<StackLocalsSpaceRegion>(MS)) {
       const VarRegion *VR = dyn_cast<VarRegion>(MR);
       const VarDecl *VD;
       if (VR)
@@ -422,7 +421,8 @@
         os << "the address of a local stack variable";
       return true;
     }
-    case MemRegion::StackArgumentsSpaceRegionKind: {
+
+    if (isa<StackArgumentsSpaceRegion>(MS)) {
       const VarRegion *VR = dyn_cast<VarRegion>(MR);
       const VarDecl *VD;
       if (VR)
@@ -436,8 +436,8 @@
         os << "the address of a parameter";
       return true;
     }
-    case MemRegion::NonStaticGlobalSpaceRegionKind:
-    case MemRegion::StaticGlobalSpaceRegionKind: {
+
+    if (isa<GlobalsSpaceRegion>(MS)) {
       const VarRegion *VR = dyn_cast<VarRegion>(MR);
       const VarDecl *VD;
       if (VR)
@@ -454,9 +454,8 @@
         os << "the address of a global variable";
       return true;
     }
-    default:
-      return false;
-    }
+
+    return false;
   }
   }
 }
diff --git a/lib/StaticAnalyzer/Core/ExprEngine.cpp b/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 9b0e66f..f7022c6 100644
--- a/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -24,7 +24,6 @@
 #include "clang/AST/DeclCXX.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/SourceManager.h"
-#include "clang/Basic/SourceManager.h"
 #include "clang/Basic/PrettyStackTrace.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/ImmutableList.h"
@@ -158,27 +157,6 @@
   return state;
 }
 
-bool
-ExprEngine::doesInvalidateGlobals(const CallOrObjCMessage &callOrMessage) const
-{
-  if (callOrMessage.isFunctionCall() && !callOrMessage.isCXXCall()) {
-    SVal calleeV = callOrMessage.getFunctionCallee();
-    if (const FunctionTextRegion *codeR =
-          dyn_cast_or_null<FunctionTextRegion>(calleeV.getAsRegion())) {
-      
-      const FunctionDecl *fd = codeR->getDecl();
-      if (const IdentifierInfo *ii = fd->getIdentifier()) {
-        StringRef fname = ii->getName();
-        if (fname == "strlen")
-          return false;
-      }
-    }
-  }
-  
-  // The conservative answer: invalidates globals.
-  return true;
-}
-
 //===----------------------------------------------------------------------===//
 // Top-level transfer function logic (Dispatcher).
 //===----------------------------------------------------------------------===//
diff --git a/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
index 4134b35..99752e5 100644
--- a/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
+++ b/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
@@ -294,15 +294,17 @@
     if (ObjTy->isRecordType()) {
       regionsToInvalidate.push_back(EleReg);
       // Invalidate the regions.
+      // TODO: Pass the call to new information as the last argument, to limit
+      // the globals which will get invalidated.
       state = state->invalidateRegions(regionsToInvalidate,
-                                       CNE, blockCount, 0,
-                                       /* invalidateGlobals = */ true);
+                                       CNE, blockCount, 0, 0);
       
     } else {
       // Invalidate the regions.
+      // TODO: Pass the call to new information as the last argument, to limit
+      // the globals which will get invalidated.
       state = state->invalidateRegions(regionsToInvalidate,
-                                       CNE, blockCount, 0,
-                                       /* invalidateGlobals = */ true);
+                                       CNE, blockCount, 0, 0);
 
       if (CNE->hasInitializer()) {
         SVal V = state->getSVal(*CNE->constructor_arg_begin());
diff --git a/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp b/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
index 46ebd84..faffee7 100644
--- a/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
+++ b/lib/StaticAnalyzer/Core/ExprEngineCallAndReturn.cpp
@@ -200,7 +200,7 @@
   //  global variables.
   return State->invalidateRegions(RegionsToInvalidate,
                                   Call.getOriginExpr(), Count,
-                                  &IS, doesInvalidateGlobals(Call));
+                                  &IS, &Call);
 
 }
 
diff --git a/lib/StaticAnalyzer/Core/MemRegion.cpp b/lib/StaticAnalyzer/Core/MemRegion.cpp
index 2ddaed5..8af0251 100644
--- a/lib/StaticAnalyzer/Core/MemRegion.cpp
+++ b/lib/StaticAnalyzer/Core/MemRegion.cpp
@@ -19,6 +19,7 @@
 #include "clang/Analysis/Support/BumpVector.h"
 #include "clang/AST/CharUnits.h"
 #include "clang/AST/RecordLayout.h"
+#include "clang/Basic/SourceManager.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace clang;
@@ -459,10 +460,6 @@
   os << superRegion << "->" << *getDecl();
 }
 
-void NonStaticGlobalSpaceRegion::dumpToStream(raw_ostream &os) const {
-  os << "NonStaticGlobalSpaceRegion";
-}
-
 void ObjCIvarRegion::dumpToStream(raw_ostream &os) const {
   os << "ivar{" << superRegion << ',' << *getDecl() << '}';
 }
@@ -491,6 +488,22 @@
   os << "StaticGlobalsMemSpace{" << CR << '}';
 }
 
+void NonStaticGlobalSpaceRegion::dumpToStream(raw_ostream &os) const {
+  os << "NonStaticGlobalSpaceRegion";
+}
+
+void GlobalInternalSpaceRegion::dumpToStream(raw_ostream &os) const {
+  os << "GlobalInternalSpaceRegion";
+}
+
+void GlobalSystemSpaceRegion::dumpToStream(raw_ostream &os) const {
+  os << "GlobalSystemSpaceRegion";
+}
+
+void GlobalImmutableSpaceRegion::dumpToStream(raw_ostream &os) const {
+  os << "GlobalImmutableSpaceRegion";
+}
+
 //===----------------------------------------------------------------------===//
 // MemRegionManager methods.
 //===----------------------------------------------------------------------===//
@@ -542,10 +555,18 @@
 }
 
 const GlobalsSpaceRegion
-*MemRegionManager::getGlobalsRegion(const CodeTextRegion *CR) {
-  if (!CR)
-    return LazyAllocate(globals);
+*MemRegionManager::getGlobalsRegion(MemRegion::Kind K,
+                                    const CodeTextRegion *CR) {
+  if (!CR) {
+    if (K == MemRegion::GlobalSystemSpaceRegionKind)
+      return LazyAllocate(SystemGlobals);
+    if (K == MemRegion::GlobalImmutableSpaceRegionKind)
+      return LazyAllocate(ImmutableGlobals);
+    assert(K == MemRegion::GlobalInternalSpaceRegionKind);
+    return LazyAllocate(InternalGlobals);
+  }
 
+  assert(K == MemRegion::StaticGlobalSpaceRegionKind);
   StaticGlobalSpaceRegion *&R = StaticsGlobalSpaceRegions[CR];
   if (R)
     return R;
@@ -570,7 +591,6 @@
 //===----------------------------------------------------------------------===//
 // Constructing regions.
 //===----------------------------------------------------------------------===//
-
 const StringRegion* MemRegionManager::getStringRegion(const StringLiteral* Str){
   return getSubRegion<StringRegion>(Str, getGlobalsRegion());
 }
@@ -579,9 +599,31 @@
                                                 const LocationContext *LC) {
   const MemRegion *sReg = 0;
 
-  if (D->hasGlobalStorage() && !D->isStaticLocal())
-    sReg = getGlobalsRegion();
-  else {
+  if (D->hasGlobalStorage() && !D->isStaticLocal()) {
+
+    // First handle the globals defined in system headers.
+    if (C.getSourceManager().isInSystemHeader(D->getLocation())) {
+      // Whitelist the system globals which often DO GET modified, assume the
+      // rest are immutable.
+      if (D->getName().find("errno") != StringRef::npos)
+        sReg = getGlobalsRegion(MemRegion::GlobalSystemSpaceRegionKind);
+      else
+        sReg = getGlobalsRegion(MemRegion::GlobalImmutableSpaceRegionKind);
+
+    // Treat other globals as GlobalInternal unless they are constants.
+    } else {
+      QualType GQT = D->getType();
+      const Type *GT = GQT.getTypePtrOrNull();
+      // TODO: We could walk the complex types here and see if everything is
+      // constified.
+      if (GT && GQT.isConstQualified() && GT->isArithmeticType())
+        sReg = getGlobalsRegion(MemRegion::GlobalImmutableSpaceRegionKind);
+      else
+        sReg = getGlobalsRegion();
+    }
+  
+  // Finally handle static locals.  
+  } else {
     // FIXME: Once we implement scope handling, we will need to properly lookup
     // 'D' to the proper LocationContext.
     const DeclContext *DC = D->getDeclContext();
@@ -599,13 +641,15 @@
         assert(D->isStaticLocal());
         const Decl *D = STC->getDecl();
         if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
-          sReg = getGlobalsRegion(getFunctionTextRegion(FD));
+          sReg = getGlobalsRegion(MemRegion::StaticGlobalSpaceRegionKind,
+                                  getFunctionTextRegion(FD));
         else if (const BlockDecl *BD = dyn_cast<BlockDecl>(D)) {
           const BlockTextRegion *BTR =
             getBlockTextRegion(BD,
                      C.getCanonicalType(BD->getSignatureAsWritten()->getType()),
                      STC->getAnalysisDeclContext());
-          sReg = getGlobalsRegion(BTR);
+          sReg = getGlobalsRegion(MemRegion::StaticGlobalSpaceRegionKind,
+                                  BTR);
         }
         else {
           // FIXME: For ObjC-methods, we need a new CodeTextRegion.  For now
diff --git a/lib/StaticAnalyzer/Core/ProgramState.cpp b/lib/StaticAnalyzer/Core/ProgramState.cpp
index eac9c1c..4b98ce8 100644
--- a/lib/StaticAnalyzer/Core/ProgramState.cpp
+++ b/lib/StaticAnalyzer/Core/ProgramState.cpp
@@ -136,20 +136,20 @@
 ProgramState::invalidateRegions(ArrayRef<const MemRegion *> Regions,
                                 const Expr *E, unsigned Count,
                                 StoreManager::InvalidatedSymbols *IS,
-                                bool invalidateGlobals) const {
+                                const CallOrObjCMessage *Call) const {
   if (!IS) {
     StoreManager::InvalidatedSymbols invalidated;
     return invalidateRegionsImpl(Regions, E, Count,
-                                 invalidated, invalidateGlobals);
+                                 invalidated, Call);
   }
-  return invalidateRegionsImpl(Regions, E, Count, *IS, invalidateGlobals);
+  return invalidateRegionsImpl(Regions, E, Count, *IS, Call);
 }
 
 const ProgramState *
 ProgramState::invalidateRegionsImpl(ArrayRef<const MemRegion *> Regions,
                                     const Expr *E, unsigned Count,
                                     StoreManager::InvalidatedSymbols &IS,
-                                    bool invalidateGlobals) const {
+                                    const CallOrObjCMessage *Call) const {
   ProgramStateManager &Mgr = getStateManager();
   SubEngine* Eng = Mgr.getOwningEngine();
  
@@ -157,14 +157,14 @@
     StoreManager::InvalidatedRegions Invalidated;
     const StoreRef &newStore
       = Mgr.StoreMgr->invalidateRegions(getStore(), Regions, E, Count, IS,
-                                        invalidateGlobals, &Invalidated);
+                                        Call, &Invalidated);
     const ProgramState *newState = makeWithStore(newStore);
     return Eng->processRegionChanges(newState, &IS, Regions, Invalidated);
   }
 
   const StoreRef &newStore =
     Mgr.StoreMgr->invalidateRegions(getStore(), Regions, E, Count, IS,
-                                    invalidateGlobals, NULL);
+                                    Call, NULL);
   return makeWithStore(newStore);
 }
 
diff --git a/lib/StaticAnalyzer/Core/RegionStore.cpp b/lib/StaticAnalyzer/Core/RegionStore.cpp
index 56ce0e1..a0d1bad 100644
--- a/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ b/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -20,6 +20,7 @@
 #include "clang/Analysis/Analyses/LiveVariables.h"
 #include "clang/Analysis/AnalysisContext.h"
 #include "clang/Basic/TargetInfo.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/ObjCMessage.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
@@ -235,11 +236,16 @@
   //===-------------------------------------------------------------------===//
   // Binding values to regions.
   //===-------------------------------------------------------------------===//
+  RegionBindings invalidateGlobalRegion(MemRegion::Kind K,
+                                        const Expr *Ex,
+                                        unsigned Count,
+                                        RegionBindings B,
+                                        InvalidatedRegions *Invalidated);
 
   StoreRef invalidateRegions(Store store, ArrayRef<const MemRegion *> Regions,
                              const Expr *E, unsigned Count,
                              InvalidatedSymbols &IS,
-                             bool invalidateGlobals,
+                             const CallOrObjCMessage *Call,
                              InvalidatedRegions *Invalidated);
 
 public:   // Made public for helper classes.
@@ -718,15 +724,39 @@
   B = RM.addBinding(B, baseR, BindingKey::Direct, V);
 }
 
+RegionBindings RegionStoreManager::invalidateGlobalRegion(MemRegion::Kind K,
+                                                          const Expr *Ex,
+                                                          unsigned Count,
+                                                          RegionBindings B,
+                                            InvalidatedRegions *Invalidated) {
+  // Bind the globals memory space to a new symbol that we will use to derive
+  // the bindings for all globals.
+  const GlobalsSpaceRegion *GS = MRMgr.getGlobalsRegion(K);
+  SVal V =
+      svalBuilder.getConjuredSymbolVal(/* SymbolTag = */ (void*) GS, Ex,
+          /* symbol type, doesn't matter */ Ctx.IntTy,
+          Count);
+
+  B = removeBinding(B, GS);
+  B = addBinding(B, BindingKey::Make(GS, BindingKey::Default), V);
+
+  // Even if there are no bindings in the global scope, we still need to
+  // record that we touched it.
+  if (Invalidated)
+    Invalidated->push_back(GS);
+
+  return B;
+}
+
 StoreRef RegionStoreManager::invalidateRegions(Store store,
                                             ArrayRef<const MemRegion *> Regions,
                                                const Expr *Ex, unsigned Count,
                                                InvalidatedSymbols &IS,
-                                               bool invalidateGlobals,
+                                               const CallOrObjCMessage *Call,
                                               InvalidatedRegions *Invalidated) {
   invalidateRegionsWorker W(*this, StateMgr,
                             RegionStoreManager::GetRegionBindings(store),
-                            Ex, Count, IS, Invalidated, invalidateGlobals);
+                            Ex, Count, IS, Invalidated, false);
 
   // Scan the bindings and generate the clusters.
   W.GenerateClusters();
@@ -741,20 +771,20 @@
   // Return the new bindings.
   RegionBindings B = W.getRegionBindings();
 
-  if (invalidateGlobals) {
-    // Bind the non-static globals memory space to a new symbol that we will
-    // use to derive the bindings for all non-static globals.
-    const GlobalsSpaceRegion *GS = MRMgr.getGlobalsRegion();
-    SVal V =
-      svalBuilder.getConjuredSymbolVal(/* SymbolTag = */ (void*) GS, Ex,
-                                  /* symbol type, doesn't matter */ Ctx.IntTy,
-                                  Count);
-    B = addBinding(B, BindingKey::Make(GS, BindingKey::Default), V);
-
-    // Even if there are no bindings in the global scope, we still need to
-    // record that we touched it.
-    if (Invalidated)
-      Invalidated->push_back(GS);
+  // For all globals which are not static nor immutable: determine which global
+  // regions should be invalidated and invalidate them.
+  // TODO: This could possibly be more precise with modules.
+  //
+  // System calls invalidate only system globals.
+  if (Call && Call->isInSystemHeader()) {
+    B = invalidateGlobalRegion(MemRegion::GlobalSystemSpaceRegionKind,
+                               Ex, Count, B, Invalidated);
+  // Internal calls might invalidate both system and internal globals.
+  } else {
+    B = invalidateGlobalRegion(MemRegion::GlobalSystemSpaceRegionKind,
+                               Ex, Count, B, Invalidated);
+    B = invalidateGlobalRegion(MemRegion::GlobalInternalSpaceRegionKind,
+                               Ex, Count, B, Invalidated);
   }
 
   return StoreRef(B.getRootWithoutRetain(), *this);
diff --git a/lib/StaticAnalyzer/Core/Store.cpp b/lib/StaticAnalyzer/Core/Store.cpp
index 825510e..543f878 100644
--- a/lib/StaticAnalyzer/Core/Store.cpp
+++ b/lib/StaticAnalyzer/Core/Store.cpp
@@ -101,8 +101,10 @@
     case MemRegion::StackArgumentsSpaceRegionKind:
     case MemRegion::HeapSpaceRegionKind:
     case MemRegion::UnknownSpaceRegionKind:
-    case MemRegion::NonStaticGlobalSpaceRegionKind:
-    case MemRegion::StaticGlobalSpaceRegionKind: {
+    case MemRegion::StaticGlobalSpaceRegionKind:
+    case MemRegion::GlobalInternalSpaceRegionKind:
+    case MemRegion::GlobalSystemSpaceRegionKind:
+    case MemRegion::GlobalImmutableSpaceRegionKind: {
       llvm_unreachable("Invalid region cast");
     }