[ThinLTO] Attempt to recommit r365188 after alignment fix
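
This re-applies support for computing write-only reference edges in
function summaries. A minimal sketch of the kind of IR this affects
(names here are hypothetical, not taken from the patch):

  @gv = internal global i32 0

  define void @set(i32 %v) {
    ; @gv is referenced only as the destination of a non-volatile
    ; store, so the summary for @set can mark this reference edge
    ; write-only.
    store i32 %v, i32* @gv
    ret void
  }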

llvm-svn: 365215
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index 914561f..e25eb29 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -231,6 +231,13 @@
   return false;
 }
 
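+// Returns true if the instruction is a non-volatile store (the store
+// counterpart of isNonVolatileLoad above).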
+static bool isNonVolatileStore(const Instruction *I) {
+  if (const auto *SI = dyn_cast<StoreInst>(I))
+    return !SI->isVolatile();
+
+  return false;
+}
+
 static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
                                    const Function &F, BlockFrequencyInfo *BFI,
                                    ProfileSummaryInfo *PSI, DominatorTree &DT,
@@ -245,7 +252,7 @@
   // Map from callee ValueId to profile count. Used to accumulate profile
   // counts for all static calls to a given callee.
   MapVector<ValueInfo, CalleeInfo> CallGraphEdges;
-  SetVector<ValueInfo> RefEdges;
+  SetVector<ValueInfo> RefEdges, LoadRefEdges, StoreRefEdges;
   SetVector<GlobalValue::GUID> TypeTests;
   SetVector<FunctionSummary::VFuncId> TypeTestAssumeVCalls,
       TypeCheckedLoadVCalls;
@@ -258,6 +265,7 @@
   // list.
   findRefEdges(Index, &F, RefEdges, Visited);
   std::vector<const Instruction *> NonVolatileLoads;
+  std::vector<const Instruction *> NonVolatileStores;
 
   bool HasInlineAsmMaybeReferencingInternal = false;
   for (const BasicBlock &BB : F)
@@ -265,12 +273,34 @@
       if (isa<DbgInfoIntrinsic>(I))
         continue;
       ++NumInsts;
-      if (isNonVolatileLoad(&I)) {
-        // Postpone processing of non-volatile load instructions
-        // See comments below
-        Visited.insert(&I);
-        NonVolatileLoads.push_back(&I);
-        continue;
+      // A regular LTO module doesn't participate in ThinLTO import,
+      // so no reference from it can be read- or write-only, since
+      // that would require importing the variable as a local copy.
+      if (IsThinLTO) {
+        if (isNonVolatileLoad(&I)) {
+          // Postpone processing of non-volatile load instructions;
+          // see the comments below.
+          Visited.insert(&I);
+          NonVolatileLoads.push_back(&I);
+          continue;
+        } else if (isNonVolatileStore(&I)) {
+          Visited.insert(&I);
+          NonVolatileStores.push_back(&I);
+          // All references from the second operand of a store (the
+          // destination address) can be considered write-only if they're
+          // not referenced by any non-store instruction. References from
+          // the first operand of a store (the stored value) can't be
+          // treated as either read- or write-only, so we add them to
+          // RefEdges as we do for all other instructions except
+          // non-volatile loads.
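+          // For example, in "store i32* @a, i32** @b" (with @a and @b
+          // arbitrary globals), @b may end up write-only, while @a gets
+          // a regular reference edge.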
+          Value *Stored = I.getOperand(0);
+          if (auto *GV = dyn_cast<GlobalValue>(Stored))
+            // findRefEdges will try to examine GV's operands, so instead
+            // of calling it we add GV to RefEdges directly.
+            RefEdges.insert(Index.getOrInsertValueInfo(GV));
+          else if (auto *U = dyn_cast<User>(Stored))
+            findRefEdges(Index, U, RefEdges, Visited);
+          continue;
+        }
       }
       findRefEdges(Index, &I, RefEdges, Visited);
       auto CS = ImmutableCallSite(&I);
@@ -361,24 +391,61 @@
       }
     }
 
-  // By now we processed all instructions in a function, except
-  // non-volatile loads. All new refs we add in a loop below
-  // are obviously constant. All constant refs are grouped in the
-  // end of RefEdges vector, so we can use a single integer value
-  // to identify them.
-  unsigned RefCnt = RefEdges.size();
-  for (const Instruction *I : NonVolatileLoads) {
-    Visited.erase(I);
-    findRefEdges(Index, I, RefEdges, Visited);
-  }
-  std::vector<ValueInfo> Refs = RefEdges.takeVector();
-  // Regular LTO module doesn't participate in ThinLTO import,
-  // so no reference from it can be readonly, since this would
-  // require importing variable as local copy
-  if (IsThinLTO)
-    for (; RefCnt < Refs.size(); ++RefCnt)
+  std::vector<ValueInfo> Refs;
+  if (IsThinLTO) {
+    auto AddRefEdges = [&](const std::vector<const Instruction *> &Instrs,
+                           SetVector<ValueInfo> &Edges,
+                           SmallPtrSet<const User *, 8> &Cache) {
+      for (const auto *I : Instrs) {
+        Cache.erase(I);
+        findRefEdges(Index, I, Edges, Cache);
+      }
+    };
+
+    // By now we have processed all instructions in the function except
+    // non-volatile loads and non-volatile stores. Let's find ref edges
+    // for both instruction sets.
+    AddRefEdges(NonVolatileLoads, LoadRefEdges, Visited);
+    // Processing load instructions can add values to the Visited set
+    // that are also used by stores in NonVolatileStores. For example,
+    // this can happen with the following code:
+    //
+    // store %Derived* @foo, %Derived** bitcast (%Base** @bar to %Derived**)
+    // %42 = load %Derived*, %Derived** bitcast (%Base** @bar to %Derived**)
+    //
+    // After processing loads we'll add the bitcast to the Visited set;
+    // if we then use the same set while processing stores, we'll never
+    // see the store to @bar, and @bar will be mistakenly treated as
+    // read-only.
+    SmallPtrSet<const llvm::User *, 8> StoreCache;
+    AddRefEdges(NonVolatileStores, StoreRefEdges, StoreCache);
+
+    // If a variable is referenced by both a load and a store, we won't
+    // be able to optimize it. Add all such reference edges to the
+    // RefEdges set.
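+    // For example, a global that the function both loads from and
+    // stores to goes into RefEdges and is marked neither read- nor
+    // write-only.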
+    for (auto &VI : StoreRefEdges)
+      if (LoadRefEdges.remove(VI))
+        RefEdges.insert(VI);
+
+    unsigned RefCnt = RefEdges.size();
+    // All new reference edges inserted by the two loops below are
+    // either read-only or write-only. They are grouped at the end of
+    // the RefEdges vector, so we can use a single integer value to
+    // identify them.
+    for (auto &VI : LoadRefEdges)
+      RefEdges.insert(VI);
+
+    unsigned FirstWORef = RefEdges.size();
+    for (auto &VI : StoreRefEdges)
+      RefEdges.insert(VI);
+
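+    // RefEdges is now laid out as [plain refs][read-only refs]
+    // [write-only refs], with RefCnt and FirstWORef marking the
+    // boundaries between the groups.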
+    Refs = RefEdges.takeVector();
+    for (; RefCnt < FirstWORef; ++RefCnt)
       Refs[RefCnt].setReadOnly();
 
+    for (; RefCnt < Refs.size(); ++RefCnt)
+      Refs[RefCnt].setWriteOnly();
+  } else {
+    Refs = RefEdges.takeVector();
+  }
   // Explicitly add hot edges to enforce importing for designated GUIDs for
   // sample PGO, to enable the same inlines as the profiled optimized binary.
   for (auto &I : F.getImportGUIDs())
@@ -526,10 +593,11 @@
     }
   }
 
-  // Don't mark variables we won't be able to internalize as read-only.
-  GlobalVarSummary::GVarFlags VarFlags(
+  // Don't mark variables we won't be able to internalize as read/write-only.
+  bool CanBeInternalized =
       !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() &&
-      !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass());
+      !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass();
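+  // The two GVarFlags arguments are the "maybe read-only" and "maybe
+  // write-only" bits, respectively.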
+  GlobalVarSummary::GVarFlags VarFlags(CanBeInternalized, CanBeInternalized);
   auto GVarSummary = llvm::make_unique<GlobalVarSummary>(Flags, VarFlags,
                                                          RefEdges.takeVector());
   if (NonRenamableLocal)
@@ -647,7 +715,7 @@
           } else {
             std::unique_ptr<GlobalVarSummary> Summary =
                 llvm::make_unique<GlobalVarSummary>(
-                    GVFlags, GlobalVarSummary::GVarFlags(),
+                    GVFlags, GlobalVarSummary::GVarFlags(false, false),
                     ArrayRef<ValueInfo>{});
             Index.addGlobalValueSummary(*GV, std::move(Summary));
           }