Introduce llvm.loop.parallel_accesses and llvm.access.group metadata.

The current llvm.mem.parallel_loop_access metadata has a problem in that
it uses LoopIDs. LoopID unfortunately is not loop identifier. It is
neither unique (there's even a regression test assigning the some LoopID
to multiple loops; can otherwise happen if passes such as LoopVersioning
make copies of entire loops) nor persistent (every time a property is
removed/added from a LoopID's MDNode, it will also receive a new LoopID;
this happens e.g. when calling Loop::setLoopAlreadyUnrolled()).
Since most loop transformation passes change the loop attributes (even
if it just to mark that a loop should not be processed again as
llvm.loop.isvectorized does, for the versioned and unversioned loop),
the parallel access information is lost for any subsequent pass.

This patch unlinks LoopIDs and parallel accesses.
llvm.mem.parallel_loop_access metadata on instruction is replaced by
llvm.access.group metadata. llvm.access.group points to a distinct
MDNode with no operands (avoiding the problem to ever need to add/remove
operands), called "access group". Alternatively, it can point to a list
of access groups. The LoopID then has an attribute
llvm.loop.parallel_accesses with all the access groups that are parallel
(no dependencies carries by this loop).

This intentionally avoid any kind of "ID". Loops that are clones/have
their attributes modifies retain the llvm.loop.parallel_accesses
attribute. Access instructions that a cloned point to the same access
group. It is not necessary for each access to have it's own "ID" MDNode,
but those memory access instructions with the same behavior can be
grouped together.

The behavior of llvm.mem.parallel_loop_access is not changed by this
patch, but should be considered deprecated.

Differential Revision: https://reviews.llvm.org/D52116

llvm-svn: 349725
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index ae158ae..3e6a496 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -174,6 +174,9 @@
     MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
   if (LoopMemParallelMD)
     L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
+  MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
+  if (AccessGroupMD)
+    L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
 
   StoreInst *S = Builder.CreateStore(L, Dest);
   // Alignment from the mem intrinsic will be better, so use it.
@@ -182,6 +185,8 @@
     S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
   if (LoopMemParallelMD)
     S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
+  if (AccessGroupMD)
+    S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
 
   if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
     // non-atomics can be volatile
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 02ebb5e..b9b1e7b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -493,6 +493,7 @@
     case LLVMContext::MD_noalias:
     case LLVMContext::MD_nontemporal:
     case LLVMContext::MD_mem_parallel_loop_access:
+    case LLVMContext::MD_access_group:
       // All of these directly apply.
       NewLoad->setMetadata(ID, N);
       break;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index a71ebdc..7603cf4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -608,6 +608,7 @@
     LLVMContext::MD_align,
     LLVMContext::MD_dereferenceable,
     LLVMContext::MD_dereferenceable_or_null,
+    LLVMContext::MD_access_group,
   };
 
   for (unsigned ID : KnownIDs)
diff --git a/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/llvm/lib/Transforms/Scalar/GVNHoist.cpp
index 0797ce9..76a42d7 100644
--- a/llvm/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -246,7 +246,7 @@
       LLVMContext::MD_tbaa,           LLVMContext::MD_alias_scope,
       LLVMContext::MD_noalias,        LLVMContext::MD_range,
       LLVMContext::MD_fpmath,         LLVMContext::MD_invariant_load,
-      LLVMContext::MD_invariant_group};
+      LLVMContext::MD_invariant_group, LLVMContext::MD_access_group};
   combineMetadata(ReplInst, I, KnownIDs, true);
 }
 
diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index c0c59d2..f48d3cc 100644
--- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -633,6 +633,8 @@
     // Set Loop Versioning metaData for version loop.
     addStringMetadataToLoop(LVer.getVersionedLoop(), LICMVersioningMetaData);
     // Set "llvm.mem.parallel_loop_access" metaData to versioned loop.
+    // FIXME: "llvm.mem.parallel_loop_access" annotates memory access
+    // instructions, not loops.
     addStringMetadataToLoop(LVer.getVersionedLoop(),
                             "llvm.mem.parallel_loop_access");
     // Update version loop with aggressive aliasing assumption.
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 8756a1a..e93f56da 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -996,7 +996,8 @@
   // handled here, but combineMetadata doesn't support them yet
   unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
                          LLVMContext::MD_noalias,
-                         LLVMContext::MD_invariant_group};
+                         LLVMContext::MD_invariant_group,
+                         LLVMContext::MD_access_group};
   combineMetadata(C, cpy, KnownIDs, true);
 
   // Remove the memcpy.
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 81eea0d..d1a2dc0 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2593,7 +2593,8 @@
     }
     V = convertValue(DL, IRB, V, NewAllocaTy);
     StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
-    Store->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access);
+    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
+                             LLVMContext::MD_access_group});
     if (AATags)
       Store->setAAMetadata(AATags);
     Pass.DeadInsts.insert(&SI);
@@ -2662,7 +2663,8 @@
       NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()),
                                      SI.isVolatile());
     }
-    NewSI->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access);
+    NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
+                             LLVMContext::MD_access_group});
     if (AATags)
       NewSI->setAAMetadata(AATags);
     if (SI.isVolatile())
@@ -3799,7 +3801,8 @@
                          PartPtrTy, BasePtr->getName() + "."),
           getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
           LI->getName());
-      PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
+      PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
+                                LLVMContext::MD_access_group});
 
       // Append this load onto the list of split loads so we can find it later
       // to rewrite the stores.
@@ -3855,7 +3858,8 @@
                            APInt(DL.getIndexSizeInBits(AS), PartOffset),
                            PartPtrTy, StoreBasePtr->getName() + "."),
             getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
-        PStore->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
+        PStore->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
+                                   LLVMContext::MD_access_group});
         LLVM_DEBUG(dbgs() << "      +" << PartOffset << ":" << *PStore << "\n");
       }
 
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 3a6f8e6..5eb3fda 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -379,7 +379,8 @@
           || Tag == LLVMContext::MD_invariant_load
           || Tag == LLVMContext::MD_alias_scope
           || Tag == LLVMContext::MD_noalias
-          || Tag == ParallelLoopAccessMDKind);
+          || Tag == ParallelLoopAccessMDKind
+          || Tag == LLVMContext::MD_access_group);
 }
 
 // Transfer metadata from Op to the instructions in CV if it is known
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index bda2ee2..0b22082 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
@@ -770,14 +771,16 @@
   UnwindDest->removePredecessor(InvokeBB);
 }
 
-/// When inlining a call site that has !llvm.mem.parallel_loop_access metadata,
-/// that metadata should be propagated to all memory-accessing cloned
-/// instructions.
+/// When inlining a call site that has !llvm.mem.parallel_loop_access or
+/// llvm.access.group metadata, that metadata should be propagated to all
+/// memory-accessing cloned instructions.
 static void PropagateParallelLoopAccessMetadata(CallSite CS,
                                                 ValueToValueMapTy &VMap) {
   MDNode *M =
     CS.getInstruction()->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
-  if (!M)
+  MDNode *CallAccessGroup =
+      CS.getInstruction()->getMetadata(LLVMContext::MD_access_group);
+  if (!M && !CallAccessGroup)
     return;
 
   for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
@@ -789,11 +792,20 @@
     if (!NI)
       continue;
 
-    if (MDNode *PM = NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) {
+    if (M) {
+      if (MDNode *PM =
+              NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access)) {
         M = MDNode::concatenate(PM, M);
       NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
-    } else if (NI->mayReadOrWriteMemory()) {
-      NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
+      } else if (NI->mayReadOrWriteMemory()) {
+        NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access, M);
+      }
+    }
+
+    if (NI->mayReadOrWriteMemory()) {
+      MDNode *UnitedAccGroups = uniteAccessGroups(
+          NI->getMetadata(LLVMContext::MD_access_group), CallAccessGroup);
+      NI->setMetadata(LLVMContext::MD_access_group, UnitedAccGroups);
     }
   }
 }
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 79343a9..831fb57 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -34,6 +34,7 @@
 #include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/Attributes.h"
@@ -2297,6 +2298,10 @@
       case LLVMContext::MD_mem_parallel_loop_access:
         K->setMetadata(Kind, MDNode::intersect(JMD, KMD));
         break;
+      case LLVMContext::MD_access_group:
+        K->setMetadata(LLVMContext::MD_access_group,
+                       intersectAccessGroups(K, J));
+        break;
       case LLVMContext::MD_range:
 
         // If K does move, use most generic range. Otherwise keep the range of
@@ -2353,7 +2358,8 @@
       LLVMContext::MD_invariant_load,  LLVMContext::MD_nonnull,
       LLVMContext::MD_invariant_group, LLVMContext::MD_align,
       LLVMContext::MD_dereferenceable,
-      LLVMContext::MD_dereferenceable_or_null};
+      LLVMContext::MD_dereferenceable_or_null,
+      LLVMContext::MD_access_group};
   combineMetadata(K, J, KnownIDs, KDominatesJ);
 }
 
@@ -2384,7 +2390,8 @@
       LLVMContext::MD_tbaa,            LLVMContext::MD_alias_scope,
       LLVMContext::MD_noalias,         LLVMContext::MD_range,
       LLVMContext::MD_fpmath,          LLVMContext::MD_invariant_load,
-      LLVMContext::MD_invariant_group, LLVMContext::MD_nonnull};
+      LLVMContext::MD_invariant_group, LLVMContext::MD_nonnull,
+      LLVMContext::MD_access_group};
   combineMetadata(ReplInst, I, KnownIDs, false);
 }
 
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 1d4f07f..3866395 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -187,44 +187,14 @@
   INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
 }
 
-static Optional<MDNode *> findOptionMDForLoopID(MDNode *LoopID,
-                                                StringRef Name) {
-  // Return none if LoopID is false.
-  if (!LoopID)
-    return None;
-
-  // First operand should refer to the loop id itself.
-  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
-  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
-
-  // Iterate over LoopID operands and look for MDString Metadata
-  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
-    MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
-    if (!MD)
-      continue;
-    MDString *S = dyn_cast<MDString>(MD->getOperand(0));
-    if (!S)
-      continue;
-    // Return true if MDString holds expected MetaData.
-    if (Name.equals(S->getString()))
-      return MD;
-  }
-  return None;
-}
-
-static Optional<MDNode *> findOptionMDForLoop(const Loop *TheLoop,
-                                              StringRef Name) {
-  return findOptionMDForLoopID(TheLoop->getLoopID(), Name);
-}
-
 /// Find string metadata for loop
 ///
 /// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
 /// operand or null otherwise.  If the string metadata is not found return
 /// Optional's not-a-value.
-Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop,
+Optional<const MDOperand *> llvm::findStringMetadataForLoop(const Loop *TheLoop,
                                                             StringRef Name) {
-  auto MD = findOptionMDForLoop(TheLoop, Name).getValueOr(nullptr);
+  MDNode *MD = findOptionMDForLoop(TheLoop, Name);
   if (!MD)
     return None;
   switch (MD->getNumOperands()) {
@@ -239,19 +209,15 @@
 
 static Optional<bool> getOptionalBoolLoopAttribute(const Loop *TheLoop,
                                                    StringRef Name) {
-  Optional<MDNode *> MD = findOptionMDForLoop(TheLoop, Name);
-  if (!MD.hasValue())
+  MDNode *MD = findOptionMDForLoop(TheLoop, Name);
+  if (!MD)
     return None;
-  MDNode *OptionNode = MD.getValue();
-  if (OptionNode == nullptr)
-    return None;
-  switch (OptionNode->getNumOperands()) {
+  switch (MD->getNumOperands()) {
   case 1:
     // When the value is absent it is interpreted as 'attribute set'.
     return true;
   case 2:
-    return mdconst::extract_or_null<ConstantInt>(
-        OptionNode->getOperand(1).get());
+    return mdconst::extract_or_null<ConstantInt>(MD->getOperand(1).get());
   }
   llvm_unreachable("unexpected number of options");
 }
@@ -325,8 +291,7 @@
 
   bool HasAnyFollowup = false;
   for (StringRef OptionName : FollowupOptions) {
-    MDNode *FollowupNode =
-        findOptionMDForLoopID(OrigLoopID, OptionName).getValueOr(nullptr);
+    MDNode *FollowupNode = findOptionMDForLoopID(OrigLoopID, OptionName);
     if (!FollowupNode)
       continue;
 
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index b98f2ff..ac09ce5 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1321,7 +1321,8 @@
                              LLVMContext::MD_align,
                              LLVMContext::MD_dereferenceable,
                              LLVMContext::MD_dereferenceable_or_null,
-                             LLVMContext::MD_mem_parallel_loop_access};
+                             LLVMContext::MD_mem_parallel_loop_access,
+                             LLVMContext::MD_access_group};
       combineMetadata(I1, I2, KnownIDs, true);
 
       // I1 and I2 are being combined into a single instruction.  Its debug