Disable the Loop Vectorizer in case of GEMM

Currently, in case of GEMM and the pattern matching based optimizations, we
use only the SLP Vectorizer out of two LLVM vectorizers. Since the Loop
Vectorizer can get in the way of optimal code generation, we disable the Loop
Vectorizer for the innermost loop using mark nodes and emitting the
corresponding metadata.

Reviewed-by: Tobias Grosser <tobias@grosser.es>

Differential Revision: https://reviews.llvm.org/D36928

llvm-svn: 311473
diff --git a/polly/lib/CodeGen/IRBuilder.cpp b/polly/lib/CodeGen/IRBuilder.cpp
index 94cdda2..1ad01a4 100644
--- a/polly/lib/CodeGen/IRBuilder.cpp
+++ b/polly/lib/CodeGen/IRBuilder.cpp
@@ -114,15 +114,27 @@
   ParallelLoops.pop_back();
 }
 
-void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L,
-                                      bool IsParallel) const {
-  if (!IsParallel)
-    return;
+void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, bool IsParallel,
+                                      bool IsLoopVectorizerDisabled) const {
+  MDNode *MData = nullptr;
 
-  assert(!ParallelLoops.empty() && "Expected a parallel loop to annotate");
-  MDNode *Ids = ParallelLoops.back();
-  MDNode *Id = cast<MDNode>(Ids->getOperand(Ids->getNumOperands() - 1));
-  B->setMetadata("llvm.loop", Id);
+  if (IsLoopVectorizerDisabled) {
+    SmallVector<Metadata *, 3> Args;
+    LLVMContext &Ctx = SE->getContext();
+    Args.push_back(MDString::get(Ctx, "llvm.loop.vectorize.enable"));
+    auto *FalseValue = ConstantInt::get(Type::getInt1Ty(Ctx), 0);
+    Args.push_back(ValueAsMetadata::get(FalseValue));
+    MData = MDNode::concatenate(MData, getID(Ctx, MDNode::get(Ctx, Args)));
+  }
+
+  if (IsParallel) {
+    assert(!ParallelLoops.empty() && "Expected a parallel loop to annotate");
+    MDNode *Ids = ParallelLoops.back();
+    MDNode *Id = cast<MDNode>(Ids->getOperand(Ids->getNumOperands() - 1));
+    MData = MDNode::concatenate(MData, Id);
+  }
+
+  B->setMetadata("llvm.loop", MData);
 }
 
 /// Get the pointer operand