LoopVectorizer: If dependency checks fail try runtime checks
When a dependence check fails we can still try to vectorize loops with runtime
array bounds checks.
This helps linpack to vectorize a loop in dgefa. And we are back to 2x of the
scalar performance on a corei7-avx.
radar://15339680
llvm-svn: 193853
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll
index 4772256..d15479d 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll
@@ -34,3 +34,31 @@
for.end: ; preds = %for.body, %entry
ret i32 undef
}
+
+; Make sure that we try to vectorize loops with a runtime check if the
+; dependency check fails.
+
+; CHECK-LABEL: test_runtime_check
+; CHECK: <4 x float>
+define void @test_runtime_check(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %ind.sum = add i64 %iv, %offset
+ %arr.idx = getelementptr inbounds float* %a, i64 %ind.sum
+ %l1 = load float* %arr.idx, align 4
+ %ind.sum2 = add i64 %iv, %offset2
+ %arr.idx2 = getelementptr inbounds float* %a, i64 %ind.sum2
+ %l2 = load float* %arr.idx2, align 4
+ %m = fmul fast float %b, %l2
+ %ad = fadd fast float %l1, %m
+ store float %ad, float* %arr.idx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, %n
+ br i1 %exitcond, label %loopexit, label %for.body
+
+loopexit:
+ ret void
+}