[LoopUnrollAnalyzer] Take into account cost of instructions controlling branches, along with their operands.
Previously, we didn't account for their cost or the cost of their operands,
which could have resulted in unrolling loops for no actual benefit.
llvm-svn: 269985
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 034cb04..d167c44 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -506,6 +506,7 @@
BBWorklist.insert(Succ);
else
ExitWorklist.insert({BB, Succ});
+ AddCostRecursively(*TI, Iteration);
}
// If we found no optimization opportunities on the first iteration, we
diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll
index f62141d..11c9f96 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll
@@ -55,3 +55,35 @@
%r.lcssa = phi i32 [ %r, %loop ]
ret i32 %r.lcssa
}
+
+; In this case the loaded value is used only to control a branch. If we missed
+; that, we could've thought that it's unused and that unrolling would clean up
+; almost the entire loop. Make sure that we do not unroll such a loop.
+; CHECK-LABEL: @foo3
+; CHECK: br i1 %exitcond, label %loop.end, label %loop.header
+define i32 @foo3(i32* noalias nocapture readonly %src) {
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %inc, %loop.latch ] ; induction variable, starts at 0
+ %r1 = phi i32 [ 0, %entry ], [ %r3, %loop.latch ] ; running result carried across iterations
+ %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+ %src_element = load i32, i32* %arrayidx, align 4 ; its only use is %cmp below
+ %cmp = icmp eq i32 0, %src_element
+ br i1 %cmp, label %loop.if, label %loop.latch ; branch controlled solely by the loaded value
+
+loop.if:
+ %r2 = add i32 %r1, 1 ; taken only when the loaded element is zero
+ br label %loop.latch
+
+loop.latch:
+ %r3 = phi i32 [%r1, %loop.header], [%r2, %loop.if] ; merge the incremented / unchanged result
+ %inc = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %inc, 9 ; leave the loop once %inc reaches 9
+ br i1 %exitcond, label %loop.end, label %loop.header
+
+loop.end:
+ %r.lcssa = phi i32 [ %r3, %loop.latch ] ; single-entry phi for the value leaving the loop
+ ret i32 %r.lcssa
+}