[Pipeliner] Add missing loop carried dependences
The pipeliner is not adding a dependence edge for a loop carried
dependence, and ends up scheduling a load from iteration n prior
to an aliased store in iteration n-1.
The code that adds the loop carried dependences in the pipeliner
doesn't check if the memory objects for loads and stores are
"identified" (i.e., distinct) objects. If they are not, then the
code that adds the dependences needs to be conservative. The
objects can be used to check dependences only when they are
distinct objects.
The code that checks for loop carried dependences has been updated
to classify loads and stores that are not identified as "unknown"
values. A store with an "unknown" value can potentially create
a loop carried dependence with any pending load.
Patch by Brendon Cahoon.
llvm-svn: 328547
diff --git a/llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll b/llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll
new file mode 100644
index 0000000..3f8abf0
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-loop-carried-unknown.ll
@@ -0,0 +1,54 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Test that the pipeliner schedules a store before the load in which there is a
+; loop carried dependence. Previously, the loop carried dependence wasn't added
+; and the load from iteration n was scheduled prior to the store from iteration
+; n-1.
+
+; CHECK: loop0(.LBB0_[[LOOP:.]],
+; CHECK: .LBB0_[[LOOP]]:
+; CHECK: memh({{.*}}) =
+; CHECK: = memuh({{.*}})
+; CHECK: endloop0
+
+%s.0 = type { i16, i16 }
+
+; Function Attrs: nounwind
+define void @f0() local_unnamed_addr #0 {
+b0:
+ br label %b1
+
+b1: ; preds = %b1, %b0
+ %v0 = phi i32 [ 0, %b0 ], [ %v22, %b1 ]
+ %v1 = load %s.0*, %s.0** undef, align 4
+ %v2 = getelementptr inbounds %s.0, %s.0* %v1, i32 0, i32 0
+ %v3 = load i16, i16* %v2, align 2
+ %v4 = add i16 0, %v3
+ %v5 = add i16 %v4, 0
+ %v6 = add i16 %v5, 0
+ %v7 = add i16 %v6, 0
+ %v8 = add i16 %v7, 0
+ %v9 = add i16 %v8, 0
+ %v10 = add i16 %v9, 0
+ %v11 = add i16 %v10, 0
+ %v12 = add i16 %v11, 0
+ %v13 = add i16 %v12, 0
+ %v14 = add i16 %v13, 0
+ %v15 = add i16 %v14, 0
+ %v16 = add i16 %v15, 0
+ %v17 = add i16 %v16, 0
+ %v18 = add i16 %v17, 0
+ %v19 = add i16 %v18, 0
+ %v20 = load %s.0*, %s.0** undef, align 4
+ store i16 %v19, i16* undef, align 2
+ %v21 = getelementptr inbounds %s.0, %s.0* %v20, i32 0, i32 1
+ store i16 0, i16* %v21, align 2
+ %v22 = add nuw nsw i32 %v0, 1
+ %v23 = icmp eq i32 %v22, 6
+ br i1 %v23, label %b2, label %b1
+
+b2: ; preds = %b1
+ ret void
+}
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" }