[LoopReroll] Reroll loops with unordered atomic memory accesses

Reviewers: hfinkel, jfb, reames

Subscribers: mcrosier, mzolotukhin, llvm-commits

Differential Revision: https://reviews.llvm.org/D22385

llvm-svn: 275932
diff --git a/llvm/test/Transforms/LoopReroll/basic.ll b/llvm/test/Transforms/LoopReroll/basic.ll
index ce2ab2f..0c8f5d8 100644
--- a/llvm/test/Transforms/LoopReroll/basic.ll
+++ b/llvm/test/Transforms/LoopReroll/basic.ll
@@ -576,6 +576,137 @@
 }
 
 
+define void @unordered_atomic_ops(i32* noalias %buf_0, i32* noalias %buf_1) {
+; CHECK-LABEL: @unordered_atomic_ops(
+; Positive test: the expected for.body holds one unordered atomic load/store pair and a step-1 %indvar.
+; CHECK: for.body:
+; CHECK-NEXT:   %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
+; CHECK-NEXT:   %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvar
+; CHECK-NEXT:   %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvar
+; CHECK-NEXT:   %va = load atomic i32, i32* %buf0_a unordered, align 4
+; CHECK-NEXT:   store atomic i32 %va, i32* %buf1_a unordered, align 4
+; CHECK-NEXT:   %indvar.next = add i32 %indvar, 1
+; CHECK-NEXT:   %exitcond = icmp eq i32 %indvar, 3199
+; CHECK-NEXT:   br i1 %exitcond, label %for.end, label %for.body
+; Input: the same element copy written twice per iteration (%indvars.iv and %indvars.iv + 1), step 2.
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %indvars.iv.next = add i32 %indvars.iv, 2
+  %indvars.mid = add i32 %indvars.iv, 1
+  %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
+  %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
+  %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
+  %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
+  %va = load atomic i32, i32* %buf0_a unordered, align 4
+  %vb = load atomic i32, i32* %buf0_b unordered, align 4
+  store atomic i32 %va, i32* %buf1_a unordered, align 4
+  store atomic i32 %vb, i32* %buf1_b unordered, align 4
+  %cmp = icmp slt i32 %indvars.iv.next, 3200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+define void @unordered_atomic_ops_nomatch(i32* noalias %buf_0, i32* noalias %buf_1) {
+; Negative test: one store is plain and the other is unordered atomic; the step-2 loop is expected to remain.
+
+; CHECK-LABEL: @unordered_atomic_ops_nomatch(
+entry:
+  br label %for.body
+
+for.body:
+; CHECK: for.body:
+; CHECK:   %indvars.iv.next = add i32 %indvars.iv, 2
+; CHECK:   %indvars.mid = add i32 %indvars.iv, 1
+; CHECK:   %cmp = icmp slt i32 %indvars.iv.next, 3200
+; CHECK:   br i1 %cmp, label %for.body, label %for.end
+
+  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %indvars.iv.next = add i32 %indvars.iv, 2
+  %indvars.mid = add i32 %indvars.iv, 1
+  %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
+  %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
+  %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
+  %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
+  %va = load atomic i32, i32* %buf0_a unordered, align 4
+  %vb = load atomic i32, i32* %buf0_b unordered, align 4
+  store i32 %va, i32* %buf1_a, align 4  ;; Not atomic, unlike the store of %vb on the next line
+  store atomic i32 %vb, i32* %buf1_b unordered, align 4
+  %cmp = icmp slt i32 %indvars.iv.next, 3200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+define void @ordered_atomic_ops(i32* noalias %buf_0, i32* noalias %buf_1) {
+; Negative test: the loads are acquire and the stores are release (not unordered); the step-2 loop is expected to remain.
+
+; CHECK-LABEL: @ordered_atomic_ops(
+entry:
+  br label %for.body
+
+for.body:
+; CHECK: for.body:
+; CHECK:   %indvars.iv.next = add i32 %indvars.iv, 2
+; CHECK:   %indvars.mid = add i32 %indvars.iv, 1
+; CHECK:   %cmp = icmp slt i32 %indvars.iv.next, 3200
+; CHECK:   br i1 %cmp, label %for.body, label %for.end
+
+  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %indvars.iv.next = add i32 %indvars.iv, 2
+  %indvars.mid = add i32 %indvars.iv, 1
+  %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
+  %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
+  %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
+  %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
+  %va = load atomic i32, i32* %buf0_a acquire, align 4
+  %vb = load atomic i32, i32* %buf0_b acquire, align 4
+  store atomic i32 %va, i32* %buf1_a release, align 4
+  store atomic i32 %vb, i32* %buf1_b release, align 4
+  %cmp = icmp slt i32 %indvars.iv.next, 3200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+define void @unordered_atomic_ops_with_fence(i32* noalias %buf_0, i32* noalias %buf_1) {
+; CHECK-LABEL: @unordered_atomic_ops_with_fence(
+entry:
+  br label %for.body
+
+for.body:
+; CHECK: for.body:
+; CHECK:  %va = load atomic i32, i32* %buf0_a unordered, align 4
+; CHECK-NEXT:  %vb = load atomic i32, i32* %buf0_b unordered, align 4
+; CHECK-NEXT:  fence seq_cst
+; CHECK-NEXT:  store atomic i32 %va, i32* %buf1_a unordered, align 4
+; CHECK-NEXT:  store atomic i32 %vb, i32* %buf1_b unordered, align 4
+; Negative test: both loads, the seq_cst fence and both stores are expected to stay adjacent, i.e. the body is not rerolled.
+  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %indvars.iv.next = add i32 %indvars.iv, 2
+  %indvars.mid = add i32 %indvars.iv, 1
+  %buf0_a = getelementptr i32, i32* %buf_0, i32 %indvars.iv
+  %buf0_b = getelementptr i32, i32* %buf_0, i32 %indvars.mid
+  %buf1_a = getelementptr i32, i32* %buf_1, i32 %indvars.iv
+  %buf1_b = getelementptr i32, i32* %buf_1, i32 %indvars.mid
+  %va = load atomic i32, i32* %buf0_a unordered, align 4
+  %vb = load atomic i32, i32* %buf0_b unordered, align 4
+  fence seq_cst
+  store atomic i32 %va, i32* %buf1_a unordered, align 4
+  store atomic i32 %vb, i32* %buf1_b unordered, align 4
+  %cmp = icmp slt i32 %indvars.iv.next, 3200
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
 attributes #0 = { nounwind uwtable }
 attributes #1 = { nounwind }