Revert "Temporarily Revert "Add basic loop fusion pass.""
The earlier revert apparently also deleted the test/Transforms directory, so this commit restores it.
The loop fusion pass will be reverted again in a follow-up commit.
llvm-svn: 358552
diff --git a/llvm/test/Transforms/LoopDataPrefetch/AArch64/kryo-large-stride.ll b/llvm/test/Transforms/LoopDataPrefetch/AArch64/kryo-large-stride.ll
new file mode 100644
index 0000000..22cf6a9
--- /dev/null
+++ b/llvm/test/Transforms/LoopDataPrefetch/AArch64/kryo-large-stride.ll
@@ -0,0 +1,53 @@
+; RUN: opt -mcpu=kryo -mtriple=aarch64-gnu-linux -loop-data-prefetch -max-prefetch-iters-ahead=1000 -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=kryo -mtriple=aarch64-gnu-linux -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=kryo -mtriple=aarch64-gnu-linux -passes=loop-data-prefetch -max-prefetch-iters-ahead=1000 -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=kryo -mtriple=aarch64-gnu-linux -passes=loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
+
+; ALL-LABEL: @small_stride(
+define void @small_stride(double* nocapture %a, double* nocapture readonly %b) { ; unit-stride loop: a[i] = b[i] + 1.0 for i in [0, 1600)
+entry:
+ br label %for.body
+
+; ALL: for.body:
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; induction variable i, starts at 0
+ %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv ; &b[i]
+; ALL-NOT: call void @llvm.prefetch
+ %0 = load double, double* %arrayidx, align 8 ; the only load in the loop; the directive above expects no prefetch call before it
+ %add = fadd double %0, 1.000000e+00
+ %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv ; &a[i]
+ store double %add, double* %arrayidx2, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; i advances by one double (8 bytes) per iteration
+ %exitcond = icmp eq i64 %indvars.iv.next, 1600 ; leave the loop once i + 1 equals 1600
+ br i1 %exitcond, label %for.end, label %for.body
+
+; ALL: for.end:
+for.end: ; preds = %for.body
+ ret void
+}
+
+; ALL-LABEL: @large_stride(
+define void @large_stride(double* nocapture %a, double* nocapture readonly %b) { ; same loop body as @small_stride in this file, but i advances by 150 elements per iteration
+entry:
+ br label %for.body
+
+; ALL: for.body:
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; induction variable i, starts at 0
+ %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv ; &b[i]
+; LARGE_PREFETCH: call void @llvm.prefetch
+; NO_LARGE_PREFETCH-NOT: call void @llvm.prefetch
+ %0 = load double, double* %arrayidx, align 8 ; the two directives above differ only by whether the run line passes -max-prefetch-iters-ahead=1000
+ %add = fadd double %0, 1.000000e+00
+ %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv ; &a[i]
+ store double %add, double* %arrayidx2, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 150 ; stride of 150 doubles = 1200 bytes between consecutive accesses
+ %exitcond = icmp eq i64 %indvars.iv.next, 160000 ; NOTE(review): 160000 is not a multiple of 150, so starting from 0 this equality is never satisfied - confirm the bound is intentional
+ br i1 %exitcond, label %for.end, label %for.body
+
+; ALL: for.end:
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll b/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll
new file mode 100644
index 0000000..fe956a8
--- /dev/null
+++ b/llvm/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll
@@ -0,0 +1,55 @@
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -max-prefetch-iters-ahead=100 -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=generic -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -passes=loop-data-prefetch -max-prefetch-iters-ahead=100 -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -passes=loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=generic -mtriple=arm64-apple-ios -passes=loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
+
+; ALL-LABEL: @small_stride(
+define void @small_stride(double* nocapture %a, double* nocapture readonly %b) { ; unit-stride loop: a[i] = b[i] + 1.0 for i in [0, 1600)
+entry:
+ br label %for.body
+
+; ALL: for.body:
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; induction variable i, starts at 0
+ %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv ; &b[i]
+; ALL-NOT: call void @llvm.prefetch
+ %0 = load double, double* %arrayidx, align 8 ; the only load in the loop; the directive above expects no prefetch call before it
+ %add = fadd double %0, 1.000000e+00
+ %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv ; &a[i]
+ store double %add, double* %arrayidx2, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; i advances by one double (8 bytes) per iteration
+ %exitcond = icmp eq i64 %indvars.iv.next, 1600 ; leave the loop once i + 1 equals 1600
+ br i1 %exitcond, label %for.end, label %for.body
+
+; ALL: for.end:
+for.end: ; preds = %for.body
+ ret void
+}
+
+; ALL-LABEL: @large_stride(
+define void @large_stride(double* nocapture %a, double* nocapture readonly %b) { ; same loop body as @small_stride in this file, but i advances by 300 elements per iteration
+entry:
+ br label %for.body
+
+; ALL: for.body:
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; induction variable i, starts at 0
+ %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv ; &b[i]
+; LARGE_PREFETCH: call void @llvm.prefetch
+; NO_LARGE_PREFETCH-NOT: call void @llvm.prefetch
+ %0 = load double, double* %arrayidx, align 8 ; the two directives above differ only by whether the run line passes -max-prefetch-iters-ahead=100
+ %add = fadd double %0, 1.000000e+00
+ %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv ; &a[i]
+ store double %add, double* %arrayidx2, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 300 ; stride of 300 doubles = 2400 bytes between consecutive accesses
+ %exitcond = icmp eq i64 %indvars.iv.next, 160000 ; NOTE(review): 160000 is not a multiple of 300, so starting from 0 this equality is never satisfied - confirm the bound is intentional
+ br i1 %exitcond, label %for.end, label %for.body
+
+; ALL: for.end:
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopDataPrefetch/AArch64/lit.local.cfg b/llvm/test/Transforms/LoopDataPrefetch/AArch64/lit.local.cfg
new file mode 100644
index 0000000..675f48e
--- /dev/null
+++ b/llvm/test/Transforms/LoopDataPrefetch/AArch64/lit.local.cfg
@@ -0,0 +1,4 @@
+config.suffixes = ['.ll']
+
+if not 'AArch64' in config.root.targets:
+ config.unsupported = True
diff --git a/llvm/test/Transforms/LoopDataPrefetch/AArch64/opt-remark-with-hotness.ll b/llvm/test/Transforms/LoopDataPrefetch/AArch64/opt-remark-with-hotness.ll
new file mode 100644
index 0000000..6149119
--- /dev/null
+++ b/llvm/test/Transforms/LoopDataPrefetch/AArch64/opt-remark-with-hotness.ll
@@ -0,0 +1,86 @@
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch \
+; RUN: -pass-remarks=loop-data-prefetch -S -max-prefetch-iters-ahead=100 \
+; RUN: -pass-remarks-with-hotness \
+; RUN: < %s 2>&1 | FileCheck %s
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -passes=loop-data-prefetch \
+; RUN: -pass-remarks=loop-data-prefetch -S -max-prefetch-iters-ahead=100 \
+; RUN: -pass-remarks-with-hotness \
+; RUN: < %s 2>&1 | FileCheck %s
+
+; ModuleID = '/tmp/s.c'
+source_filename = "/tmp/s.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios5.0.0"
+
+; 1 struct MyStruct {
+; 2 int field;
+; 3 char kk[2044];
+; 4 } *my_struct;
+; 5
+; 6 int f(struct MyStruct *p, int N) {
+; 7 int total = 0;
+; 8 for (int i = 0; i < N; i++) {
+; 9 total += my_struct[i].field;
+; 10 }
+; 11 return total;
+; 12 }
+
+; CHECK: remark: /tmp/s.c:9:27: prefetched memory access (hotness: 600)
+
+%struct.MyStruct = type { i32, [2044 x i8] }
+
+@my_struct = common global %struct.MyStruct* null, align 8
+
+define i32 @f(%struct.MyStruct* nocapture readnone %p, i32 %N) !dbg !6 !prof !21 { ; sums my_struct[i].field for i in [0, N); %p is never used; !prof !21 gives the function entry count
+entry:
+ %cmp6 = icmp sgt i32 %N, 0, !dbg !8 ; the loop is entered only when N > 0
+ br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup, !dbg !9, !prof !22
+
+for.body.lr.ph: ; preds = %entry
+ %0 = load %struct.MyStruct*, %struct.MyStruct** @my_struct, align 8, !dbg !10, !tbaa !11 ; read the global base pointer once, before the loop
+ br label %for.body, !dbg !9
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %total.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] ; 0 when the loop was skipped, otherwise the final sum
+ ret i32 %total.0.lcssa, !dbg !15
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] ; induction variable i, starts at 0
+ %total.07 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] ; running sum
+ %field = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %0, i64 %indvars.iv, i32 0, !dbg !16 ; &my_struct[i].field; the struct is { i32, [2044 x i8] }, so consecutive elements are 2048 bytes apart
+ %1 = load i32, i32* %field, align 4, !dbg !16, !tbaa !17 ; carries !dbg !16 (line 9, column 27), the location named in the expected remark
+ %add = add nsw i32 %1, %total.07, !dbg !20
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !9
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !9 ; narrow i + 1 to i32 for the comparison against N
+ %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !9
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !9, !prof !23
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
+!1 = !DIFile(filename: "/tmp/s.c", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"PIC Level", i32 2}
+!5 = !{!"clang version 3.9.0"}
+!6 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 6, type: !7, isLocal: false, isDefinition: true, scopeLine: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2)
+!7 = !DISubroutineType(types: !2)
+!8 = !DILocation(line: 8, column: 21, scope: !6)
+!9 = !DILocation(line: 8, column: 3, scope: !6)
+!10 = !DILocation(line: 9, column: 14, scope: !6)
+!11 = !{!12, !12, i64 0}
+!12 = !{!"any pointer", !13, i64 0}
+!13 = !{!"omnipotent char", !14, i64 0}
+!14 = !{!"Simple C/C++ TBAA"}
+!15 = !DILocation(line: 11, column: 3, scope: !6)
+!16 = !DILocation(line: 9, column: 27, scope: !6)
+!17 = !{!18, !19, i64 0}
+!18 = !{!"MyStruct", !19, i64 0, !13, i64 4}
+!19 = !{!"int", !13, i64 0}
+!20 = !DILocation(line: 9, column: 11, scope: !6)
+!21 = !{!"function_entry_count", i64 6}
+!22 = !{!"branch_weights", i32 99, i32 1}
+!23 = !{!"branch_weights", i32 1, i32 99}
diff --git a/llvm/test/Transforms/LoopDataPrefetch/AArch64/opt-remark.ll b/llvm/test/Transforms/LoopDataPrefetch/AArch64/opt-remark.ll
new file mode 100644
index 0000000..e7d8f5a
--- /dev/null
+++ b/llvm/test/Transforms/LoopDataPrefetch/AArch64/opt-remark.ll
@@ -0,0 +1,81 @@
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch \
+; RUN: -pass-remarks=loop-data-prefetch -S -max-prefetch-iters-ahead=100 \
+; RUN: < %s 2>&1 | FileCheck %s
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -passes=loop-data-prefetch \
+; RUN: -pass-remarks=loop-data-prefetch -S -max-prefetch-iters-ahead=100 \
+; RUN: < %s 2>&1 | FileCheck %s
+
+; ModuleID = '/tmp/s.c'
+source_filename = "/tmp/s.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios5.0.0"
+
+; 1 struct MyStruct {
+; 2 int field;
+; 3 char kk[2044];
+; 4 } *my_struct;
+; 5
+; 6 int f(struct MyStruct *p, int N) {
+; 7 int total = 0;
+; 8 for (int i = 0; i < N; i++) {
+; 9 total += my_struct[i].field;
+; 10 }
+; 11 return total;
+; 12 }
+
+; CHECK: remark: /tmp/s.c:9:27: prefetched memory access
+
+%struct.MyStruct = type { i32, [2044 x i8] }
+
+@my_struct = common global %struct.MyStruct* null, align 8
+
+define i32 @f(%struct.MyStruct* nocapture readnone %p, i32 %N) !dbg !6 { ; sums my_struct[i].field for i in [0, N); %p is never used
+entry:
+ %cmp6 = icmp sgt i32 %N, 0, !dbg !8 ; the loop is entered only when N > 0
+ br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup, !dbg !9
+
+for.body.lr.ph: ; preds = %entry
+ %0 = load %struct.MyStruct*, %struct.MyStruct** @my_struct, align 8, !dbg !10, !tbaa !11 ; read the global base pointer once, before the loop
+ br label %for.body, !dbg !9
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %total.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] ; 0 when the loop was skipped, otherwise the final sum
+ ret i32 %total.0.lcssa, !dbg !15
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] ; induction variable i, starts at 0
+ %total.07 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] ; running sum
+ %field = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %0, i64 %indvars.iv, i32 0, !dbg !16 ; &my_struct[i].field; the struct is { i32, [2044 x i8] }, so consecutive elements are 2048 bytes apart
+ %1 = load i32, i32* %field, align 4, !dbg !16, !tbaa !17 ; carries !dbg !16 (line 9, column 27), the location named in the expected remark
+ %add = add nsw i32 %1, %total.07, !dbg !20
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !9
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !9 ; narrow i + 1 to i32 for the comparison against N
+ %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !9
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !9
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
+!1 = !DIFile(filename: "/tmp/s.c", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"PIC Level", i32 2}
+!5 = !{!"clang version 3.9.0"}
+!6 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 6, type: !7, isLocal: false, isDefinition: true, scopeLine: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, retainedNodes: !2)
+!7 = !DISubroutineType(types: !2)
+!8 = !DILocation(line: 8, column: 21, scope: !6)
+!9 = !DILocation(line: 8, column: 3, scope: !6)
+!10 = !DILocation(line: 9, column: 14, scope: !6)
+!11 = !{!12, !12, i64 0}
+!12 = !{!"any pointer", !13, i64 0}
+!13 = !{!"omnipotent char", !14, i64 0}
+!14 = !{!"Simple C/C++ TBAA"}
+!15 = !DILocation(line: 11, column: 3, scope: !6)
+!16 = !DILocation(line: 9, column: 27, scope: !6)
+!17 = !{!18, !19, i64 0}
+!18 = !{!"MyStruct", !19, i64 0, !13, i64 4}
+!19 = !{!"int", !13, i64 0}
+!20 = !DILocation(line: 9, column: 11, scope: !6)
diff --git a/llvm/test/Transforms/LoopDataPrefetch/PowerPC/basic.ll b/llvm/test/Transforms/LoopDataPrefetch/PowerPC/basic.ll
new file mode 100644
index 0000000..ea46fd0
--- /dev/null
+++ b/llvm/test/Transforms/LoopDataPrefetch/PowerPC/basic.ll
@@ -0,0 +1,26 @@
+; RUN: opt -mcpu=a2 -loop-data-prefetch -S < %s | FileCheck %s
+; RUN: opt -mcpu=a2 -passes=loop-data-prefetch -S < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+define void @foo(double* nocapture %a, double* nocapture readonly %b) { ; unit-stride loop: a[i] = b[i] + 1.0 for i in [0, 1600)
+entry:
+ br label %for.body
+
+; CHECK: for.body:
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; induction variable i, starts at 0
+ %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv ; &b[i]
+; CHECK: call void @llvm.prefetch
+ %0 = load double, double* %arrayidx, align 8 ; the only load in the loop; the directive above expects a prefetch call inside for.body
+ %add = fadd double %0, 1.000000e+00
+ %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv ; &a[i]
+ store double %add, double* %arrayidx2, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; i advances by one double (8 bytes) per iteration
+ %exitcond = icmp eq i64 %indvars.iv.next, 1600 ; leave the loop once i + 1 equals 1600
+ br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: for.end:
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopDataPrefetch/PowerPC/lit.local.cfg b/llvm/test/Transforms/LoopDataPrefetch/PowerPC/lit.local.cfg
new file mode 100644
index 0000000..0913324
--- /dev/null
+++ b/llvm/test/Transforms/LoopDataPrefetch/PowerPC/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'PowerPC' in config.root.targets:
+ config.unsupported = True