blob: cd1935645fc1bf013096115b7bdc8ad4a942b18e [file] [log] [blame]
Tobias Grosser75805372011-04-29 06:27:02 +00001; ModuleID = 'parallel_loop.s'
2; RUN: opt %loadPolly %defaultOpts -polly-cloog -polly-codegen -enable-polly-openmp -analyze < %s | FileCheck %s
Tobias Grosser04eadc42012-02-13 12:29:43 +00003; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-dir=%S -polly-cloog -polly-codegen -enable-polly-openmp -analyze < %s | FileCheck -check-prefix=IMPORT %s
4; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-dir=%S -polly-cloog -polly-codegen -enable-polly-openmp -analyze < %s | FileCheck -check-prefix=IMPORT %s
5; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-postfix=tiled -polly-import-jscop-dir=%S -polly-cloog -polly-codegen -enable-polly-openmp -analyze -disable-polly-legality < %s | FileCheck -check-prefix=TILED %s
Tobias Grosser75805372011-04-29 06:27:02 +00006target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
7target triple = "x86_64-unknown-linux-gnu"
8
; Test data: three 1024 x 1024 float matrices (A, B, C) and one 1024-element
; float vector (X). All are zero-initialized, have common linkage and are
; 16-byte aligned.
9@A = common global [1024 x [1024 x float]] zeroinitializer, align 16
10@B = common global [1024 x [1024 x float]] zeroinitializer, align 16
11@C = common global [1024 x [1024 x float]] zeroinitializer, align 16
12@X = common global [1024 x float] zeroinitializer, align 16
13
; parallel_loop - two consecutive triple loop nests followed by a scalar result.
;
;   Nest 1 (headers bb18 / bb20 / bb22, body bb23):
;       C[i][j] = C[i][j] + A[i][k] * B[k][j]     for i, j, k in 0..1023
;   Nest 2 (headers bb34 / bb36 / bb38, body bb39):
;       X[k] = X[k] + X[k]                        for k in 0..1023,
;       executed once per iteration of two enclosing 1024-trip loops whose
;       counters (%i.1, %j.1) are not used by the body.
;   Exit (bb50): returns C[42][42] + X[42].
;
; Takes no arguments; reads @A, @B and writes @C, @X.
; NOTE(review): the expected-output patterns further down name the statements
; Stmt_bb23 and Stmt_bb39, presumably derived from the body block labels here,
; so these labels should stay as they are - confirm before renaming anything.
14define float @parallel_loop() nounwind {
15bb:
16 br label %bb18
17
; ---- Nest 1, outer loop: %indvar9 counts 0..1023 (leaves to bb33 at 1024).
18bb18: ; preds = %bb32, %bb
19 %indvar9 = phi i64 [ %indvar.next10, %bb32 ], [ 0, %bb ]
20 %exitcond15 = icmp ne i64 %indvar9, 1024
21 br i1 %exitcond15, label %bb19, label %bb33
22
23bb19: ; preds = %bb18
24 br label %bb20
25
; ---- Nest 1, middle loop: %indvar6 counts 0..1023.
; %scevgep14 is the address of C[%indvar9][%indvar6].
26bb20: ; preds = %bb30, %bb19
27 %indvar6 = phi i64 [ %indvar.next7, %bb30 ], [ 0, %bb19 ]
28 %scevgep14 = getelementptr [1024 x [1024 x float]]* @C, i64 0, i64 %indvar9, i64 %indvar6
29 %exitcond12 = icmp ne i64 %indvar6, 1024
30 br i1 %exitcond12, label %bb21, label %bb31
31
32bb21: ; preds = %bb20
33 br label %bb22
34
; ---- Nest 1, inner loop: %indvar3 counts 0..1023.
; %scevgep11 is the address of A[%indvar9][%indvar3],
; %scevgep8  is the address of B[%indvar3][%indvar6].
35bb22: ; preds = %bb28, %bb21
36 %indvar3 = phi i64 [ %indvar.next4, %bb28 ], [ 0, %bb21 ]
37 %scevgep11 = getelementptr [1024 x [1024 x float]]* @A, i64 0, i64 %indvar9, i64 %indvar3
38 %scevgep8 = getelementptr [1024 x [1024 x float]]* @B, i64 0, i64 %indvar3, i64 %indvar6
39 %exitcond5 = icmp ne i64 %indvar3, 1024
40 br i1 %exitcond5, label %bb23, label %bb29
41
; Nest 1 body: load A and B elements, multiply them, add the product to the
; current C element and store the sum back to the same C element.
42bb23: ; preds = %bb22
43 %tmp = load float* %scevgep11, align 4
44 %tmp24 = load float* %scevgep8, align 4
45 %tmp25 = fmul float %tmp, %tmp24
46 %tmp26 = load float* %scevgep14, align 4
47 %tmp27 = fadd float %tmp26, %tmp25
48 store float %tmp27, float* %scevgep14, align 4
49 br label %bb28
50
; Latch of the inner loop: advance %indvar3.
51bb28: ; preds = %bb23
52 %indvar.next4 = add i64 %indvar3, 1
53 br label %bb22
54
55bb29: ; preds = %bb22
56 br label %bb30
57
; Latch of the middle loop: advance %indvar6.
58bb30: ; preds = %bb29
59 %indvar.next7 = add i64 %indvar6, 1
60 br label %bb20
61
62bb31: ; preds = %bb20
63 br label %bb32
64
; Latch of the outer loop: advance %indvar9.
65bb32: ; preds = %bb31
66 %indvar.next10 = add i64 %indvar9, 1
67 br label %bb18
68
; Nest 1 is finished; fall through to the second nest.
69bb33: ; preds = %bb18
70 br label %bb34
71
; ---- Nest 2, outer loop: i32 counter %i.1 counts 0..1023 (leaves to bb50 at 1024).
72bb34: ; preds = %bb48, %bb33
73 %i.1 = phi i32 [ 0, %bb33 ], [ %tmp49, %bb48 ]
74 %exitcond2 = icmp ne i32 %i.1, 1024
75 br i1 %exitcond2, label %bb35, label %bb50
76
77bb35: ; preds = %bb34
78 br label %bb36
79
; ---- Nest 2, middle loop: i32 counter %j.1 counts 0..1023.
80bb36: ; preds = %bb45, %bb35
81 %j.1 = phi i32 [ 0, %bb35 ], [ %tmp46, %bb45 ]
82 %exitcond1 = icmp ne i32 %j.1, 1024
83 br i1 %exitcond1, label %bb37, label %bb47
84
85bb37: ; preds = %bb36
86 br label %bb38
87
; ---- Nest 2, inner loop: %indvar counts 0..1023.
; %scevgep is the address of X[%indvar]; it depends only on the inner counter.
88bb38: ; preds = %bb43, %bb37
89 %indvar = phi i64 [ %indvar.next, %bb43 ], [ 0, %bb37 ]
90 %scevgep = getelementptr [1024 x float]* @X, i64 0, i64 %indvar
91 %exitcond = icmp ne i64 %indvar, 1024
92 br i1 %exitcond, label %bb39, label %bb44
93
; Nest 2 body: load X[%indvar] twice, add the two values and store the sum
; back to the same element.
94bb39: ; preds = %bb38
95 %tmp40 = load float* %scevgep, align 4
96 %tmp41 = load float* %scevgep, align 4
97 %tmp42 = fadd float %tmp41, %tmp40
98 store float %tmp42, float* %scevgep, align 4
99 br label %bb43
100
; Latch of the inner loop: advance %indvar.
101bb43: ; preds = %bb39
102 %indvar.next = add i64 %indvar, 1
103 br label %bb38
104
105bb44: ; preds = %bb38
106 br label %bb45
107
; Latch of the middle loop: advance %j.1.
108bb45: ; preds = %bb44
109 %tmp46 = add nsw i32 %j.1, 1
110 br label %bb36
111
112bb47: ; preds = %bb36
113 br label %bb48
114
; Latch of the outer loop: advance %i.1.
115bb48: ; preds = %bb47
116 %tmp49 = add nsw i32 %i.1, 1
117 br label %bb34
118
; Exit: read one element written by each nest (C[42][42] and X[42]) and
; return their sum.
119bb50: ; preds = %bb34
120 %tmp51 = load float* getelementptr inbounds ([1024 x [1024 x float]]* @C, i64 0, i64 42, i64 42), align 8
121 %tmp52 = load float* getelementptr inbounds ([1024 x float]* @X, i64 0, i64 42), align 8
122 %tmp53 = fadd float %tmp51, %tmp52
123 ret float %tmp53
124}
125
126; CHECK: for (c2=0;c2<=1023;c2++) {
127; CHECK: for (c4=0;c4<=1023;c4++) {
128; CHECK: for (c6=0;c6<=1023;c6++) {
129; CHECK: Stmt_bb23(c2,c4,c6);
130; CHECK: }
131; CHECK: }
132; CHECK: }
133; CHECK: for (c2=0;c2<=1023;c2++) {
134; CHECK: for (c4=0;c4<=1023;c4++) {
135; CHECK: for (c6=0;c6<=1023;c6++) {
136; CHECK: Stmt_bb39(c2,c4,c6);
137; CHECK: }
138; CHECK: }
139; CHECK: }
140; CHECK: Parallel loop with iterator 'c2' generated
141; CHECK: Parallel loop with iterator 'c6' generated
142; CHECK-NOT: Parallel loop
143
144
145; IMPORT: for (c2=0;c2<=1023;c2++) {
146; IMPORT: for (c4=0;c4<=1023;c4++) {
147; IMPORT: for (c6=0;c6<=1023;c6++) {
148; IMPORT: Stmt_bb23(c2,c4,c6);
149; IMPORT: Stmt_bb39(c2,c4,c6);
150; IMPORT: }
151; IMPORT: }
152; IMPORT: }
153; IMPORT-NOT: Parallel loop
154
155; TILED: for (c2=0;c2<=1023;c2+=4) {
156; TILED: for (c4=0;c4<=1023;c4+=4) {
157; TILED: for (c6=0;c6<=1023;c6+=4) {
158; TILED: for (c8=c2;c8<=c2+3;c8++) {
159; TILED: for (c9=c4;c9<=c4+3;c9++) {
160; TILED: for (c10=c6;c10<=c6+3;c10++) {
161; TILED: Stmt_bb23(c8,c9,c10);
162; TILED: }
163; TILED: }
164; TILED: }
165; TILED: }
166; TILED: }
167; TILED: }
168; TILED: for (c2=0;c2<=1023;c2+=4) {
169; TILED: for (c4=0;c4<=1023;c4+=4) {
170; TILED: for (c6=0;c6<=1023;c6+=4) {
171; TILED: for (c8=c2;c8<=c2+3;c8++) {
172; TILED: for (c9=c4;c9<=c4+3;c9++) {
173; TILED: for (c10=c6;c10<=c6+3;c10++) {
174; TILED: Stmt_bb39(c8,c9,c10);
175; TILED: }
176; TILED: }
177; TILED: }
178; TILED: }
179; TILED: }
180; TILED: }
181; I am not sure if we actually may have parallel loops here. The dependency
182; analysis does not detect any. This may however be because we do not
183; correctly update the imported schedule. Add a check that hopefully fails
184; after this is corrected. Or someone proves there are no parallel loops and
185; we can remove this comment.
186; TILED-NOT: Parallel loop