; Test input for Polly code generation with -enable-polly-openmp.
; The opt invocations below all read this file and are matched against
; different FileCheck prefixes:
;   1. -polly-cloog / -polly-codegen only              -> default prefix
;   2. additionally -polly-import-jscop, reading from
;      the directory of this test (%S)                 -> IMPORT prefix
;   3. as 2, with -polly-import-jscop-postfix=tiled and
;      -disable-polly-legality                         -> TILED prefix
; (A second, byte-identical copy of invocation 2 was removed; it only
; repeated the same check.)
Tobias Grosser | 7580537 | 2011-04-29 06:27:02 +0000 | [diff] [blame] | 1 | ; ModuleID = 'parallel_loop.s' |
| 2 | ; RUN: opt %loadPolly %defaultOpts -polly-cloog -polly-codegen -enable-polly-openmp -analyze < %s | FileCheck %s |
Tobias Grosser | 04eadc4 | 2012-02-13 12:29:43 +0000 | [diff] [blame] | 3 | ; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-dir=%S -polly-cloog -polly-codegen -enable-polly-openmp -analyze < %s | FileCheck -check-prefix=IMPORT %s |
| 5 | ; RUN: opt %loadPolly %defaultOpts -polly-import-jscop -polly-import-jscop-postfix=tiled -polly-import-jscop-dir=%S -polly-cloog -polly-codegen -enable-polly-openmp -analyze -disable-polly-legality < %s | FileCheck -check-prefix=TILED %s |
Tobias Grosser | 7580537 | 2011-04-29 06:27:02 +0000 | [diff] [blame] | 6 | target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" |
| 7 | target triple = "x86_64-unknown-linux-gnu" |
| 8 | |
; Arrays accessed by @parallel_loop: three 1024x1024 float matrices (A, B, C)
; and one 1024-element float vector (X), all zero-initialized.
| 9 | @A = common global [1024 x [1024 x float]] zeroinitializer, align 16 |
| 10 | @B = common global [1024 x [1024 x float]] zeroinitializer, align 16 |
| 11 | @C = common global [1024 x [1024 x float]] zeroinitializer, align 16 |
| 12 | @X = common global [1024 x float] zeroinitializer, align 16 |
| 13 | |
; float parallel_loop(void)
;
; Contains two separate triple-nested loop nests over the globals, each loop
; counting from 0 while the counter != 1024:
;   nest 1 (bb18 / bb20 / bb22, body bb23):  C[i][j] += A[i][k] * B[k][j]
;   nest 2 (bb34 / bb36 / bb38, body bb39):  X[k] = X[k] + X[k]
; and finally returns C[42][42] + X[42].
;
; The body block labels bb23 and bb39 are referenced by the expected output
; further down in this file (as Stmt_bb23 / Stmt_bb39), so they must keep
; their names.
| 14 | define float @parallel_loop() nounwind { |
| 15 | bb: |
| 16 | br label %bb18 |
| 17 | |
; ---- Nest 1, outer loop: %indvar9 is the row index i of C and A. ----
| 18 | bb18: ; preds = %bb32, %bb |
| 19 | %indvar9 = phi i64 [ %indvar.next10, %bb32 ], [ 0, %bb ] |
| 20 | %exitcond15 = icmp ne i64 %indvar9, 1024 |
| 21 | br i1 %exitcond15, label %bb19, label %bb33 |
| 22 | |
| 23 | bb19: ; preds = %bb18 |
| 24 | br label %bb20 |
| 25 | |
; Nest 1, middle loop: %indvar6 is the column index j; %scevgep14 = &C[i][j].
| 26 | bb20: ; preds = %bb30, %bb19 |
| 27 | %indvar6 = phi i64 [ %indvar.next7, %bb30 ], [ 0, %bb19 ] |
| 28 | %scevgep14 = getelementptr [1024 x [1024 x float]]* @C, i64 0, i64 %indvar9, i64 %indvar6 |
| 29 | %exitcond12 = icmp ne i64 %indvar6, 1024 |
| 30 | br i1 %exitcond12, label %bb21, label %bb31 |
| 31 | |
| 32 | bb21: ; preds = %bb20 |
| 33 | br label %bb22 |
| 34 | |
; Nest 1, inner loop: %indvar3 is the index k;
; %scevgep11 = &A[i][k], %scevgep8 = &B[k][j].
| 35 | bb22: ; preds = %bb28, %bb21 |
| 36 | %indvar3 = phi i64 [ %indvar.next4, %bb28 ], [ 0, %bb21 ] |
| 37 | %scevgep11 = getelementptr [1024 x [1024 x float]]* @A, i64 0, i64 %indvar9, i64 %indvar3 |
| 38 | %scevgep8 = getelementptr [1024 x [1024 x float]]* @B, i64 0, i64 %indvar3, i64 %indvar6 |
| 39 | %exitcond5 = icmp ne i64 %indvar3, 1024 |
| 40 | br i1 %exitcond5, label %bb23, label %bb29 |
| 41 | |
; Nest 1 body: C[i][j] = C[i][j] + A[i][k] * B[k][j].
| 42 | bb23: ; preds = %bb22 |
| 43 | %tmp = load float* %scevgep11, align 4 |
| 44 | %tmp24 = load float* %scevgep8, align 4 |
| 45 | %tmp25 = fmul float %tmp, %tmp24 |
| 46 | %tmp26 = load float* %scevgep14, align 4 |
| 47 | %tmp27 = fadd float %tmp26, %tmp25 |
| 48 | store float %tmp27, float* %scevgep14, align 4 |
| 49 | br label %bb28 |
| 50 | |
| 51 | bb28: ; preds = %bb23 |
| 52 | %indvar.next4 = add i64 %indvar3, 1 |
| 53 | br label %bb22 |
| 54 | |
| 55 | bb29: ; preds = %bb22 |
| 56 | br label %bb30 |
| 57 | |
| 58 | bb30: ; preds = %bb29 |
| 59 | %indvar.next7 = add i64 %indvar6, 1 |
| 60 | br label %bb20 |
| 61 | |
| 62 | bb31: ; preds = %bb20 |
| 63 | br label %bb32 |
| 64 | |
| 65 | bb32: ; preds = %bb31 |
| 66 | %indvar.next10 = add i64 %indvar9, 1 |
| 67 | br label %bb18 |
| 68 | |
| 69 | bb33: ; preds = %bb18 |
| 70 | br label %bb34 |
| 71 | |
; ---- Nest 2, outer loop: i32 counter %i.1; it is only used for the trip
; count, not for any address computation. ----
| 72 | bb34: ; preds = %bb48, %bb33 |
| 73 | %i.1 = phi i32 [ 0, %bb33 ], [ %tmp49, %bb48 ] |
| 74 | %exitcond2 = icmp ne i32 %i.1, 1024 |
| 75 | br i1 %exitcond2, label %bb35, label %bb50 |
| 76 | |
| 77 | bb35: ; preds = %bb34 |
| 78 | br label %bb36 |
| 79 | |
; Nest 2, middle loop: i32 counter %j.1, likewise only a trip counter.
| 80 | bb36: ; preds = %bb45, %bb35 |
| 81 | %j.1 = phi i32 [ 0, %bb35 ], [ %tmp46, %bb45 ] |
| 82 | %exitcond1 = icmp ne i32 %j.1, 1024 |
| 83 | br i1 %exitcond1, label %bb37, label %bb47 |
| 84 | |
| 85 | bb37: ; preds = %bb36 |
| 86 | br label %bb38 |
| 87 | |
; Nest 2, inner loop: %indvar is the only index used; %scevgep = &X[indvar].
| 88 | bb38: ; preds = %bb43, %bb37 |
| 89 | %indvar = phi i64 [ %indvar.next, %bb43 ], [ 0, %bb37 ] |
| 90 | %scevgep = getelementptr [1024 x float]* @X, i64 0, i64 %indvar |
| 91 | %exitcond = icmp ne i64 %indvar, 1024 |
| 92 | br i1 %exitcond, label %bb39, label %bb44 |
| 93 | |
; Nest 2 body: loads X[indvar] twice and stores their sum back to the same
; element, so every (i, j) iteration of the outer loops touches the same X.
| 94 | bb39: ; preds = %bb38 |
| 95 | %tmp40 = load float* %scevgep, align 4 |
| 96 | %tmp41 = load float* %scevgep, align 4 |
| 97 | %tmp42 = fadd float %tmp41, %tmp40 |
| 98 | store float %tmp42, float* %scevgep, align 4 |
| 99 | br label %bb43 |
| 100 | |
| 101 | bb43: ; preds = %bb39 |
| 102 | %indvar.next = add i64 %indvar, 1 |
| 103 | br label %bb38 |
| 104 | |
| 105 | bb44: ; preds = %bb38 |
| 106 | br label %bb45 |
| 107 | |
| 108 | bb45: ; preds = %bb44 |
| 109 | %tmp46 = add nsw i32 %j.1, 1 |
| 110 | br label %bb36 |
| 111 | |
| 112 | bb47: ; preds = %bb36 |
| 113 | br label %bb48 |
| 114 | |
| 115 | bb48: ; preds = %bb47 |
| 116 | %tmp49 = add nsw i32 %i.1, 1 |
| 117 | br label %bb34 |
| 118 | |
; Exit: the return value reads one element of each result array,
; C[42][42] + X[42].
| 119 | bb50: ; preds = %bb34 |
| 120 | %tmp51 = load float* getelementptr inbounds ([1024 x [1024 x float]]* @C, i64 0, i64 42, i64 42), align 8 |
| 121 | %tmp52 = load float* getelementptr inbounds ([1024 x float]* @X, i64 0, i64 42), align 8 |
| 122 | %tmp53 = fadd float %tmp51, %tmp52 |
| 123 | ret float %tmp53 |
| 124 | } |
| 125 | |
| 126 | ; CHECK: for (c2=0;c2<=1023;c2++) { |
| 127 | ; CHECK: for (c4=0;c4<=1023;c4++) { |
| 128 | ; CHECK: for (c6=0;c6<=1023;c6++) { |
| 129 | ; CHECK: Stmt_bb23(c2,c4,c6); |
| 130 | ; CHECK: } |
| 131 | ; CHECK: } |
| 132 | ; CHECK: } |
| 133 | ; CHECK: for (c2=0;c2<=1023;c2++) { |
| 134 | ; CHECK: for (c4=0;c4<=1023;c4++) { |
| 135 | ; CHECK: for (c6=0;c6<=1023;c6++) { |
| 136 | ; CHECK: Stmt_bb39(c2,c4,c6); |
| 137 | ; CHECK: } |
| 138 | ; CHECK: } |
| 139 | ; CHECK: } |
| 140 | ; CHECK: Parallel loop with iterator 'c2' generated |
| 141 | ; CHECK: Parallel loop with iterator 'c6' generated |
| 142 | ; CHECK-NOT: Parallel loop |
| 143 | |
| 144 | |
| 145 | ; IMPORT: for (c2=0;c2<=1023;c2++) { |
| 146 | ; IMPORT: for (c4=0;c4<=1023;c4++) { |
| 147 | ; IMPORT: for (c6=0;c6<=1023;c6++) { |
| 148 | ; IMPORT: Stmt_bb23(c2,c4,c6); |
| 149 | ; IMPORT: Stmt_bb39(c2,c4,c6); |
| 150 | ; IMPORT: } |
| 151 | ; IMPORT: } |
| 152 | ; IMPORT: } |
| 153 | ; IMPORT-NOT: Parallel loop |
| 154 | |
| 155 | ; TILED: for (c2=0;c2<=1023;c2+=4) { |
| 156 | ; TILED: for (c4=0;c4<=1023;c4+=4) { |
| 157 | ; TILED: for (c6=0;c6<=1023;c6+=4) { |
| 158 | ; TILED: for (c8=c2;c8<=c2+3;c8++) { |
| 159 | ; TILED: for (c9=c4;c9<=c4+3;c9++) { |
| 160 | ; TILED: for (c10=c6;c10<=c6+3;c10++) { |
| 161 | ; TILED: Stmt_bb23(c8,c9,c10); |
| 162 | ; TILED: } |
| 163 | ; TILED: } |
| 164 | ; TILED: } |
| 165 | ; TILED: } |
| 166 | ; TILED: } |
| 167 | ; TILED: } |
| 168 | ; TILED: for (c2=0;c2<=1023;c2+=4) { |
| 169 | ; TILED: for (c4=0;c4<=1023;c4+=4) { |
| 170 | ; TILED: for (c6=0;c6<=1023;c6+=4) { |
| 171 | ; TILED: for (c8=c2;c8<=c2+3;c8++) { |
| 172 | ; TILED: for (c9=c4;c9<=c4+3;c9++) { |
| 173 | ; TILED: for (c10=c6;c10<=c6+3;c10++) { |
| 174 | ; TILED: Stmt_bb39(c8,c9,c10); |
| 175 | ; TILED: } |
| 176 | ; TILED: } |
| 177 | ; TILED: } |
| 178 | ; TILED: } |
| 179 | ; TILED: } |
| 180 | ; TILED: } |
| 181 | ; I am not sure if we actually may have parallel loops here. The dependency |
| 182 | ; analysis does not detect any. This may however be because we do not |
| 183 | ; correctly update the imported schedule. Add a check that hopefully fails |
| 184 | ; after this is corrected. Or someone proves there are no parallel loops and |
| 185 | ; we can remove this comment. |
| 186 | ; TILED-NOT: Parallel loop |