David Green | 963401d | 2018-07-01 12:47:30 +0000 | [diff] [blame] | 1 | ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| 2 | ; RUN: opt -basicaa -tbaa -loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 -unroll-remainder < %s -S | FileCheck %s |
| 3 | |
| 4 | target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" |
| 5 | |
| 6 | ; CHECK-LABEL: test1 |
| 7 | ; Tests for(i) { sum = 0; for(j) sum += B[j]; A[i] = sum; } |
| 8 | ; CHECK-NEXT: entry: |
| 9 | ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[J:%.*]], 0 |
| 10 | ; CHECK-NEXT: [[CMPJ:%.*]] = icmp ne i32 [[I:%.*]], 0 |
| 11 | ; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP]], [[CMPJ]] |
| 12 | ; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_OUTER_PREHEADER:%.*]], label [[FOR_END:%.*]] |
| 13 | ; CHECK: for.outer.preheader: |
| 14 | ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[I]], -1 |
| 15 | ; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[I]], 3 |
| 16 | ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3 |
| 17 | ; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_OUTER_PREHEADER_NEW:%.*]] |
| 18 | ; CHECK: for.outer.preheader.new: |
| 19 | ; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i32 [[I]], [[XTRAITER]] |
| 20 | ; CHECK-NEXT: br label [[FOR_OUTER:%.*]] |
| 21 | ; CHECK: for.outer: |
| 22 | ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[ADD8_3:%.*]], [[FOR_LATCH:%.*]] ], [ 0, [[FOR_OUTER_PREHEADER_NEW]] ] |
| 23 | ; CHECK-NEXT: [[NITER:%.*]] = phi i32 [ [[UNROLL_ITER]], [[FOR_OUTER_PREHEADER_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[FOR_LATCH]] ] |
| 24 | ; CHECK-NEXT: [[ADD8:%.*]] = add nuw nsw i32 [[I]], 1 |
| 25 | ; CHECK-NEXT: [[NITER_NSUB:%.*]] = sub i32 [[NITER]], 1 |
| 26 | ; CHECK-NEXT: [[ADD8_1:%.*]] = add nuw nsw i32 [[ADD8]], 1 |
| 27 | ; CHECK-NEXT: [[NITER_NSUB_1:%.*]] = sub i32 [[NITER_NSUB]], 1 |
| 28 | ; CHECK-NEXT: [[ADD8_2:%.*]] = add nuw nsw i32 [[ADD8_1]], 1 |
| 29 | ; CHECK-NEXT: [[NITER_NSUB_2:%.*]] = sub i32 [[NITER_NSUB_1]], 1 |
| 30 | ; CHECK-NEXT: [[ADD8_3]] = add nuw i32 [[ADD8_2]], 1 |
| 31 | ; CHECK-NEXT: [[NITER_NSUB_3]] = sub i32 [[NITER_NSUB_2]], 1 |
| 32 | ; CHECK-NEXT: br label [[FOR_INNER:%.*]] |
| 33 | ; CHECK: for.inner: |
| 34 | ; CHECK-NEXT: [[J_0:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC:%.*]], [[FOR_INNER]] ] |
| 35 | ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD:%.*]], [[FOR_INNER]] ] |
| 36 | ; CHECK-NEXT: [[J_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_1:%.*]], [[FOR_INNER]] ] |
| 37 | ; CHECK-NEXT: [[SUM_1:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_1:%.*]], [[FOR_INNER]] ] |
| 38 | ; CHECK-NEXT: [[J_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_2:%.*]], [[FOR_INNER]] ] |
| 39 | ; CHECK-NEXT: [[SUM_2:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_2:%.*]], [[FOR_INNER]] ] |
| 40 | ; CHECK-NEXT: [[J_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC_3:%.*]], [[FOR_INNER]] ] |
| 41 | ; CHECK-NEXT: [[SUM_3:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD_3:%.*]], [[FOR_INNER]] ] |
| 42 | ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[J_0]] |
| 43 | ; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !tbaa !0 |
| 44 | ; CHECK-NEXT: [[ADD]] = add i32 [[TMP2]], [[SUM]] |
| 45 | ; CHECK-NEXT: [[INC]] = add nuw i32 [[J_0]], 1 |
| 46 | ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[J_1]] |
| 47 | ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4, !tbaa !0 |
| 48 | ; CHECK-NEXT: [[ADD_1]] = add i32 [[TMP3]], [[SUM_1]] |
| 49 | ; CHECK-NEXT: [[INC_1]] = add nuw i32 [[J_1]], 1 |
| 50 | ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[J_2]] |
| 51 | ; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4, !tbaa !0 |
| 52 | ; CHECK-NEXT: [[ADD_2]] = add i32 [[TMP4]], [[SUM_2]] |
| 53 | ; CHECK-NEXT: [[INC_2]] = add nuw i32 [[J_2]], 1 |
| 54 | ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[J_3]] |
| 55 | ; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4, !tbaa !0 |
| 56 | ; CHECK-NEXT: [[ADD_3]] = add i32 [[TMP5]], [[SUM_3]] |
| 57 | ; CHECK-NEXT: [[INC_3]] = add nuw i32 [[J_3]], 1 |
| 58 | ; CHECK-NEXT: [[EXITCOND_3:%.*]] = icmp eq i32 [[INC_3]], [[J]] |
| 59 | ; CHECK-NEXT: br i1 [[EXITCOND_3]], label [[FOR_LATCH]], label [[FOR_INNER]] |
| 60 | ; CHECK: for.latch: |
| 61 | ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_INNER]] ] |
| 62 | ; CHECK-NEXT: [[ADD_LCSSA_1:%.*]] = phi i32 [ [[ADD_1]], [[FOR_INNER]] ] |
| 63 | ; CHECK-NEXT: [[ADD_LCSSA_2:%.*]] = phi i32 [ [[ADD_2]], [[FOR_INNER]] ] |
| 64 | ; CHECK-NEXT: [[ADD_LCSSA_3:%.*]] = phi i32 [ [[ADD_3]], [[FOR_INNER]] ] |
| 65 | ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I]] |
| 66 | ; CHECK-NEXT: store i32 [[ADD_LCSSA]], i32* [[ARRAYIDX6]], align 4, !tbaa !0 |
| 67 | ; CHECK-NEXT: [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8]] |
| 68 | ; CHECK-NEXT: store i32 [[ADD_LCSSA_1]], i32* [[ARRAYIDX6_1]], align 4, !tbaa !0 |
| 69 | ; CHECK-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8_1]] |
| 70 | ; CHECK-NEXT: store i32 [[ADD_LCSSA_2]], i32* [[ARRAYIDX6_2]], align 4, !tbaa !0 |
| 71 | ; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8_2]] |
| 72 | ; CHECK-NEXT: store i32 [[ADD_LCSSA_3]], i32* [[ARRAYIDX6_3]], align 4, !tbaa !0 |
| 73 | ; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NSUB_3]], 0 |
| 74 | ; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label [[FOR_OUTER]], !llvm.loop !4 |
| 75 | ; CHECK: for.end.loopexit.unr-lcssa.loopexit: |
| 76 | ; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[ADD8_3]], [[FOR_LATCH]] ] |
| 77 | ; CHECK-NEXT: br label [[FOR_END_LOOPEXIT_UNR_LCSSA]] |
| 78 | ; CHECK: for.end.loopexit.unr-lcssa: |
| 79 | ; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ 0, [[FOR_OUTER_PREHEADER]] ], [ [[I_UNR_PH]], [[FOR_END_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] |
| 80 | ; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0 |
| 81 | ; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_OUTER_EPIL_PREHEADER:%.*]], label [[FOR_END_LOOPEXIT:%.*]] |
| 82 | ; CHECK: for.outer.epil.preheader: |
| 83 | ; CHECK-NEXT: br label [[FOR_OUTER_EPIL:%.*]] |
| 84 | ; CHECK: for.outer.epil: |
| 85 | ; CHECK-NEXT: br label [[FOR_INNER_EPIL:%.*]] |
| 86 | ; CHECK: for.inner.epil: |
| 87 | ; CHECK-NEXT: [[J_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], [[FOR_INNER_EPIL]] ] |
| 88 | ; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL]] ], [ [[ADD_EPIL:%.*]], [[FOR_INNER_EPIL]] ] |
| 89 | ; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[J_EPIL]] |
| 90 | ; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4, !tbaa !0 |
| 91 | ; CHECK-NEXT: [[ADD_EPIL]] = add i32 [[TMP6]], [[SUM_EPIL]] |
| 92 | ; CHECK-NEXT: [[INC_EPIL]] = add nuw i32 [[J_EPIL]], 1 |
| 93 | ; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i32 [[INC_EPIL]], [[J]] |
| 94 | ; CHECK-NEXT: br i1 [[EXITCOND_EPIL]], label [[FOR_LATCH_EPIL:%.*]], label [[FOR_INNER_EPIL]] |
| 95 | ; CHECK: for.latch.epil: |
| 96 | ; CHECK-NEXT: [[ADD_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD_EPIL]], [[FOR_INNER_EPIL]] ] |
| 97 | ; CHECK-NEXT: [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I_UNR]] |
| 98 | ; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL]], i32* [[ARRAYIDX6_EPIL]], align 4, !tbaa !0 |
| 99 | ; CHECK-NEXT: [[ADD8_EPIL:%.*]] = add nuw i32 [[I_UNR]], 1 |
| 100 | ; CHECK-NEXT: [[EPIL_ITER_SUB:%.*]] = sub i32 [[XTRAITER]], 1 |
| 101 | ; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i32 [[EPIL_ITER_SUB]], 0 |
| 102 | ; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_OUTER_EPIL_1:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA:%.*]] |
| 103 | ; CHECK: for.end.loopexit.epilog-lcssa: |
| 104 | ; CHECK-NEXT: br label [[FOR_END_LOOPEXIT]] |
| 105 | ; CHECK: for.end.loopexit: |
| 106 | ; CHECK-NEXT: br label [[FOR_END]] |
| 107 | ; CHECK: for.end: |
| 108 | ; CHECK-NEXT: ret void |
| 109 | ; CHECK: for.outer.epil.1: |
| 110 | ; CHECK-NEXT: br label [[FOR_INNER_EPIL_1:%.*]] |
| 111 | ; CHECK: for.inner.epil.1: |
| 112 | ; CHECK-NEXT: [[J_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[INC_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ] |
| 113 | ; CHECK-NEXT: [[SUM_EPIL_1:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_1]] ], [ [[ADD_EPIL_1:%.*]], [[FOR_INNER_EPIL_1]] ] |
| 114 | ; CHECK-NEXT: [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[J_EPIL_1]] |
| 115 | ; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4, !tbaa !0 |
| 116 | ; CHECK-NEXT: [[ADD_EPIL_1]] = add i32 [[TMP7]], [[SUM_EPIL_1]] |
| 117 | ; CHECK-NEXT: [[INC_EPIL_1]] = add nuw i32 [[J_EPIL_1]], 1 |
| 118 | ; CHECK-NEXT: [[EXITCOND_EPIL_1:%.*]] = icmp eq i32 [[INC_EPIL_1]], [[J]] |
| 119 | ; CHECK-NEXT: br i1 [[EXITCOND_EPIL_1]], label [[FOR_LATCH_EPIL_1:%.*]], label [[FOR_INNER_EPIL_1]] |
| 120 | ; CHECK: for.latch.epil.1: |
| 121 | ; CHECK-NEXT: [[ADD_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD_EPIL_1]], [[FOR_INNER_EPIL_1]] ] |
| 122 | ; CHECK-NEXT: [[ARRAYIDX6_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8_EPIL]] |
| 123 | ; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_1]], i32* [[ARRAYIDX6_EPIL_1]], align 4, !tbaa !0 |
| 124 | ; CHECK-NEXT: [[ADD8_EPIL_1:%.*]] = add nuw i32 [[ADD8_EPIL]], 1 |
| 125 | ; CHECK-NEXT: [[EPIL_ITER_SUB_1:%.*]] = sub i32 [[EPIL_ITER_SUB]], 1 |
| 126 | ; CHECK-NEXT: [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 [[EPIL_ITER_SUB_1]], 0 |
| 127 | ; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label [[FOR_OUTER_EPIL_2:%.*]], label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] |
| 128 | ; CHECK: for.outer.epil.2: |
| 129 | ; CHECK-NEXT: br label [[FOR_INNER_EPIL_2:%.*]] |
| 130 | ; CHECK: for.inner.epil.2: |
| 131 | ; CHECK-NEXT: [[J_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[INC_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ] |
| 132 | ; CHECK-NEXT: [[SUM_EPIL_2:%.*]] = phi i32 [ 0, [[FOR_OUTER_EPIL_2]] ], [ [[ADD_EPIL_2:%.*]], [[FOR_INNER_EPIL_2]] ] |
| 133 | ; CHECK-NEXT: [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[J_EPIL_2]] |
| 134 | ; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_2]], align 4, !tbaa !0 |
| 135 | ; CHECK-NEXT: [[ADD_EPIL_2]] = add i32 [[TMP8]], [[SUM_EPIL_2]] |
| 136 | ; CHECK-NEXT: [[INC_EPIL_2]] = add nuw i32 [[J_EPIL_2]], 1 |
| 137 | ; CHECK-NEXT: [[EXITCOND_EPIL_2:%.*]] = icmp eq i32 [[INC_EPIL_2]], [[J]] |
| 138 | ; CHECK-NEXT: br i1 [[EXITCOND_EPIL_2]], label [[FOR_LATCH_EPIL_2:%.*]], label [[FOR_INNER_EPIL_2]] |
| 139 | ; CHECK: for.latch.epil.2: |
| 140 | ; CHECK-NEXT: [[ADD_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD_EPIL_2]], [[FOR_INNER_EPIL_2]] ] |
| 141 | ; CHECK-NEXT: [[ARRAYIDX6_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[ADD8_EPIL_1]] |
| 142 | ; CHECK-NEXT: store i32 [[ADD_LCSSA_EPIL_2]], i32* [[ARRAYIDX6_EPIL_2]], align 4, !tbaa !0 |
| 143 | ; CHECK-NEXT: [[ADD8_EPIL_2:%.*]] = add nuw i32 [[ADD8_EPIL_1]], 1 |
| 144 | ; CHECK-NEXT: [[EPIL_ITER_SUB_2:%.*]] = sub i32 [[EPIL_ITER_SUB_1]], 1 |
| 145 | ; CHECK-NEXT: br label [[FOR_END_LOOPEXIT_EPILOG_LCSSA]] |
; test1 input: a two-level loop nest. The inner loop adds up the elements of
; B and the outer latch stores that total to A[i]. Both pointer arguments are
; marked noalias.
| 146 | define void @test1(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 {  |
| 147 | entry: |
; The loop nest is entered only when both %J and %I are non-zero.
| 148 | %cmp = icmp ne i32 %J, 0 |
| 149 | %cmpJ = icmp ne i32 %I, 0 |
| 150 | %or.cond = and i1 %cmp, %cmpJ |
| 151 | br i1 %or.cond, label %for.outer.preheader, label %for.end |
| 152 | |
| 153 | for.outer.preheader: |
| 154 | br label %for.outer |
| 155 | |
; Outer loop header: %i starts at 0 and is advanced in for.latch.
| 156 | for.outer: |
| 157 | %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ] |
| 158 | br label %for.inner |
| 159 | |
; Inner loop: single block that accumulates B[j] into %sum and leaves once
; the incremented index equals %J.
| 160 | for.inner: |
| 161 | %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ] |
| 162 | %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ] |
| 163 | %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j |
| 164 | %0 = load i32, i32* %arrayidx, align 4, !tbaa !5 |
| 165 | %add = add i32 %0, %sum |
| 166 | %inc = add nuw i32 %j, 1 |
| 167 | %exitcond = icmp eq i32 %inc, %J |
| 168 | br i1 %exitcond, label %for.latch, label %for.inner |
| 169 | |
; Outer latch: stores the finished sum to A[i], increments %i and exits the
; nest when the incremented value equals %I.
| 170 | for.latch: |
| 171 | %add.lcssa = phi i32 [ %add, %for.inner ] |
| 172 | %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i |
| 173 | store i32 %add.lcssa, i32* %arrayidx6, align 4, !tbaa !5 |
| 174 | %add8 = add nuw i32 %i, 1 |
| 175 | %exitcond25 = icmp eq i32 %add8, %I |
| 176 | br i1 %exitcond25, label %for.end.loopexit, label %for.outer |
| 177 | |
| 178 | for.end.loopexit: |
| 179 | br label %for.end |
| 180 | |
| 181 | for.end: |
| 182 | ret void |
| 183 | } |
| 184 | |
| 185 | |
| 186 | ; CHECK-LABEL: test2 |
| 187 | ; Tests for(i) { sum = A[i]; for(j) sum += B[j]; A[i] = sum; } |
| 188 | ; A[i] load/store dependency should not block unroll-and-jam |
| 189 | ; CHECK: for.outer: |
| 190 | ; CHECK: %i = phi i32 [ %add9.3, %for.latch ], [ 0, %for.outer.preheader.new ] |
| 191 | ; CHECK: %niter = phi i32 [ %unroll_iter, %for.outer.preheader.new ], [ %niter.nsub.3, %for.latch ] |
| 192 | ; CHECK: br label %for.inner |
| 193 | ; CHECK: for.inner: |
| 194 | ; CHECK: %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ] |
| 195 | ; CHECK: %sum = phi i32 [ %2, %for.outer ], [ %add, %for.inner ] |
| 196 | ; CHECK: %j.1 = phi i32 [ 0, %for.outer ], [ %inc.1, %for.inner ] |
| 197 | ; CHECK: %sum.1 = phi i32 [ %3, %for.outer ], [ %add.1, %for.inner ] |
| 198 | ; CHECK: %j.2 = phi i32 [ 0, %for.outer ], [ %inc.2, %for.inner ] |
| 199 | ; CHECK: %sum.2 = phi i32 [ %4, %for.outer ], [ %add.2, %for.inner ] |
| 200 | ; CHECK: %j.3 = phi i32 [ 0, %for.outer ], [ %inc.3, %for.inner ] |
| 201 | ; CHECK: %sum.3 = phi i32 [ %5, %for.outer ], [ %add.3, %for.inner ] |
| 202 | ; CHECK: br i1 %exitcond.3, label %for.latch, label %for.inner |
| 203 | ; CHECK: for.latch: |
| 204 | ; CHECK: %add.lcssa = phi i32 [ %add, %for.inner ] |
| 205 | ; CHECK: %add.lcssa.1 = phi i32 [ %add.1, %for.inner ] |
| 206 | ; CHECK: %add.lcssa.2 = phi i32 [ %add.2, %for.inner ] |
| 207 | ; CHECK: %add.lcssa.3 = phi i32 [ %add.3, %for.inner ] |
| 208 | ; CHECK: br i1 %niter.ncmp.3, label %for.end10.loopexit.unr-lcssa.loopexit, label %for.outer |
| 209 | ; CHECK: for.end10.loopexit.unr-lcssa.loopexit: |
; test2 input: like a plain sum-into-A[i] nest, except the running sum is
; seeded from A[i] (loaded in the outer header) and written back through the
; same pointer in the latch.
| 210 | define void @test2(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 { |
| 211 | entry: |
; The loop nest is entered only when both %J and %I are non-zero.
| 212 | %cmp = icmp ne i32 %J, 0 |
| 213 | %cmp125 = icmp ne i32 %I, 0 |
| 214 | %or.cond = and i1 %cmp, %cmp125 |
| 215 | br i1 %or.cond, label %for.outer.preheader, label %for.end10 |
| 216 | |
| 217 | for.outer.preheader: |
| 218 | br label %for.outer |
| 219 | |
; Outer loop header: reads the current A[i]; %arrayidx is reused by the
; store in for.latch.
| 220 | for.outer: |
| 221 | %i = phi i32 [ %add9, %for.latch ], [ 0, %for.outer.preheader ] |
| 222 | %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i |
| 223 | %0 = load i32, i32* %arrayidx, align 4, !tbaa !5 |
| 224 | br label %for.inner |
| 225 | |
; Inner loop: %sum starts from the loaded A[i] value and adds B[j] on each
; trip until the incremented index equals %J.
| 226 | for.inner: |
| 227 | %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ] |
| 228 | %sum = phi i32 [ %0, %for.outer ], [ %add, %for.inner ] |
| 229 | %arrayidx6 = getelementptr inbounds i32, i32* %B, i32 %j |
| 230 | %1 = load i32, i32* %arrayidx6, align 4, !tbaa !5 |
| 231 | %add = add i32 %1, %sum |
| 232 | %inc = add nuw i32 %j, 1 |
| 233 | %exitcond = icmp eq i32 %inc, %J |
| 234 | br i1 %exitcond, label %for.latch, label %for.inner |
| 235 | |
; Outer latch: writes the sum back to the A[i] address computed in for.outer.
| 236 | for.latch: |
| 237 | %add.lcssa = phi i32 [ %add, %for.inner ] |
| 238 | store i32 %add.lcssa, i32* %arrayidx, align 4, !tbaa !5 |
| 239 | %add9 = add nuw i32 %i, 1 |
| 240 | %exitcond28 = icmp eq i32 %add9, %I |
| 241 | br i1 %exitcond28, label %for.end10.loopexit, label %for.outer |
| 242 | |
| 243 | for.end10.loopexit: |
| 244 | br label %for.end10 |
| 245 | |
| 246 | for.end10: |
| 247 | ret void |
| 248 | } |
| 249 | |
| 250 | |
| 251 | ; CHECK-LABEL: test3 |
| 252 | ; Tests Complete unroll-and-jam of the outer loop |
| 253 | ; CHECK: for.outer: |
| 254 | ; CHECK: br label %for.inner |
| 255 | ; CHECK: for.inner: |
| 256 | ; CHECK: %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ] |
| 257 | ; CHECK: %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ] |
| 258 | ; CHECK: %j.1 = phi i32 [ 0, %for.outer ], [ %inc.1, %for.inner ] |
| 259 | ; CHECK: %sum.1 = phi i32 [ 0, %for.outer ], [ %add.1, %for.inner ] |
| 260 | ; CHECK: %j.2 = phi i32 [ 0, %for.outer ], [ %inc.2, %for.inner ] |
| 261 | ; CHECK: %sum.2 = phi i32 [ 0, %for.outer ], [ %add.2, %for.inner ] |
| 262 | ; CHECK: %j.3 = phi i32 [ 0, %for.outer ], [ %inc.3, %for.inner ] |
| 263 | ; CHECK: %sum.3 = phi i32 [ 0, %for.outer ], [ %add.3, %for.inner ] |
| 264 | ; CHECK: br i1 %exitcond.3, label %for.latch, label %for.inner |
| 265 | ; CHECK: for.latch: |
| 266 | ; CHECK: %add.lcssa = phi i32 [ %add, %for.inner ] |
| 267 | ; CHECK: %add.lcssa.1 = phi i32 [ %add.1, %for.inner ] |
| 268 | ; CHECK: %add.lcssa.2 = phi i32 [ %add.2, %for.inner ] |
| 269 | ; CHECK: %add.lcssa.3 = phi i32 [ %add.3, %for.inner ] |
| 270 | ; CHECK: br label %for.end |
| 271 | ; CHECK: for.end: |
; test3 input: loop nest whose outer loop exits on a constant bound (the
; latch compares the incremented %i against 4). %I is not used in the body.
| 272 | define void @test3(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 { |
| 273 | entry: |
; A zero %J bypasses the loop nest.
| 274 | %cmp = icmp eq i32 %J, 0 |
| 275 | br i1 %cmp, label %for.end, label %for.preheader |
| 276 | |
| 277 | for.preheader: |
| 278 | br label %for.outer |
| 279 | |
| 280 | for.outer: |
| 281 | %i = phi i32 [ %add8, %for.latch ], [ 0, %for.preheader ] |
| 282 | br label %for.inner |
| 283 | |
; Inner loop: each trip computes (%sum + 10) - B[j]. Note that the value
; names are the reverse of the opcodes: %sub is produced by an add and %add
; by a sub.
| 284 | for.inner: |
| 285 | %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ] |
| 286 | %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ] |
| 287 | %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j |
| 288 | %0 = load i32, i32* %arrayidx, align 4, !tbaa !5 |
| 289 | %sub = add i32 %sum, 10 |
| 290 | %add = sub i32 %sub, %0 |
| 291 | %inc = add nuw i32 %j, 1 |
| 292 | %exitcond = icmp eq i32 %inc, %J |
| 293 | br i1 %exitcond, label %for.latch, label %for.inner |
| 294 | |
; Outer latch: uses %add from the inner loop directly (the input has no
; lcssa phi here) and exits once the incremented %i reaches 4.
| 295 | for.latch: |
| 296 | %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i |
| 297 | store i32 %add, i32* %arrayidx6, align 4, !tbaa !5 |
| 298 | %add8 = add nuw nsw i32 %i, 1 |
| 299 | %exitcond23 = icmp eq i32 %add8, 4 |
| 300 | br i1 %exitcond23, label %for.end, label %for.outer |
| 301 | |
| 302 | for.end: |
| 303 | ret void |
| 304 | } |
| 305 | |
| 306 | |
| 307 | ; CHECK-LABEL: test4 |
| 308 | ; Tests Complete unroll-and-jam with a trip count of 1 |
| 309 | ; CHECK: for.outer: |
| 310 | ; CHECK: br label %for.inner |
| 311 | ; CHECK: for.inner: |
| 312 | ; CHECK: %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ] |
| 313 | ; CHECK: %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ] |
| 314 | ; CHECK: br i1 %exitcond, label %for.latch, label %for.inner |
| 315 | ; CHECK: for.latch: |
| 316 | ; CHECK: %add.lcssa = phi i32 [ %add, %for.inner ] |
| 317 | ; CHECK: br label %for.end |
| 318 | ; CHECK: for.end: |
; test4 input: same shape as a constant-bound nest, but the outer latch
; compares the incremented %i against 1, so the exit is taken after the
; first pass through the outer body. %I is not used in the body.
| 319 | define void @test4(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 { |
| 320 | entry: |
; A zero %J bypasses the loop nest.
| 321 | %cmp = icmp eq i32 %J, 0 |
| 322 | br i1 %cmp, label %for.end, label %for.preheader |
| 323 | |
| 324 | for.preheader: |
| 325 | br label %for.outer |
| 326 | |
| 327 | for.outer: |
| 328 | %i = phi i32 [ %add8, %for.latch ], [ 0, %for.preheader ] |
| 329 | br label %for.inner |
| 330 | |
; Inner loop: each trip computes (%sum + 10) - B[j]. The value names are the
; reverse of the opcodes: %sub is produced by an add and %add by a sub.
| 331 | for.inner: |
| 332 | %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ] |
| 333 | %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ] |
| 334 | %arrayidx = getelementptr inbounds i32, i32* %B, i32 %j |
| 335 | %0 = load i32, i32* %arrayidx, align 4, !tbaa !5 |
| 336 | %sub = add i32 %sum, 10 |
| 337 | %add = sub i32 %sub, %0 |
| 338 | %inc = add nuw i32 %j, 1 |
| 339 | %exitcond = icmp eq i32 %inc, %J |
| 340 | br i1 %exitcond, label %for.latch, label %for.inner |
| 341 | |
; Outer latch: stores %add to A[i]; the comparison against 1 is true on the
; first visit because %i starts at 0.
| 342 | for.latch: |
| 343 | %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i |
| 344 | store i32 %add, i32* %arrayidx6, align 4, !tbaa !5 |
| 345 | %add8 = add nuw nsw i32 %i, 1 |
| 346 | %exitcond23 = icmp eq i32 %add8, 1 |
| 347 | br i1 %exitcond23, label %for.end, label %for.outer |
| 348 | |
| 349 | for.end: |
| 350 | ret void |
| 351 | } |
| 352 | |
| 353 | |
| 354 | ; CHECK-LABEL: test5 |
| 355 | ; Multiple SubLoopBlocks |
| 356 | ; CHECK: for.outer: |
| 357 | ; CHECK: br label %for.inner |
| 358 | ; CHECK: for.inner: |
| 359 | ; CHECK: %inc8.sink15 = phi i32 [ 0, %for.outer ], [ %inc8, %for.inc.1 ] |
| 360 | ; CHECK: %inc8.sink15.1 = phi i32 [ 0, %for.outer ], [ %inc8.1, %for.inc.1 ] |
| 361 | ; CHECK: br label %for.inner2 |
| 362 | ; CHECK: for.inner2: |
| 363 | ; CHECK: br i1 %tobool, label %for.cond4, label %for.inc |
| 364 | ; CHECK: for.cond4: |
| 365 | ; CHECK: br i1 %tobool.1, label %for.cond4a, label %for.inc |
| 366 | ; CHECK: for.cond4a: |
| 367 | ; CHECK: br label %for.inc |
| 368 | ; CHECK: for.inc: |
| 369 | ; CHECK: br i1 %tobool.11, label %for.cond4.1, label %for.inc.1 |
| 370 | ; CHECK: for.latch: |
| 371 | ; CHECK: br label %for.end |
| 372 | ; CHECK: for.end: |
| 373 | ; CHECK: ret i32 0 |
| 374 | ; CHECK: for.cond4.1: |
| 375 | ; CHECK: br i1 %tobool.1.1, label %for.cond4a.1, label %for.inc.1 |
| 376 | ; CHECK: for.cond4a.1: |
| 377 | ; CHECK: br label %for.inc.1 |
| 378 | ; CHECK: for.inc.1: |
| 379 | ; CHECK: br i1 %exitcond.1, label %for.latch, label %for.inner |
; Global read by the loads inside test5's inner loop.
| 380 | @a = hidden global [1 x i32] zeroinitializer, align 4 |
; test5 input: the inner loop body is spread over several basic blocks
; (for.inner, for.inner2, for.cond4, for.cond4a, for.inc) with conditional
; control flow between them. The function always returns 0.
| 381 | define i32 @test5() #0 { |
| 382 | entry: |
| 383 | br label %for.outer |
| 384 | |
; Outer loop header: %.sink16 starts at 0 and takes %add from the latch.
| 385 | for.outer: |
| 386 | %.sink16 = phi i32 [ 0, %entry ], [ %add, %for.latch ] |
| 387 | br label %for.inner |
| 388 | |
; Inner loop header: only holds the induction phi, then falls through.
| 389 | for.inner: |
| 390 | %inc8.sink15 = phi i32 [ 0, %for.outer ], [ %inc8, %for.inc ] |
| 391 | br label %for.inner2 |
| 392 | |
| 393 | for.inner2: |
| 394 | %l1 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @a, i32 0, i32 0), align 4 |
| 395 | %tobool = icmp eq i32 %l1, 0 |
| 396 | br i1 %tobool, label %for.cond4, label %for.inc |
| 397 | |
; NOTE(review): the GEP below uses a first index of 1 on the [1 x i32]
; global, i.e. the address just past @a's only element. Presumably this is
; deliberate for the test; confirm before changing.
| 398 | for.cond4: |
| 399 | %l0 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @a, i32 1, i32 0), align 4 |
| 400 | %tobool.1 = icmp eq i32 %l0, 0 |
| 401 | br i1 %tobool.1, label %for.cond4a, label %for.inc |
| 402 | |
| 403 | for.cond4a: |
| 404 | br label %for.inc |
| 405 | |
; Inner latch: %l2 records which of the three paths reached this block; the
; inner loop is left once the incremented counter equals 3.
| 406 | for.inc: |
| 407 | %l2 = phi i32 [ 0, %for.inner2 ], [ 1, %for.cond4 ], [ 2, %for.cond4a ] |
| 408 | %inc8 = add nuw nsw i32 %inc8.sink15, 1 |
| 409 | %exitcond = icmp eq i32 %inc8, 3 |
| 410 | br i1 %exitcond, label %for.latch, label %for.inner |
| 411 | |
; Outer latch: the induction value is masked to 8 bits and stepped by 4;
; the outer loop is left when the result equals 8.
| 412 | for.latch: |
| 413 | %.lcssa = phi i32 [ %l2, %for.inc ] |
| 414 | %conv11 = and i32 %.sink16, 255 |
| 415 | %add = add nuw nsw i32 %conv11, 4 |
| 416 | %cmp = icmp eq i32 %add, 8 |
| 417 | br i1 %cmp, label %for.end, label %for.outer |
| 418 | |
; The phi below carries %l2 out of the nest but is not used by the return.
| 419 | for.end: |
| 420 | %.lcssa.lcssa = phi i32 [ %.lcssa, %for.latch ] |
| 421 | ret i32 0 |
| 422 | } |
| 423 | |
| 424 | |
| 425 | ; CHECK-LABEL: test6 |
| 426 | ; Test odd uses of phi nodes |
| 427 | ; CHECK: for.outer: |
| 428 | ; CHECK: br label %for.inner |
| 429 | ; CHECK: for.inner: |
| 430 | ; CHECK: br i1 %exitcond.3, label %for.inner, label %for.latch |
| 431 | ; CHECK: for.latch: |
| 432 | ; CHECK: br label %for.end |
| 433 | ; CHECK: for.end: |
| 434 | ; CHECK: ret i32 0 |
; Global loaded once in test6's entry block.
| 435 | @f = hidden global i32 0, align 4 |
; test6 input: loop nest with unusual phi uses - phis whose back-edge
; operand is a constant, and exit phis that are never read. The function
; always returns 0.
| 436 | define i32 @test6() #0 { |
| 437 | entry: |
| 438 | %f.promoted10 = load i32, i32* @f, align 4, !tbaa !5 |
| 439 | br label %for.outer |
| 440 | |
; Outer loop header: %p0 is the loaded value on entry and the constant 2 on
; every back edge; the outer counter starts at 2.
| 441 | for.outer: |
| 442 | %p0 = phi i32 [ %f.promoted10, %entry ], [ 2, %for.latch ] |
| 443 | %inc5.sink9 = phi i32 [ 2, %entry ], [ %inc5, %for.latch ] |
| 444 | br label %for.inner |
| 445 | |
; Inner loop: %p1 is %p0 on entry and the constant 2 on the back edge. The
; compare is "ne", so the true successor is the back edge and the false
; successor is the exit.
| 446 | for.inner: |
| 447 | %p1 = phi i32 [ %p0, %for.outer ], [ 2, %for.inner ] |
| 448 | %inc.sink8 = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ] |
| 449 | %inc = add nuw nsw i32 %inc.sink8, 1 |
| 450 | %exitcond = icmp ne i32 %inc, 7 |
| 451 | br i1 %exitcond, label %for.inner, label %for.latch |
| 452 | |
; Outer latch: same "ne" polarity as the inner loop - loops back while the
; incremented counter differs from 7.
| 453 | for.latch: |
| 454 | %.lcssa = phi i32 [ %p1, %for.inner ] |
| 455 | %inc5 = add nuw nsw i32 %inc5.sink9, 1 |
| 456 | %exitcond11 = icmp ne i32 %inc5, 7 |
| 457 | br i1 %exitcond11, label %for.outer, label %for.end |
| 458 | |
; Neither exit phi feeds the return value.
| 459 | for.end: |
| 460 | %.lcssa.lcssa = phi i32 [ %.lcssa, %for.latch ] |
| 461 | %inc.lcssa.lcssa = phi i32 [ 7, %for.latch ] |
| 462 | ret i32 0 |
| 463 | } |
| 464 | |
| 465 | |
| 466 | ; CHECK-LABEL: test7 |
| 467 | ; Has a positive dependency between two stores. Still valid. |
| 468 | ; The negative dependency is in unroll-and-jam-disabled.ll |
| 469 | ; CHECK: for.outer: |
| 470 | ; CHECK: %i = phi i32 [ %add.3, %for.latch ], [ 0, %for.preheader.new ] |
| 471 | ; CHECK: %niter = phi i32 [ %unroll_iter, %for.preheader.new ], [ %niter.nsub.3, %for.latch ] |
| 472 | ; CHECK: br label %for.inner |
| 473 | ; CHECK: for.latch: |
| 474 | ; CHECK: %add9.lcssa = phi i32 [ %add9, %for.inner ] |
| 475 | ; CHECK: %add9.lcssa.1 = phi i32 [ %add9.1, %for.inner ] |
| 476 | ; CHECK: %add9.lcssa.2 = phi i32 [ %add9.2, %for.inner ] |
| 477 | ; CHECK: %add9.lcssa.3 = phi i32 [ %add9.3, %for.inner ] |
| 478 | ; CHECK: br i1 %niter.ncmp.3, label %for.end.loopexit.unr-lcssa.loopexit, label %for.outer |
| 479 | ; CHECK: for.inner: |
| 480 | ; CHECK: %sum = phi i32 [ 0, %for.outer ], [ %add9, %for.inner ] |
| 481 | ; CHECK: %j = phi i32 [ 0, %for.outer ], [ %add10, %for.inner ] |
| 482 | ; CHECK: %sum.1 = phi i32 [ 0, %for.outer ], [ %add9.1, %for.inner ] |
| 483 | ; CHECK: %j.1 = phi i32 [ 0, %for.outer ], [ %add10.1, %for.inner ] |
| 484 | ; CHECK: %sum.2 = phi i32 [ 0, %for.outer ], [ %add9.2, %for.inner ] |
| 485 | ; CHECK: %j.2 = phi i32 [ 0, %for.outer ], [ %add10.2, %for.inner ] |
| 486 | ; CHECK: %sum.3 = phi i32 [ 0, %for.outer ], [ %add9.3, %for.inner ] |
| 487 | ; CHECK: %j.3 = phi i32 [ 0, %for.outer ], [ %add10.3, %for.inner ] |
| 488 | ; CHECK: br i1 %exitcond.3, label %for.latch, label %for.inner |
| 489 | ; CHECK: for.end.loopexit.unr-lcssa.loopexit: |
; test7 input: the outer header stores to A[i] and to A[i+1] before the
; inner loop, and the latch stores the inner-loop sum to A[i] again.
| 490 | define void @test7(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 { |
| 491 | entry: |
; The loop nest is entered only when both %J and %I are non-zero.
| 492 | %cmp = icmp ne i32 %J, 0 |
| 493 | %cmp128 = icmp ne i32 %I, 0 |
| 494 | %or.cond = and i1 %cmp128, %cmp |
| 495 | br i1 %or.cond, label %for.preheader, label %for.end |
| 496 | |
| 497 | for.preheader: |
| 498 | br label %for.outer |
| 499 | |
; Outer loop header: writes 0 to A[i] and 2 to A[i+1]. The increment %add is
; computed here and doubles as the index of the second store.
| 500 | for.outer: |
| 501 | %i = phi i32 [ %add, %for.latch ], [ 0, %for.preheader ] |
| 502 | %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i |
| 503 | store i32 0, i32* %arrayidx, align 4, !tbaa !5 |
| 504 | %add = add nuw i32 %i, 1 |
| 505 | %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %add |
| 506 | store i32 2, i32* %arrayidx2, align 4, !tbaa !5 |
| 507 | br label %for.inner |
| 508 | |
; Outer latch (listed before for.inner in this function): stores the sum to
; the A[i] address from for.outer and exits when %add equals %I.
| 509 | for.latch: |
| 510 | store i32 %add9, i32* %arrayidx, align 4, !tbaa !5 |
| 511 | %exitcond30 = icmp eq i32 %add, %I |
| 512 | br i1 %exitcond30, label %for.end, label %for.outer |
| 513 | |
; Inner loop: accumulates B[j] into %sum until the incremented index equals
; %J.
| 514 | for.inner: |
| 515 | %sum = phi i32 [ 0, %for.outer ], [ %add9, %for.inner ] |
| 516 | %j = phi i32 [ 0, %for.outer ], [ %add10, %for.inner ] |
| 517 | %arrayidx7 = getelementptr inbounds i32, i32* %B, i32 %j |
| 518 | %l1 = load i32, i32* %arrayidx7, align 4, !tbaa !5 |
| 519 | %add9 = add i32 %l1, %sum |
| 520 | %add10 = add nuw i32 %j, 1 |
| 521 | %exitcond = icmp eq i32 %add10, %J |
| 522 | br i1 %exitcond, label %for.latch, label %for.inner |
| 523 | |
| 524 | for.end: |
| 525 | ret void |
| 526 | } |
| 527 | |
| 528 | |
| 529 | ; CHECK-LABEL: test8 |
| 530 | ; Same as test7 with an extra outer loop nest |
| 531 | ; CHECK: for.outest: |
| 532 | ; CHECK: br label %for.outer |
| 533 | ; CHECK: for.outer: |
| 534 | ; CHECK: %i = phi i32 [ %add.3, %for.latch ], [ 0, %for.outest.new ] |
| 535 | ; CHECK: %niter = phi i32 [ %unroll_iter, %for.outest.new ], [ %niter.nsub.3, %for.latch ] |
| 536 | ; CHECK: br label %for.inner |
| 537 | ; CHECK: for.inner: |
| 538 | ; CHECK: %sum = phi i32 [ 0, %for.outer ], [ %add9, %for.inner ] |
| 539 | ; CHECK: %j = phi i32 [ 0, %for.outer ], [ %add10, %for.inner ] |
| 540 | ; CHECK: %sum.1 = phi i32 [ 0, %for.outer ], [ %add9.1, %for.inner ] |
| 541 | ; CHECK: %j.1 = phi i32 [ 0, %for.outer ], [ %add10.1, %for.inner ] |
| 542 | ; CHECK: %sum.2 = phi i32 [ 0, %for.outer ], [ %add9.2, %for.inner ] |
| 543 | ; CHECK: %j.2 = phi i32 [ 0, %for.outer ], [ %add10.2, %for.inner ] |
| 544 | ; CHECK: %sum.3 = phi i32 [ 0, %for.outer ], [ %add9.3, %for.inner ] |
| 545 | ; CHECK: %j.3 = phi i32 [ 0, %for.outer ], [ %add10.3, %for.inner ] |
| 546 | ; CHECK: br i1 %exitcond.3, label %for.latch, label %for.inner |
| 547 | ; CHECK: for.latch: |
| 548 | ; CHECK: %add9.lcssa = phi i32 [ %add9, %for.inner ] |
| 549 | ; CHECK: %add9.lcssa.1 = phi i32 [ %add9.1, %for.inner ] |
| 550 | ; CHECK: %add9.lcssa.2 = phi i32 [ %add9.2, %for.inner ] |
| 551 | ; CHECK: %add9.lcssa.3 = phi i32 [ %add9.3, %for.inner ] |
| 552 | ; CHECK: br i1 %niter.ncmp.3, label %for.cleanup.unr-lcssa.loopexit, label %for.outer |
| 553 | ; CHECK: for.cleanup.epilog-lcssa: |
| 554 | ; CHECK: br label %for.cleanup |
| 555 | ; CHECK: for.cleanup: |
| 556 | ; CHECK: br i1 %exitcond41, label %for.end.loopexit, label %for.outest |
| 557 | ; CHECK: for.end.loopexit: |
| 558 | ; CHECK: br label %for.end |
; test8 input: three-level nest. for.outest wraps a for.outer/for.inner pair
; in which the outer header stores to A[i] and A[i+1] and the latch stores
; the inner-loop sum to A[i].
| 559 | define void @test8(i32 %I, i32 %J, i32* noalias nocapture %A, i32* noalias nocapture readonly %B) #0 { |
| 560 | entry: |
; Skip everything when either %J or %I is zero.
| 561 | %cmp = icmp eq i32 %J, 0 |
| 562 | %cmp336 = icmp eq i32 %I, 0 |
| 563 | %or.cond = or i1 %cmp, %cmp336 |
| 564 | br i1 %or.cond, label %for.end, label %for.preheader |
| 565 | |
| 566 | for.preheader: |
| 567 | br label %for.outest |
| 568 | |
; Outermost loop header: %x.038 starts at 0 and is advanced in for.cleanup.
| 569 | for.outest: |
| 570 | %x.038 = phi i32 [ %inc, %for.cleanup ], [ 0, %for.preheader ] |
| 571 | br label %for.outer |
| 572 | |
; Middle loop header: writes 0 to A[i] and 2 to A[i+1]; %i restarts at 0 on
; every entry from for.outest.
| 573 | for.outer: |
| 574 | %i = phi i32 [ %add, %for.latch ], [ 0, %for.outest ] |
| 575 | %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i |
| 576 | store i32 0, i32* %arrayidx, align 4, !tbaa !5 |
| 577 | %add = add nuw i32 %i, 1 |
| 578 | %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %add |
| 579 | store i32 2, i32* %arrayidx6, align 4, !tbaa !5 |
| 580 | br label %for.inner |
| 581 | |
; Innermost loop: accumulates B[j] into %sum until the incremented index
; equals %J.
| 582 | for.inner: |
| 583 | %sum = phi i32 [ 0, %for.outer ], [ %add9, %for.inner ] |
| 584 | %j = phi i32 [ 0, %for.outer ], [ %add10, %for.inner ] |
| 585 | %arrayidx11 = getelementptr inbounds i32, i32* %B, i32 %j |
| 586 | %l1 = load i32, i32* %arrayidx11, align 4, !tbaa !5 |
| 587 | %add9 = add i32 %l1, %sum |
| 588 | %add10 = add nuw i32 %j, 1 |
| 589 | %exitcond = icmp eq i32 %add10, %J |
| 590 | br i1 %exitcond, label %for.latch, label %for.inner |
| 591 | |
; Middle loop latch: stores the sum to the A[i] address from for.outer and
; leaves the middle loop when %add equals %I.
| 592 | for.latch: |
| 593 | store i32 %add9, i32* %arrayidx, align 4, !tbaa !5 |
| 594 | %exitcond39 = icmp eq i32 %add, %I |
| 595 | br i1 %exitcond39, label %for.cleanup, label %for.outer |
| 596 | |
; Outermost latch: exits once the incremented %x.038 equals 5.
| 597 | for.cleanup: |
| 598 | %inc = add nuw nsw i32 %x.038, 1 |
| 599 | %exitcond41 = icmp eq i32 %inc, 5 |
| 600 | br i1 %exitcond41, label %for.end, label %for.outest |
| 601 | |
| 602 | for.end: |
| 603 | ret void |
| 604 | } |
| 605 | |
| 606 | |
| 607 | ; CHECK-LABEL: test9 |
| 608 | ; Same as test1 with tbaa, not noalias |
| 609 | ; CHECK: for.outer: |
| 610 | ; CHECK: %i = phi i32 [ %add8.3, %for.latch ], [ 0, %for.outer.preheader.new ] |
| 611 | ; CHECK: %niter = phi i32 [ %unroll_iter, %for.outer.preheader.new ], [ %niter.nsub.3, %for.latch ] |
| 612 | ; CHECK: br label %for.inner |
| 613 | ; CHECK: for.inner: |
| 614 | ; CHECK: %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ] |
| 615 | ; CHECK: %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ] |
| 616 | ; CHECK: %j.1 = phi i32 [ 0, %for.outer ], [ %inc.1, %for.inner ] |
| 617 | ; CHECK: %sum.1 = phi i32 [ 0, %for.outer ], [ %add.1, %for.inner ] |
| 618 | ; CHECK: %j.2 = phi i32 [ 0, %for.outer ], [ %inc.2, %for.inner ] |
| 619 | ; CHECK: %sum.2 = phi i32 [ 0, %for.outer ], [ %add.2, %for.inner ] |
| 620 | ; CHECK: %j.3 = phi i32 [ 0, %for.outer ], [ %inc.3, %for.inner ] |
| 621 | ; CHECK: %sum.3 = phi i32 [ 0, %for.outer ], [ %add.3, %for.inner ] |
| 622 | ; CHECK: br i1 %exitcond.3, label %for.latch, label %for.inner |
| 623 | ; CHECK: for.latch: |
| 624 | ; CHECK: %add.lcssa = phi i32 [ %add, %for.inner ] |
| 625 | ; CHECK: %add.lcssa.1 = phi i32 [ %add.1, %for.inner ] |
| 626 | ; CHECK: %add.lcssa.2 = phi i32 [ %add.2, %for.inner ] |
| 627 | ; CHECK: %add.lcssa.3 = phi i32 [ %add.3, %for.inner ] |
| 628 | ; CHECK: br i1 %niter.ncmp.3, label %for.end.loopexit.unr-lcssa.loopexit, label %for.outer |
| 629 | ; CHECK: for.end.loopexit.unr-lcssa.loopexit: |
; test9: two-level loop nest. For each i in [0, %I) the inner loop adds up the
; sign-extended i16 elements %B[0 .. %J-1], and the latch stores that sum to
; %A[i]. Both loops are in bottom-tested (do-while) form and are only entered
; when %I != 0 and %J != 0.
; %A and %B are nocapture but carry no noalias attribute; the i16 load is
; tagged !tbaa !9 and the i32 store is tagged !tbaa !5.
; NOTE(review): presumably the differing TBAA tags are what allow the pass to
; treat the load and the store as independent -- confirm against the pass.
| 630 | define void @test9(i32 %I, i32 %J, i32* nocapture %A, i16* nocapture readonly %B) #0 { |
| 631 | entry: |
; Skip the whole nest unless both trip counts are non-zero.
| 632 | %cmp = icmp ne i32 %J, 0 |
| 633 | %cmpJ = icmp ne i32 %I, 0 |
| 634 | %or.cond = and i1 %cmp, %cmpJ |
| 635 | br i1 %or.cond, label %for.outer.preheader, label %for.end |
| 636 | |
| 637 | for.outer.preheader: |
| 638 | br label %for.outer |
| 639 | |
; Outer loop header: %i is the outer induction variable, starting at 0.
| 640 | for.outer: |
| 641 | %i = phi i32 [ %add8, %for.latch ], [ 0, %for.outer.preheader ] |
| 642 | br label %for.inner |
| 643 | |
; Inner loop: %j indexes %B, %sum is the running total (reset to 0 for every
; outer iteration). The loop exits once %inc == %J.
| 644 | for.inner: |
| 645 | %j = phi i32 [ 0, %for.outer ], [ %inc, %for.inner ] |
| 646 | %sum = phi i32 [ 0, %for.outer ], [ %add, %for.inner ] |
| 647 | %arrayidx = getelementptr inbounds i16, i16* %B, i32 %j |
| 648 | %0 = load i16, i16* %arrayidx, align 4, !tbaa !9 |
| 649 | %sext = sext i16 %0 to i32 |
| 650 | %add = add i32 %sext, %sum |
| 651 | %inc = add nuw i32 %j, 1 |
| 652 | %exitcond = icmp eq i32 %inc, %J |
| 653 | br i1 %exitcond, label %for.latch, label %for.inner |
| 654 | |
; Outer latch: write the inner-loop total to %A[%i], then advance %i and leave
; the nest once %add8 == %I.
| 655 | for.latch: |
| 656 | %add.lcssa = phi i32 [ %add, %for.inner ] |
| 657 | %arrayidx6 = getelementptr inbounds i32, i32* %A, i32 %i |
| 658 | store i32 %add.lcssa, i32* %arrayidx6, align 4, !tbaa !5 |
| 659 | %add8 = add nuw i32 %i, 1 |
| 660 | %exitcond25 = icmp eq i32 %add8, %I |
| 661 | br i1 %exitcond25, label %for.end.loopexit, label %for.outer |
| 662 | |
| 663 | for.end.loopexit: |
| 664 | br label %for.end |
| 665 | |
| 666 | for.end: |
| 667 | ret void |
| 668 | } |
| 669 | |
| 670 | |
| 671 | ; CHECK-LABEL: test10 |
| 672 | ; Be careful not to incorrectly update the exit phi nodes |
| 673 | ; CHECK: %dec.lcssa.lcssa.ph.ph = phi i64 [ 0, %for.inc24 ] |
; Type and globals used by @test10 below: @g is a zero-initialized %struct.a
; (a single i64 field) that receives the function's final store, and @c is a
; zero-initialized one-byte array that is loaded in the function's entry block.
| 674 | %struct.a = type { i64 } |
| 675 | @g = common global %struct.a zeroinitializer, align 8 |
| 676 | @c = common global [1 x i8] zeroinitializer, align 1 |
; test10: loop nest whose inner body contains control flow and whose exit
; blocks hold chains of LCSSA-style phis. The outer loop (for.body) runs while
; %inc25 != 5; the inner loop (for.body2) counts %storemerge down from 4 and
; exits when %dec reaches 0. After the nest, the constant-0 phi
; %dec.lcssa.lcssa is stored to the first field of @g and the function
; returns 0.
| 677 | define signext i16 @test10(i32 %k) #0 { |
| 678 | entry: |
; Both branch conditions are computed once, outside the loops:
; %tobool9 is true when the byte loaded from @c is 0, %tobool13 when %k != 0.
| 679 | %0 = load i8, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @c, i64 0, i64 0), align 1 |
| 680 | %tobool9 = icmp eq i8 %0, 0 |
| 681 | %tobool13 = icmp ne i32 %k, 0 |
| 682 | br label %for.body |
| 683 | |
; Outer loop header; %storemerge82 is the outer counter, starting at 0.
| 684 | for.body: |
| 685 | %storemerge82 = phi i64 [ 0, %entry ], [ %inc25, %for.inc24 ] |
| 686 | br label %for.body2 |
| 687 | |
; Inner loop header; %storemerge is the down-counter, starting at 4.
| 688 | for.body2: |
| 689 | %storemerge = phi i64 [ 4, %for.body ], [ %dec, %for.inc21 ] |
| 690 | br i1 %tobool9, label %for.body2.split, label %for.body2.split2 |
| 691 | |
; Taken when %tobool9 is false; goes straight to for.inc21 if %tobool13 is
; true, otherwise through for.inc21.if.
| 692 | for.body2.split2: |
| 693 | br i1 %tobool13, label %for.inc21, label %for.inc21.if |
| 694 | |
; Taken when %tobool9 is true; goes straight to for.inc21 if %tobool13 is
; true, otherwise through for.inc21.then.
| 695 | for.body2.split: |
| 696 | br i1 %tobool13, label %for.inc21, label %for.inc21.then |
| 697 | |
| 698 | for.inc21.if: |
| 699 | %storemerge.1 = phi i64 [ 0, %for.body2.split2 ] |
| 700 | br label %for.inc21 |
| 701 | |
| 702 | for.inc21.then: |
| 703 | %storemerge.2 = phi i64 [ 0, %for.body2.split ] |
| 704 | %storemerge.3 = phi i32 [ 0, %for.body2.split ] |
| 705 | br label %for.inc21 |
| 706 | |
; Inner latch: merges the four incoming paths, decrements the counter and
; exits the inner loop once %dec == 0.
| 707 | for.inc21: |
| 708 | %storemerge.4 = phi i64 [ %storemerge.1, %for.inc21.if ], [ %storemerge.2, %for.inc21.then ], [ 4, %for.body2.split2 ], [ 4, %for.body2.split ] |
| 709 | %storemerge.5 = phi i32 [ 0, %for.inc21.if ], [ %storemerge.3, %for.inc21.then ], [ 0, %for.body2.split2 ], [ 0, %for.body2.split ] |
| 710 | %dec = add nsw i64 %storemerge, -1 |
| 711 | %tobool = icmp eq i64 %dec, 0 |
| 712 | br i1 %tobool, label %for.inc24, label %for.body2 |
| 713 | |
; Outer latch: carries the inner-loop values out through single-entry phis,
; bumps the outer counter and loops back while %inc25 != 5.
| 714 | for.inc24: |
| 715 | %storemerge.4.lcssa = phi i64 [ %storemerge.4, %for.inc21 ] |
| 716 | %storemerge.5.lcssa = phi i32 [ %storemerge.5, %for.inc21 ] |
| 717 | %inc25 = add nuw nsw i64 %storemerge82, 1 |
| 718 | %exitcond = icmp ne i64 %inc25, 5 |
| 719 | br i1 %exitcond, label %for.body, label %for.end26 |
| 720 | |
; Exit block of the nest. %dec.lcssa.lcssa takes the constant 0 on the edge
; from for.inc24 and is the only exit phi with a use (the store below); the
; two %storemerge.*.lcssa.lcssa phis have no further uses in this function.
| 721 | for.end26: |
| 722 | %dec.lcssa.lcssa = phi i64 [ 0, %for.inc24 ] |
| 723 | %storemerge.4.lcssa.lcssa = phi i64 [ %storemerge.4.lcssa, %for.inc24 ] |
| 724 | %storemerge.5.lcssa.lcssa = phi i32 [ %storemerge.5.lcssa, %for.inc24 ] |
| 725 | store i64 %dec.lcssa.lcssa, i64* getelementptr inbounds (%struct.a, %struct.a* @g, i64 0, i32 0), align 8 |
| 726 | ret i16 0 |
| 727 | } |
| 728 | |
| 729 | |
; TBAA metadata referenced by the !tbaa attachments in this file.
; Type hierarchy: !8 "Simple C/C++ TBAA" (root) -> !7 "omnipotent char" ->
; { !6 "int", !10 "short" }.
; !5 is the access tag for "int" accesses (base type !6, access type !6,
; offset 0) and is attached to the i32 loads and stores.
; !9 is the access tag for "short" accesses (base type !10, access type !10,
; offset 0) and is attached to the i16 load in @test9.
| 730 | !5 = !{!6, !6, i64 0} |
| 731 | !6 = !{!"int", !7, i64 0} |
| 732 | !7 = !{!"omnipotent char", !8, i64 0} |
| 733 | !8 = !{!"Simple C/C++ TBAA"} |
| 734 | !9 = !{!10, !10, i64 0} |
| 735 | !10 = !{!"short", !7, i64 0} |