Warren Ristow | d27b0c6 | 2019-05-08 18:50:07 +0000 | [diff] [blame] | 1 | ; RUN: opt -loop-vectorize -force-vector-width=2 -S < %s 2>&1 | FileCheck %s |
| 2 | ; RUN: opt -indvars -S < %s 2>&1 | FileCheck %s -check-prefix=INDVARCHECK |
| 3 | |
| 4 | target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" |
| 5 | target triple = "x86_64-unknown-linux-gnu" |
| 6 | |
| 7 | ; Produced from test-case: |
| 8 | ; |
| 9 | ; void testCountIncrLoop(unsigned char *ptr, int lim, int count, int val) |
| 10 | ; { |
| 11 | ; int inx = 0; |
| 12 | ; for (int outer_i = 0; outer_i < lim; ++outer_i) { |
| 13 | ; if (count > 0) { // At runtime, 'count' is 0, so the following code is dead. |
| 14 | ; int result = val; |
| 15 | ; int tmp = count; |
| 16 | ; |
| 17 | ; while (tmp < 8) { |
| 18 | ; result += val >> tmp; |
| 19 | ; tmp += count; |
| 20 | ; } |
| 21 | ; |
| 22 | ; ptr[inx++] = (unsigned char) result; |
| 23 | ; } |
| 24 | ; } |
| 25 | ; } |
| 26 | ; |
| 27 | ; No explicit division appears in the input, but a division is generated during |
| 28 | ; vectorization, and that division is a division-by-0 when the input 'count' |
| 29 | ; is 0, so it cannot be hoisted above the guard of 'count > 0'. |
| 30 | |
| 31 | ; Verify that a 'udiv' does not appear in the 'loop1.preheader' block, and that |
| 32 | ; a 'udiv' has been inserted at the top of the 'while.body.preheader' block. |
| 33 | define void @testCountIncrLoop(i8* %ptr, i32 %lim, i32 %count, i32 %val) { |
| 34 | ; CHECK-LABEL: @testCountIncrLoop( |
| 35 | ; CHECK-NEXT: entry: |
| 36 | ; CHECK: loop1.preheader: |
| 37 | ; CHECK-NOT: udiv |
| 38 | ; CHECK: loop1.body: |
| 39 | ; CHECK: while.cond.preheader: |
| 40 | ; CHECK: while.body.preheader: |
| 41 | ; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 [[TMP0:%.*]], [[COUNT:%.*]] |
| 42 | ; CHECK: vector.ph: |
| 43 | ; CHECK: exit: |
| 44 | ; CHECK: ret void |
| 45 | ; |
| 46 | entry: |
| 47 | %cmp1 = icmp sgt i32 %lim, 0 |
| 48 | br i1 %cmp1, label %loop1.preheader, label %exit |
| 49 | |
| 50 | loop1.preheader: ; preds = %entry |
| 51 | %cmp2 = icmp sgt i32 %count, 0 |
| 52 | %cmp4 = icmp slt i32 %count, 8 |
| 53 | br label %loop1.body |
| 54 | |
| 55 | loop1.body: ; preds = %loop1.inc, %loop1.preheader |
| 56 | %outer_i = phi i32 [ 0, %loop1.preheader ], [ %outer_i.1, %loop1.inc ] |
| 57 | %inx.1 = phi i32 [ 0, %loop1.preheader ], [ %inx.2, %loop1.inc ] |
| 58 | br i1 %cmp2, label %while.cond.preheader, label %loop1.inc |
| 59 | |
| 60 | while.cond.preheader: ; preds = %loop1.body |
| 61 | br i1 %cmp4, label %while.body, label %while.end |
| 62 | |
| 63 | while.body: ; preds = %while.body, %while.cond.preheader |
| 64 | %tmp = phi i32 [ %add3, %while.body ], [ %count, %while.cond.preheader ] |
| 65 | %result.1 = phi i32 [ %add, %while.body ], [ %val, %while.cond.preheader ] |
| 66 | %shr = ashr i32 %val, %tmp |
| 67 | %add = add nsw i32 %shr, %result.1 |
| 68 | %add3 = add nsw i32 %tmp, %count |
| 69 | %cmp3 = icmp slt i32 %add3, 8 |
| 70 | br i1 %cmp3, label %while.body, label %while.end |
| 71 | |
| 72 | while.end: ; preds = %while.body, %while.cond.preheader |
| 73 | %result.0.lcssa = phi i32 [ %val, %while.cond.preheader ], [ %add, %while.body ] |
| 74 | %conv = trunc i32 %result.0.lcssa to i8 |
| 75 | %inc = add nsw i32 %inx.1, 1 |
| 76 | %idxprom = sext i32 %inx.1 to i64 |
| 77 | %arrayidx = getelementptr inbounds i8, i8* %ptr, i64 %idxprom |
| 78 | store i8 %conv, i8* %arrayidx, align 1 |
| 79 | br label %loop1.inc |
| 80 | |
| 81 | loop1.inc: ; preds = %while.end, %loop1.body |
| 82 | %inx.2 = phi i32 [ %inc, %while.end ], [ %inx.1, %loop1.body ] |
| 83 | %outer_i.1 = add nuw nsw i32 %outer_i, 1 |
| 84 | %exitcond = icmp eq i32 %outer_i.1, %lim |
| 85 | br i1 %exitcond, label %exit, label %loop1.body |
| 86 | |
| 87 | exit: ; preds = %loop1.inc, %entry |
| 88 | ret void |
| 89 | } |
| 90 | |
| 91 | ; These next tests are all based on the following source code, with slight |
| 92 | ; variations on the calculation of 'incr' (all of which are loop-invariant |
| 93 | ; divisions, but only some of which can be safely hoisted): |
| 94 | ; |
| 95 | ; uint32_t foo(uint32_t *ptr, uint32_t start1, uint32_t start2) { |
| 96 | ; uint32_t counter1, counter2; |
| 97 | ; uint32_t val = start1; |
| 98 | ; for (counter1 = 1; counter1 < 100; ++counter1) { |
| 99 | ; uint32_t index = 0; |
| 100 | ; val += ptr[index]; |
| 101 | ; for (counter2 = start2; counter2 < 10; ++counter2) { |
| 102 | ; // Division is loop invariant, and denominator is guaranteed non-zero: |
| 103 | ; // Safe to hoist it out of the inner loop. |
| 104 | ; uint32_t incr = 16 / counter1; |
| 105 | ; index += incr; |
| 106 | ; val += ptr[index]; |
| 107 | ; } |
| 108 | ; } |
| 109 | ; return val; |
| 110 | ; } |
| 111 | |
| 112 | ; This version is as written above, where 'incr' is '16/counter1', and it is |
| 113 | ; guaranted that 'counter1' is always non-zero. So it is safe to hoist the |
| 114 | ; division from the inner loop to the preheader. |
| 115 | ; |
| 116 | ; Verify that the 'udiv' is hoisted to the preheader, and is not in the loop body. |
| 117 | define i32 @NonZeroDivHoist(i32* nocapture readonly %ptr, i32 %start1, i32 %start2) { |
| 118 | ; INDVARCHECK-LABEL: @NonZeroDivHoist( |
| 119 | ; INDVARCHECK-NEXT: entry: |
| 120 | ; INDVARCHECK: for.body3.lr.ph: |
| 121 | ; INDVARCHECK-NEXT: [[TMP0:%.*]] = udiv i64 16, [[INDVARS_IV:%.*]] |
| 122 | ; INDVARCHECK-NEXT: br label [[FOR_BODY3:%.*]] |
| 123 | ; INDVARCHECK: for.body3: |
| 124 | ; INDVARCHECK-NOT: udiv |
| 125 | ; INDVARCHECK: for.end10: |
| 126 | ; |
| 127 | entry: |
| 128 | br label %for.cond |
| 129 | |
| 130 | for.cond: ; preds = %for.end, %entry |
| 131 | %val.0 = phi i32 [ %start1, %entry ], [ %val.1.lcssa, %for.end ] |
| 132 | %counter1.0 = phi i32 [ 1, %entry ], [ %inc9, %for.end ] |
| 133 | %cmp = icmp ult i32 %counter1.0, 100 |
| 134 | br i1 %cmp, label %for.body, label %for.end10 |
| 135 | |
| 136 | for.body: ; preds = %for.cond |
| 137 | %tmp = load i32, i32* %ptr, align 4 |
| 138 | %add = add i32 %tmp, %val.0 |
| 139 | %cmp224 = icmp ult i32 %start2, 10 |
| 140 | br i1 %cmp224, label %for.body3.lr.ph, label %for.end |
| 141 | |
| 142 | for.body3.lr.ph: ; preds = %for.body |
| 143 | br label %for.body3 |
| 144 | |
| 145 | for.body3: ; preds = %for.body3, %for.body3.lr.ph |
| 146 | %index.027 = phi i32 [ 0, %for.body3.lr.ph ], [ %add4, %for.body3 ] |
| 147 | %val.126 = phi i32 [ %add, %for.body3.lr.ph ], [ %add7, %for.body3 ] |
| 148 | %counter2.025 = phi i32 [ %start2, %for.body3.lr.ph ], [ %inc, %for.body3 ] |
| 149 | %div = udiv i32 16, %counter1.0 |
| 150 | %add4 = add i32 %div, %index.027 |
| 151 | %idxprom5 = zext i32 %add4 to i64 |
| 152 | %arrayidx6 = getelementptr inbounds i32, i32* %ptr, i64 %idxprom5 |
| 153 | %tmp1 = load i32, i32* %arrayidx6, align 4 |
| 154 | %add7 = add i32 %tmp1, %val.126 |
| 155 | %inc = add i32 %counter2.025, 1 |
| 156 | %cmp2 = icmp ult i32 %inc, 10 |
| 157 | br i1 %cmp2, label %for.body3, label %for.cond1.for.end_crit_edge |
| 158 | |
| 159 | for.cond1.for.end_crit_edge: ; preds = %for.body3 |
| 160 | %split = phi i32 [ %add7, %for.body3 ] |
| 161 | br label %for.end |
| 162 | |
| 163 | for.end: ; preds = %for.cond1.for.end_crit_edge, %for.body |
| 164 | %val.1.lcssa = phi i32 [ %split, %for.cond1.for.end_crit_edge ], [ %add, %for.body ] |
| 165 | %inc9 = add i32 %counter1.0, 1 |
| 166 | br label %for.cond |
| 167 | |
| 168 | for.end10: ; preds = %for.cond |
| 169 | %val.0.lcssa = phi i32 [ %val.0, %for.cond ] |
| 170 | ret i32 %val.0.lcssa |
| 171 | } |
| 172 | |
| 173 | ; This version is identical to the above 'NonZeroDivHoist' case, except the |
| 174 | ; outer ('counter1') loop starts at the unknown value of 'start1' rather than 1, |
| 175 | ; and so it is illegal to hoist the division because if 'start1' is 0, hoisting |
| 176 | ; it would incorrectly cause a divide-by-zero trap. |
| 177 | ; |
| 178 | ; Verify that the 'udiv' is not hoisted to the preheader, and it remains in the |
| 179 | ; loop body. |
| 180 | define i32 @ZeroDivNoHoist(i32* nocapture readonly %ptr, i32 %start1, i32 %start2) { |
| 181 | ; INDVARCHECK-LABEL: @ZeroDivNoHoist( |
| 182 | ; INDVARCHECK-NEXT: entry: |
| 183 | ; INDVARCHECK-NOT: udiv |
| 184 | ; INDVARCHECK: for.body3: |
| 185 | ; INDVARCHECK: [[TMP1:%.*]] = udiv i64 16, [[INDVARS_IV:%.*]] |
| 186 | ; INDVARCHECK: for.cond1.for.end_crit_edge: |
| 187 | ; |
| 188 | entry: |
| 189 | br label %for.cond |
| 190 | |
| 191 | for.cond: ; preds = %for.end, %entry |
| 192 | %val.0 = phi i32 [ %start1, %entry ], [ %val.1.lcssa, %for.end ] |
| 193 | %counter1.0 = phi i32 [ %start1, %entry ], [ %inc9, %for.end ] |
| 194 | %cmp = icmp ult i32 %counter1.0, 100 |
| 195 | br i1 %cmp, label %for.body, label %for.end10 |
| 196 | |
| 197 | for.body: ; preds = %for.cond |
| 198 | %tmp = load i32, i32* %ptr, align 4 |
| 199 | %add = add i32 %tmp, %val.0 |
| 200 | %cmp224 = icmp ult i32 %start2, 10 |
| 201 | br i1 %cmp224, label %for.body3.lr.ph, label %for.end |
| 202 | |
| 203 | for.body3.lr.ph: ; preds = %for.body |
| 204 | br label %for.body3 |
| 205 | |
| 206 | for.body3: ; preds = %for.body3, %for.body3.lr.ph |
| 207 | %index.027 = phi i32 [ 0, %for.body3.lr.ph ], [ %add4, %for.body3 ] |
| 208 | %val.126 = phi i32 [ %add, %for.body3.lr.ph ], [ %add7, %for.body3 ] |
| 209 | %counter2.025 = phi i32 [ %start2, %for.body3.lr.ph ], [ %inc, %for.body3 ] |
| 210 | %div = udiv i32 16, %counter1.0 |
| 211 | %add4 = add i32 %div, %index.027 |
| 212 | %idxprom5 = zext i32 %add4 to i64 |
| 213 | %arrayidx6 = getelementptr inbounds i32, i32* %ptr, i64 %idxprom5 |
| 214 | %tmp1 = load i32, i32* %arrayidx6, align 4 |
| 215 | %add7 = add i32 %tmp1, %val.126 |
| 216 | %inc = add i32 %counter2.025, 1 |
| 217 | %cmp2 = icmp ult i32 %inc, 10 |
| 218 | br i1 %cmp2, label %for.body3, label %for.cond1.for.end_crit_edge |
| 219 | |
| 220 | for.cond1.for.end_crit_edge: ; preds = %for.body3 |
| 221 | %split = phi i32 [ %add7, %for.body3 ] |
| 222 | br label %for.end |
| 223 | |
| 224 | for.end: ; preds = %for.cond1.for.end_crit_edge, %for.body |
| 225 | %val.1.lcssa = phi i32 [ %split, %for.cond1.for.end_crit_edge ], [ %add, %for.body ] |
| 226 | %inc9 = add i32 %counter1.0, 1 |
| 227 | br label %for.cond |
| 228 | |
| 229 | for.end10: ; preds = %for.cond |
| 230 | %val.0.lcssa = phi i32 [ %val.0, %for.cond ] |
| 231 | ret i32 %val.0.lcssa |
| 232 | } |
| 233 | |
| 234 | ; This version is has a clearly safe division by a non-zero constant (16). The |
| 235 | ; division is transformed to a logical-shift-right of 4, and it is safely |
| 236 | ; hoisted. |
| 237 | ; |
| 238 | ; Verify that the division-operation is hoisted, and that it appears as a |
| 239 | ; right-shift ('lshr') rather than an explicit division. |
| 240 | define i32 @DivBy16Hoist(i32* nocapture readonly %ptr, i32 %start1, i32 %start2) { |
| 241 | ; INDVARCHECK-LABEL: @DivBy16Hoist( |
| 242 | ; INDVARCHECK-NEXT: entry: |
| 243 | ; INDVARCHECK: for.cond: |
| 244 | ; INDVARCHECK: [[TMP1:%.*]] = lshr i64 [[INDVARS_IV:%.*]], 4 |
| 245 | ; INDVARCHECK: for.body: |
| 246 | ; INDVARCHECK-NOT: lshr |
| 247 | ; INDVARCHECK-NOT: udiv |
| 248 | ; INDVARCHECK: for.end10: |
| 249 | ; |
| 250 | entry: |
| 251 | br label %for.cond |
| 252 | |
| 253 | for.cond: ; preds = %for.end, %entry |
| 254 | %val.0 = phi i32 [ %start1, %entry ], [ %val.1.lcssa, %for.end ] |
| 255 | %counter1.0 = phi i32 [ %start1, %entry ], [ %inc9, %for.end ] |
| 256 | %cmp = icmp ult i32 %counter1.0, 100 |
| 257 | br i1 %cmp, label %for.body, label %for.end10 |
| 258 | |
| 259 | for.body: ; preds = %for.cond |
| 260 | %tmp = load i32, i32* %ptr, align 4 |
| 261 | %add = add i32 %tmp, %val.0 |
| 262 | %cmp224 = icmp ult i32 %start2, 10 |
| 263 | br i1 %cmp224, label %for.body3.lr.ph, label %for.end |
| 264 | |
| 265 | for.body3.lr.ph: ; preds = %for.body |
| 266 | br label %for.body3 |
| 267 | |
| 268 | for.body3: ; preds = %for.body3, %for.body3.lr.ph |
| 269 | %index.027 = phi i32 [ 0, %for.body3.lr.ph ], [ %add4, %for.body3 ] |
| 270 | %val.126 = phi i32 [ %add, %for.body3.lr.ph ], [ %add7, %for.body3 ] |
| 271 | %counter2.025 = phi i32 [ %start2, %for.body3.lr.ph ], [ %inc, %for.body3 ] |
| 272 | %div = udiv i32 %counter1.0, 16 |
| 273 | %add4 = add i32 %div, %index.027 |
| 274 | %idxprom5 = zext i32 %add4 to i64 |
| 275 | %arrayidx6 = getelementptr inbounds i32, i32* %ptr, i64 %idxprom5 |
| 276 | %tmp1 = load i32, i32* %arrayidx6, align 4 |
| 277 | %add7 = add i32 %tmp1, %val.126 |
| 278 | %inc = add i32 %counter2.025, 1 |
| 279 | %cmp2 = icmp ult i32 %inc, 10 |
| 280 | br i1 %cmp2, label %for.body3, label %for.cond1.for.end_crit_edge |
| 281 | |
| 282 | for.cond1.for.end_crit_edge: ; preds = %for.body3 |
| 283 | %split = phi i32 [ %add7, %for.body3 ] |
| 284 | br label %for.end |
| 285 | |
| 286 | for.end: ; preds = %for.cond1.for.end_crit_edge, %for.body |
| 287 | %val.1.lcssa = phi i32 [ %split, %for.cond1.for.end_crit_edge ], [ %add, %for.body ] |
| 288 | %inc9 = add i32 %counter1.0, 1 |
| 289 | br label %for.cond |
| 290 | |
| 291 | for.end10: ; preds = %for.cond |
| 292 | %val.0.lcssa = phi i32 [ %val.0, %for.cond ] |
| 293 | ret i32 %val.0.lcssa |
| 294 | } |
| 295 | |
| 296 | ; This version is has a clearly safe division by a non-zero constant (17). The |
| 297 | ; division is safely hoisted, as it was in the 'DivBy16Hoist' verison, but here |
| 298 | ; it remains a division, rather than being transformed to a right-shift. |
| 299 | ; |
| 300 | ; Verify that the division-operation is hoisted. |
| 301 | define i32 @DivBy17Hoist(i32* nocapture readonly %ptr, i32 %start1, i32 %start2) { |
| 302 | ; INDVARCHECK-LABEL: @DivBy17Hoist( |
| 303 | ; INDVARCHECK-NEXT: entry: |
| 304 | ; INDVARCHECK: for.cond: |
| 305 | ; INDVARCHECK: [[TMP1:%.*]] = udiv i64 [[INDVARS_IV:%.*]], 17 |
| 306 | ; INDVARCHECK: for.body: |
| 307 | ; INDVARCHECK-NOT: udiv |
| 308 | ; INDVARCHECK: for.end10: |
| 309 | ; |
| 310 | entry: |
| 311 | br label %for.cond |
| 312 | |
| 313 | for.cond: ; preds = %for.end, %entry |
| 314 | %val.0 = phi i32 [ %start1, %entry ], [ %val.1.lcssa, %for.end ] |
| 315 | %counter1.0 = phi i32 [ %start1, %entry ], [ %inc9, %for.end ] |
| 316 | %cmp = icmp ult i32 %counter1.0, 100 |
| 317 | br i1 %cmp, label %for.body, label %for.end10 |
| 318 | |
| 319 | for.body: ; preds = %for.cond |
| 320 | %tmp = load i32, i32* %ptr, align 4 |
| 321 | %add = add i32 %tmp, %val.0 |
| 322 | %cmp224 = icmp ult i32 %start2, 10 |
| 323 | br i1 %cmp224, label %for.body3.lr.ph, label %for.end |
| 324 | |
| 325 | for.body3.lr.ph: ; preds = %for.body |
| 326 | br label %for.body3 |
| 327 | |
| 328 | for.body3: ; preds = %for.body3, %for.body3.lr.ph |
| 329 | %index.027 = phi i32 [ 0, %for.body3.lr.ph ], [ %add4, %for.body3 ] |
| 330 | %val.126 = phi i32 [ %add, %for.body3.lr.ph ], [ %add7, %for.body3 ] |
| 331 | %counter2.025 = phi i32 [ %start2, %for.body3.lr.ph ], [ %inc, %for.body3 ] |
| 332 | %div = udiv i32 %counter1.0, 17 |
| 333 | %add4 = add i32 %div, %index.027 |
| 334 | %idxprom5 = zext i32 %add4 to i64 |
| 335 | %arrayidx6 = getelementptr inbounds i32, i32* %ptr, i64 %idxprom5 |
| 336 | %tmp1 = load i32, i32* %arrayidx6, align 4 |
| 337 | %add7 = add i32 %tmp1, %val.126 |
| 338 | %inc = add i32 %counter2.025, 1 |
| 339 | %cmp2 = icmp ult i32 %inc, 10 |
| 340 | br i1 %cmp2, label %for.body3, label %for.cond1.for.end_crit_edge |
| 341 | |
| 342 | for.cond1.for.end_crit_edge: ; preds = %for.body3 |
| 343 | %split = phi i32 [ %add7, %for.body3 ] |
| 344 | br label %for.end |
| 345 | |
| 346 | for.end: ; preds = %for.cond1.for.end_crit_edge, %for.body |
| 347 | %val.1.lcssa = phi i32 [ %split, %for.cond1.for.end_crit_edge ], [ %add, %for.body ] |
| 348 | %inc9 = add i32 %counter1.0, 1 |
| 349 | br label %for.cond |
| 350 | |
| 351 | for.end10: ; preds = %for.cond |
| 352 | %val.0.lcssa = phi i32 [ %val.0, %for.cond ] |
| 353 | ret i32 %val.0.lcssa |
| 354 | } |