Brendon Cahoon | 7c8a3b0 | 2015-05-14 20:36:19 +0000 | [diff] [blame^] | 1 | ; RUN: llc -O3 -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s |
| 2 | ; |
| 3 | ; Generate hardware loops when a loop with an unknown trip count is vectorized. |
| 4 | |
| 5 | ; CHECK: loop0(.LBB{{[0-9]*}}_{{[0-9]*}}, r{{[0-9]+}}) |
| 6 | ; CHECK: endloop0 |
| 7 | ; CHECK: loop0(.LBB{{[0-9]*}}_{{[0-9]*}}, r{{[0-9]+}}) |
| 8 | ; CHECK: endloop0 |
| 9 | |
; Input arrays read by @dotprod2: 1000 x i32 each, zero-initialized.
; The 8-byte alignment matches the `align 8` <2 x i32> loads that the
; vector loop performs starting at element 0 of each array.
| 10 | @A = common global [1000 x i32] zeroinitializer, align 8 |
| 11 | @B = common global [1000 x i32] zeroinitializer, align 8 |
| 12 | |
; i32 @dotprod2(i32 %count)
;
; Returns the sum of A[i] * B[i] for i in [0, %count); returns 0 when
; %count <= 0. The trip count is only known at run time.
;
; The body is the already-vectorized form of that loop and contains two loops:
;   * polly.stmt.for.body   - vector loop, two i32 elements per iteration
;   * polly.stmt.for.body22 - scalar loop for the remaining elements
; The CHECK lines at the top of this file look for two loop0/endloop0
; sequences in the generated code.
;
; NOTE(review): the exact block order and instruction sequence are the test
; input; keep them unchanged when editing comments.
| 13 | define i32 @dotprod2(i32 %count) #0 { |
; Entry: skip everything (result 0) unless %count > 0.
| 14 | entry.split: |
| 15 | %cmp6 = icmp sgt i32 %count, 0 |
| 16 | br i1 %cmp6, label %polly.cond, label %for.end |
| 17 | |
; Dedicated exit block of the scalar loop.
| 18 | for.end.loopexit: |
| 19 | br label %for.end |
| 20 | |
; Common return point. The result is 0 (from entry), the partial sum
; %p_add34 (scalar loop skipped), or %p_add (scalar loop executed).
| 21 | for.end: |
| 22 | %sum.0.lcssa.reg2mem.0.load37 = phi i32 [ 0, %entry.split ], [ %p_add34, %polly.loop_if13 ], [ %p_add, %for.end.loopexit ] |
| 23 | ret i32 %sum.0.lcssa.reg2mem.0.load37 |
| 24 | |
; With %count > 1 take the vector path; otherwise go straight to the scalar
; loop guard, which then starts from index 0 with a partial sum of 0.
| 25 | polly.cond: |
| 26 | %0 = icmp sgt i32 %count, 1 |
| 27 | br i1 %0, label %polly.loop_if, label %polly.loop_if13 |
| 28 | |
; Dedicated exit block of the vector loop.
| 29 | polly.loop_exit.loopexit: |
| 30 | br label %polly.loop_exit |
| 31 | |
; Reduce the two-lane accumulator to a scalar (lane 0 + lane 1). The
; accumulator is all zeros when the vector loop was skipped.
| 32 | polly.loop_exit: |
| 33 | %1 = phi <2 x i32> [ zeroinitializer, %polly.loop_if ], [ %addp_vec, %polly.loop_exit.loopexit ] |
| 34 | %2 = extractelement <2 x i32> %1, i32 0 |
| 35 | %3 = extractelement <2 x i32> %1, i32 1 |
| 36 | %add_sum = add i32 %2, %3 |
| 37 | br label %polly.loop_if13 |
| 38 | |
; Vector loop guard. %leftover_lb = (%count - 1) & -2, i.e. %count - 1 with
; its low bit cleared. It is the index at which the scalar loop takes over
; (see %merge.lb). When it is 0 the vector loop is skipped.
| 39 | polly.loop_if: |
| 40 | %4 = add i32 %count, -1 |
| 41 | %leftover_lb = and i32 %4, -2 |
| 42 | %polly.loop_guard = icmp eq i32 %leftover_lb, 0 |
| 43 | br i1 %polly.loop_guard, label %polly.loop_exit, label %polly.loop_preheader |
| 44 | |
; Vector loop. Each iteration loads <2 x i32> from the current positions in
; @A and @B, multiplies them lane-wise and adds the product to the
; accumulator. Both pointers and the induction variable advance by 2.
; The exit test compares the pre-increment %polly.indvar with
; %polly.adjust_ub (= %leftover_lb - 2), so the last iteration is the one
; that starts at index %leftover_lb - 2.
| 45 | polly.stmt.for.body: |
| 46 | %addp_vec28 = phi <2 x i32> [ zeroinitializer, %polly.loop_preheader ], [ %addp_vec, %polly.stmt.for.body ] |
| 47 | %scevgep.phi = phi i32* [ getelementptr inbounds ([1000 x i32], [1000 x i32]* @A, i32 0, i32 0), %polly.loop_preheader ], [ %scevgep.inc, %polly.stmt.for.body ] |
| 48 | %scevgep9.phi = phi i32* [ getelementptr inbounds ([1000 x i32], [1000 x i32]* @B, i32 0, i32 0), %polly.loop_preheader ], [ %scevgep9.inc, %polly.stmt.for.body ] |
| 49 | %polly.indvar = phi i32 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.for.body ] |
| 50 | %vector_ptr = bitcast i32* %scevgep.phi to <2 x i32>* |
| 51 | %_p_vec_full = load <2 x i32>, <2 x i32>* %vector_ptr, align 8 |
| 52 | %vector_ptr10 = bitcast i32* %scevgep9.phi to <2 x i32>* |
| 53 | %_p_vec_full11 = load <2 x i32>, <2 x i32>* %vector_ptr10, align 8 |
| 54 | %mulp_vec = mul <2 x i32> %_p_vec_full11, %_p_vec_full |
| 55 | %addp_vec = add <2 x i32> %mulp_vec, %addp_vec28 |
| 56 | %polly.indvar_next = add nsw i32 %polly.indvar, 2 |
| 57 | %polly.loop_cond = icmp eq i32 %polly.indvar, %polly.adjust_ub |
| 58 | %scevgep.inc = getelementptr i32, i32* %scevgep.phi, i32 2 |
| 59 | %scevgep9.inc = getelementptr i32, i32* %scevgep9.phi, i32 2 |
| 60 | br i1 %polly.loop_cond, label %polly.loop_exit.loopexit, label %polly.stmt.for.body |
| 61 | |
; Vector loop preheader: computes the loop-invariant bound used by the exit
; test above.
| 62 | polly.loop_preheader: |
| 63 | %polly.adjust_ub = add i32 %leftover_lb, -2 |
| 64 | br label %polly.stmt.for.body |
| 65 | |
; Scalar loop guard. %p_add34 is the sum so far and %merge.lb the first index
; not yet processed (both 0 when the vector path was not taken). The scalar
; loop runs only if %merge.lb < %count.
| 66 | polly.loop_if13: |
| 67 | %p_add34 = phi i32 [ 0, %polly.cond ], [ %add_sum, %polly.loop_exit ] |
| 68 | %merge.lb = phi i32 [ 0, %polly.cond ], [ %leftover_lb, %polly.loop_exit ] |
| 69 | %polly.loop_guard17 = icmp slt i32 %merge.lb, %count |
| 70 | br i1 %polly.loop_guard17, label %polly.loop_preheader15, label %for.end |
| 71 | |
; Scalar loop. Adds A[i] * B[i] to the running sum, one element per
; iteration, starting at %merge.lb. It repeats while the pre-increment index
; is less than %polly.adjust_ub20 (= %count - 1).
; The llvm.annotation.i32 call carries the !polly.loop.smallTripCount
; metadata; its result (%5) is not used by any other instruction.
| 72 | polly.stmt.for.body22: |
| 73 | %p_add30 = phi i32 [ %p_add34, %polly.loop_preheader15 ], [ %p_add, %polly.stmt.for.body22 ] |
| 74 | %polly.indvar18 = phi i32 [ %merge.lb, %polly.loop_preheader15 ], [ %polly.indvar_next19, %polly.stmt.for.body22 ] |
| 75 | %5 = tail call i32 @llvm.annotation.i32(i32 %polly.indvar18, i8* null, i8* null, i32 0), !polly.loop.smallTripCount !0 |
| 76 | %scevgep23 = getelementptr [1000 x i32], [1000 x i32]* @A, i32 0, i32 %polly.indvar18 |
| 77 | %_p_scalar_ = load i32, i32* %scevgep23, align 4 |
| 78 | %scevgep24 = getelementptr [1000 x i32], [1000 x i32]* @B, i32 0, i32 %polly.indvar18 |
| 79 | %_p_scalar_25 = load i32, i32* %scevgep24, align 4 |
| 80 | %p_mul = mul nsw i32 %_p_scalar_25, %_p_scalar_ |
| 81 | %p_add = add nsw i32 %p_mul, %p_add30 |
| 82 | %polly.indvar_next19 = add nsw i32 %polly.indvar18, 1 |
| 83 | %polly.loop_cond21 = icmp slt i32 %polly.indvar18, %polly.adjust_ub20 |
| 84 | br i1 %polly.loop_cond21, label %polly.stmt.for.body22, label %for.end.loopexit |
| 85 | |
; Scalar loop preheader: computes the loop-invariant bound used by the exit
; test above.
| 86 | polly.loop_preheader15: |
| 87 | %polly.adjust_ub20 = add i32 %count, -1 |
| 88 | br label %polly.stmt.for.body22 |
| 89 | } |
| 90 | |
; Intrinsic called once per iteration of the scalar loop in @dotprod2.
; NOTE(review): attribute groups #0 and #1 are referenced in this file but no
; `attributes #N = { ... }` definition is visible here - confirm whether they
; are defined elsewhere or intentionally omitted.
| 91 | declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32) #1 |
| 92 | |
; Empty metadata node; used as the operand of the !polly.loop.smallTripCount
; attachment on the llvm.annotation.i32 call.
| 93 | !0 = !{} |