Brendon Cahoon | 254f889 | 2016-07-29 16:44:44 +0000 | [diff] [blame] | 1 | ; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner < %s | FileCheck %s |
| 2 | ; RUN: llc -march=hexagon -mcpu=hexagonv5 -O2 < %s | FileCheck %s |
| 3 | ; RUN: llc -march=hexagon -mcpu=hexagonv5 -O3 < %s | FileCheck %s |
| 4 | ; |
| 5 | ; Check that the vectorized dot-product loop is pipelined into a single packet: two "+= mpyi" and two "memd", closed by "} :endloop0". |
| 6 | ; |
| 7 | ; CHECK: { |
| 8 | ; CHECK: += mpyi |
| 9 | ; CHECK: += mpyi |
| 10 | ; CHECK: memd |
| 11 | ; CHECK: memd |
| 12 | ; CHECK: } :endloop0 |
| 13 | |
; Zero-initialized 5000-element i32 input arrays read by @vecMultGlobal.
; Both are 8-byte aligned, matching the align 8 <2 x i32> loads in the loop.
| 14 | @a = common global [5000 x i32] zeroinitializer, align 8 |
| 15 | @b = common global [5000 x i32] zeroinitializer, align 8 |
| 16 | |
; Dot product of the global arrays @a and @b, computed two i32 lanes at a time.
; Takes no arguments; returns the i32 sum of the two lane accumulators.
| 17 | define i32 @vecMultGlobal() { |
| 18 | entry: |
| 19 | br label %polly.loop_body |
| 20 | |
; Loop exit: add lane 0 and lane 1 of the final accumulator and return the sum.
| 21 | polly.loop_after: |
| 22 | %0 = extractelement <2 x i32> %addp_vec, i32 0 |
| 23 | %1 = extractelement <2 x i32> %addp_vec, i32 1 |
| 24 | %add_sum = add i32 %0, %1 |
| 25 | ret i32 %add_sum |
| 26 | |
; Loop body: the index starts at 0 and advances by 2 while the next index is
; below 5000 (signed compare), i.e. indices 0, 2, ..., 4998.
| 27 | polly.loop_body: |
| 28 | %polly.loopiv13 = phi i32 [ 0, %entry ], [ %polly.next_loopiv, %polly.loop_body ] |
; Two-lane running sum, zero on entry and fed back from %addp_vec.
| 29 | %reduction.012 = phi <2 x i32> [ zeroinitializer, %entry ], [ %addp_vec, %polly.loop_body ] |
| 30 | %polly.next_loopiv = add nsw i32 %polly.loopiv13, 2 |
; Address element [index] of @b and @a, then reinterpret each i32 pointer as a
; <2 x i32> pointer so one load fetches elements [index] and [index + 1].
| 31 | %p_arrayidx1 = getelementptr [5000 x i32], [5000 x i32]* @b, i32 0, i32 %polly.loopiv13 |
| 32 | %p_arrayidx = getelementptr [5000 x i32], [5000 x i32]* @a, i32 0, i32 %polly.loopiv13 |
| 33 | %vector_ptr = bitcast i32* %p_arrayidx1 to <2 x i32>* |
| 34 | %_p_vec_full = load <2 x i32>, <2 x i32>* %vector_ptr, align 8 |
| 35 | %vector_ptr7 = bitcast i32* %p_arrayidx to <2 x i32>* |
| 36 | %_p_vec_full8 = load <2 x i32>, <2 x i32>* %vector_ptr7, align 8 |
; Lane-wise multiply of the two loaded vectors, accumulated into the running sum.
| 37 | %mulp_vec = mul <2 x i32> %_p_vec_full8, %_p_vec_full |
| 38 | %addp_vec = add <2 x i32> %mulp_vec, %reduction.012 |
| 39 | %2 = icmp slt i32 %polly.next_loopiv, 5000 |
| 40 | br i1 %2, label %polly.loop_body, label %polly.loop_after |
| 41 | } |