Krzysztof Parzyszek | 7793ddb | 2016-02-12 22:53:35 +0000 | [diff] [blame^] | 1 | ; RUN: llc -march=hexagon -mcpu=hexagonv60 -enable-hexagon-hvx-double \ |
| 2 | ; RUN: -hexagon-bit=0 < %s | FileCheck %s |
| 3 | |
| 4 | ; The spill that would show up as the vmem access matched below should be eliminated. |
| 5 | ; CHECK-NOT: vmem(r29+#6) |
| 6 | |
; @test: takes two byte pointers (%key, %data1), reinterprets them as pointers
; to <32 x i32> vectors, and runs a single-block loop made of vector loads,
; calls to the Hexagon V6 128B intrinsics vgtb / vmux / vshuffeb / vmpyuh.acc,
; and vector stores. Returns void.
; The FileCheck directive at the top of the file requires that the string
; "vmem(r29+#6)" does not appear in the llc output for this function.
; NOTE(review): the many undef/null operands and the constant branch condition
; suggest this is an automatically reduced reproducer -- confirm before
; "cleaning up" any instruction, since the exact shape may be what triggers
; the spill being tested.
| 7 | define void @test(i8* noalias nocapture %key, i8* noalias nocapture %data1) #0 { |
| 8 | entry: |
; Reinterpret both i8* arguments as <32 x i32>* before entering the loop.
| 9 | %0 = bitcast i8* %key to <32 x i32>* |
| 10 | %1 = bitcast i8* %data1 to <32 x i32>* |
| 11 | br label %for.body |
| 12 | |
| 13 | for.body: |
; Loop-carried values:
;   %pkey.0542    - %0 on entry, null on the back edge
;   %pdata0.0541  - null on entry, %add.ptr48 on the back edge
;   %pdata1.0540  - %1 on entry, %add.ptr49 on the back edge
;   %dAccum0.0539 - undef on entry, %86 (end of the vmpyuh.acc chain) afterwards
| 14 | %pkey.0542 = phi <32 x i32>* [ %0, %entry ], [ null, %for.body ] |
| 15 | %pdata0.0541 = phi <32 x i32>* [ null, %entry ], [ %add.ptr48, %for.body ] |
| 16 | %pdata1.0540 = phi <32 x i32>* [ %1, %entry ], [ %add.ptr49, %for.body ] |
| 17 | %dAccum0.0539 = phi <64 x i32> [ undef, %entry ], [ %86, %for.body ] |
; 128-byte-aligned <32 x i32> loads (%2 .. %19) from fixed element offsets off
; the three pointers; several of them load from undef or null addresses.
| 18 | %2 = load <32 x i32>, <32 x i32>* %pkey.0542, align 128 |
| 19 | %3 = load <32 x i32>, <32 x i32>* %pdata0.0541, align 128 |
| 20 | %4 = load <32 x i32>, <32 x i32>* undef, align 128 |
| 21 | %arrayidx4 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 2 |
| 22 | %5 = load <32 x i32>, <32 x i32>* %arrayidx4, align 128 |
| 23 | %arrayidx5 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 2 |
| 24 | %6 = load <32 x i32>, <32 x i32>* %arrayidx5, align 128 |
| 25 | %7 = load <32 x i32>, <32 x i32>* null, align 128 |
| 26 | %8 = load <32 x i32>, <32 x i32>* undef, align 128 |
| 27 | %9 = load <32 x i32>, <32 x i32>* null, align 128 |
| 28 | %arrayidx9 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 3 |
| 29 | %arrayidx10 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 6 |
| 30 | %10 = load <32 x i32>, <32 x i32>* %arrayidx10, align 128 |
| 31 | %arrayidx12 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 4 |
| 32 | %11 = load <32 x i32>, <32 x i32>* %arrayidx12, align 128 |
| 33 | %arrayidx13 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 8 |
| 34 | %arrayidx14 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 8 |
| 35 | %12 = load <32 x i32>, <32 x i32>* %arrayidx14, align 128 |
| 36 | %arrayidx15 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 5 |
| 37 | %13 = load <32 x i32>, <32 x i32>* %arrayidx15, align 128 |
| 38 | %arrayidx16 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 10 |
| 39 | %arrayidx17 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 10 |
| 40 | %14 = load <32 x i32>, <32 x i32>* %arrayidx17, align 128 |
| 41 | %arrayidx18 = getelementptr inbounds <32 x i32>, <32 x i32>* %pkey.0542, i32 6 |
| 42 | %15 = load <32 x i32>, <32 x i32>* %arrayidx18, align 128 |
| 43 | %arrayidx19 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 12 |
| 44 | %16 = load <32 x i32>, <32 x i32>* %arrayidx19, align 128 |
| 45 | %arrayidx20 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 12 |
| 46 | %17 = load <32 x i32>, <32 x i32>* %arrayidx20, align 128 |
| 47 | %arrayidx22 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 14 |
| 48 | %18 = load <32 x i32>, <32 x i32>* %arrayidx22, align 128 |
| 49 | %arrayidx23 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 14 |
| 50 | %19 = load <32 x i32>, <32 x i32>* %arrayidx23, align 128 |
; First group of vgtb/vmux calls on the loaded vectors. Each vgtb result
; (%20, %25, %32) is a <1024 x i1> value used as the first operand of the
; vmux calls that follow it, usually in pairs with the two vector operands
; swapped.
; NOTE(review): presumably vgtb is a byte-wise greater-than compare yielding a
; predicate and vmux a predicate-controlled select -- confirm against the
; Hexagon HVX intrinsic documentation.
| 51 | %20 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %2, <32 x i32> %11) |
| 52 | %21 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %11, <32 x i32> %2) |
| 53 | %22 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %2, <32 x i32> %11) |
| 54 | %23 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> undef, <32 x i32> %3) |
| 55 | %24 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %20, <32 x i32> %12, <32 x i32> undef) |
| 56 | %25 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %7, <32 x i32> %15) |
| 57 | %26 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %15, <32 x i32> %7) |
| 58 | %27 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %7, <32 x i32> %15) |
| 59 | %28 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %16, <32 x i32> %8) |
| 60 | %29 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %8, <32 x i32> %16) |
| 61 | %30 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %17, <32 x i32> %9) |
| 62 | %31 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %25, <32 x i32> %9, <32 x i32> %17) |
| 63 | %32 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %4, <32 x i32> %13) |
| 64 | %33 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %13, <32 x i32> %4) |
| 65 | %34 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %4, <32 x i32> %13) |
| 66 | %35 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> undef, <32 x i32> %5) |
| 67 | %36 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %5, <32 x i32> undef) |
| 68 | %37 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %14, <32 x i32> %6) |
| 69 | %38 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %32, <32 x i32> %6, <32 x i32> %14) |
; These vmux calls take a constant zeroinitializer as their <1024 x i1> operand
; instead of a vgtb result.
| 70 | %39 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> undef) |
| 71 | %40 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> zeroinitializer) |
| 72 | %41 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %18, <32 x i32> %10) |
| 73 | %42 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %10, <32 x i32> %18) |
| 74 | %43 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> %19, <32 x i32> undef) |
| 75 | %44 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> zeroinitializer, <32 x i32> undef, <32 x i32> %19) |
; Second group: vgtb/vmux applied to the vmux results computed above.
| 76 | %45 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %21, <32 x i32> %26) |
| 77 | %46 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %26, <32 x i32> %21) |
| 78 | %47 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %21, <32 x i32> %26) |
| 79 | %48 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %28, <32 x i32> %23) |
| 80 | %49 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %23, <32 x i32> %28) |
| 81 | %50 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %30, <32 x i32> %24) |
| 82 | %51 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %45, <32 x i32> %24, <32 x i32> %30) |
| 83 | %52 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %22, <32 x i32> %27) |
| 84 | %53 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %27, <32 x i32> %22) |
| 85 | %54 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %22, <32 x i32> %27) |
| 86 | %55 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> %29, <32 x i32> undef) |
| 87 | %56 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %52, <32 x i32> undef, <32 x i32> %31) |
| 88 | %57 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %33, <32 x i32> %39) |
| 89 | %58 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %39, <32 x i32> %33) |
| 90 | %59 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %33, <32 x i32> %39) |
| 91 | %60 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %41, <32 x i32> %35) |
| 92 | %61 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %57, <32 x i32> %43, <32 x i32> %37) |
| 93 | %62 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %34, <32 x i32> %40) |
| 94 | %63 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %42, <32 x i32> %36) |
| 95 | %64 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %62, <32 x i32> %38, <32 x i32> %44) |
; Third group: vgtb/vmux applied to the results of the second group.
| 96 | %65 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %46, <32 x i32> %58) |
| 97 | %66 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %58, <32 x i32> %46) |
| 98 | %67 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %60, <32 x i32> %48) |
| 99 | %68 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %65, <32 x i32> %61, <32 x i32> %50) |
| 100 | %69 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %47, <32 x i32> %59) |
| 101 | %70 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %69, <32 x i32> %51, <32 x i32> zeroinitializer) |
| 102 | %71 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %53, <32 x i32> zeroinitializer) |
| 103 | %72 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %71, <32 x i32> %63, <32 x i32> %55) |
| 104 | %73 = tail call <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32> %54, <32 x i32> undef) |
| 105 | %74 = tail call <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1> %73, <32 x i32> %56, <32 x i32> %64) |
; vshuffeb calls combining the third-group results with each other or with
; undef / zeroinitializer operands.
| 106 | %75 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %68, <32 x i32> %67) |
| 107 | %76 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %70, <32 x i32> undef) |
| 108 | %77 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> zeroinitializer, <32 x i32> %72) |
| 109 | %78 = tail call <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32> %74, <32 x i32> zeroinitializer) |
; Chain of eight vmpyuh.acc calls: each takes the previous call's <64 x i32>
; result as its first operand, starting from the loop-carried %dAccum0.0539
; and ending in %86, which feeds the %dAccum0.0539 phi on the back edge.
; Every call passes the same scalar operand, i32 65537.
| 110 | %79 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %dAccum0.0539, <32 x i32> %75, i32 65537) |
| 111 | %80 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %79, <32 x i32> zeroinitializer, i32 65537) |
| 112 | %81 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %80, <32 x i32> zeroinitializer, i32 65537) |
| 113 | %82 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %81, <32 x i32> %76, i32 65537) |
| 114 | %83 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %82, <32 x i32> %77, i32 65537) |
| 115 | %84 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %83, <32 x i32> zeroinitializer, i32 65537) |
| 116 | %85 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %84, <32 x i32> undef, i32 65537) |
| 117 | %86 = tail call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %85, <32 x i32> %78, i32 65537) |
; Write results back: %66 to the key pointer, %75 to the data0 pointer, and
; zero vectors to %arrayidx4, %arrayidx20, and to undef and null addresses.
| 118 | store <32 x i32> %66, <32 x i32>* %pkey.0542, align 128 |
| 119 | store <32 x i32> %75, <32 x i32>* %pdata0.0541, align 128 |
| 120 | store <32 x i32> zeroinitializer, <32 x i32>* %arrayidx4, align 128 |
| 121 | store <32 x i32> zeroinitializer, <32 x i32>* undef, align 128 |
| 122 | store <32 x i32> zeroinitializer, <32 x i32>* %arrayidx20, align 128 |
| 123 | store <32 x i32> zeroinitializer, <32 x i32>* null, align 128 |
; Advance both data pointers by 16 <32 x i32> elements for the next iteration.
| 124 | %add.ptr48 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata0.0541, i32 16 |
| 125 | %add.ptr49 = getelementptr inbounds <32 x i32>, <32 x i32>* %pdata1.0540, i32 16 |
; The branch condition is the constant false, so as written control always
; returns to for.body and for.end is never reached.
| 126 | br i1 false, label %for.end, label %for.body |
| 127 | |
| 128 | for.end: |
; Calls the hi intrinsic on the final accumulator value; the result %87 is
; not used before returning.
| 129 | %87 = tail call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %86) |
| 130 | ret void |
| 131 | } |
| 132 | |
; Declarations of the Hexagon V6 (128B variants) intrinsics called by @test.
; All of them carry attribute group #1.

; vgtb: two <32 x i32> operands, returns a <1024 x i1> value.
| 133 | declare <1024 x i1> @llvm.hexagon.V6.vgtb.128B(<32 x i32>, <32 x i32>) #1 |
| 134 | |
; vmux: a <1024 x i1> operand plus two <32 x i32> operands, returns <32 x i32>.
| 135 | declare <32 x i32> @llvm.hexagon.V6.vmux.128B(<1024 x i1>, <32 x i32>, <32 x i32>) #1 |
| 136 | |
; vshuffeb: two <32 x i32> operands, returns <32 x i32>.
| 137 | declare <32 x i32> @llvm.hexagon.V6.vshuffeb.128B(<32 x i32>, <32 x i32>) #1 |
| 138 | |
; vmpyuh.acc: a <64 x i32> operand, a <32 x i32> operand and an i32 scalar,
; returns <64 x i32>.
| 139 | declare <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32>, <32 x i32>, i32) #1 |
| 140 | |
; hi: takes a <64 x i32> value and returns a <32 x i32> value.
| 141 | declare <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32>) #1 |
| 142 | |
; Attribute group #0 is referenced by the definition of @test.
| 143 | attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } |
; Attribute group #1 is referenced by every intrinsic declaration in this file.
| 144 | attributes #1 = { nounwind readnone } |