| Saleem Abdulrasool | 7258735 | 2014-04-03 16:01:44 +0000 | [diff] [blame] | 1 | ; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s |
| Evan Cheng | b4eae13 | 2012-12-04 22:41:50 +0000 | [diff] [blame] | 2 | ; Test NEON lowering of ctpop (implemented with vcnt), ctlz (vclz) and the llvm.arm.neon.vcls intrinsic (vcls). |
| 3 | |
; vcnt8: loads an <8 x i8> vector through %A, applies @llvm.ctpop.v8i8 and
; returns the result. The CHECK line expects a vcnt.8 whose destination and
; source are both D registers.
| 4 | define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind { |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 5 | ;CHECK-LABEL: vcnt8: |
| Evan Cheng | b4eae13 | 2012-12-04 22:41:50 +0000 | [diff] [blame] | 6 | ;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}} |
| 7 | %tmp1 = load <8 x i8>* %A |
| 8 | %tmp2 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %tmp1) |
| 9 | ret <8 x i8> %tmp2 |
| 10 | } |
| 11 | |
; vcntQ8: loads a <16 x i8> vector through %A, applies @llvm.ctpop.v16i8 and
; returns the result. The CHECK line expects a vcnt.8 whose destination and
; source are both Q registers.
| 12 | define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind { |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 13 | ;CHECK-LABEL: vcntQ8: |
| Evan Cheng | b4eae13 | 2012-12-04 22:41:50 +0000 | [diff] [blame] | 14 | ;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}} |
| 15 | %tmp1 = load <16 x i8>* %A |
| 16 | %tmp2 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp1) |
| 17 | ret <16 x i8> %tmp2 |
| 18 | } |
| 19 | |
; vcnt16: loads a <4 x i16> vector through %A, applies @llvm.ctpop.v4i16 and
; returns the result. The CHECK lines expect, in this order, vcnt.8, vrev16.8,
; vadd.i8 and vuzp.8 on D registers, followed by a vmovl.u8 from a D register
; into a Q register. They are plain CHECK directives, so they constrain order
; only, not adjacency.
| 20 | define <4 x i16> @vcnt16(<4 x i16>* %A) nounwind { |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 21 | ; CHECK-LABEL: vcnt16: |
| Evan Cheng | b4eae13 | 2012-12-04 22:41:50 +0000 | [diff] [blame] | 22 | ; CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}} |
| 23 | ; CHECK: vrev16.8 {{d[0-9]+}}, {{d[0-9]+}} |
| 24 | ; CHECK: vadd.i8 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} |
| 25 | ; CHECK: vuzp.8 {{d[0-9]+}}, {{d[0-9]+}} |
| 26 | ; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}} |
| 27 | %tmp1 = load <4 x i16>* %A |
| 28 | %tmp2 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %tmp1) |
| 29 | ret <4 x i16> %tmp2 |
| 30 | } |
| 31 | |
; vcntQ16: loads an <8 x i16> vector through %A, applies @llvm.ctpop.v8i16 and
; returns the result. The CHECK lines expect, in this order, vcnt.8, vrev16.8,
; vadd.i8 and vuzp.8 on Q registers, followed by a vmovl.u8 from a D register
; into a Q register. They are plain CHECK directives, so they constrain order
; only, not adjacency.
| 32 | define <8 x i16> @vcntQ16(<8 x i16>* %A) nounwind { |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 33 | ; CHECK-LABEL: vcntQ16: |
| Evan Cheng | b4eae13 | 2012-12-04 22:41:50 +0000 | [diff] [blame] | 34 | ; CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}} |
| 35 | ; CHECK: vrev16.8 {{q[0-9]+}}, {{q[0-9]+}} |
| 36 | ; CHECK: vadd.i8 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} |
| 37 | ; CHECK: vuzp.8 {{q[0-9]+}}, {{q[0-9]+}} |
| 38 | ; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}} |
| 39 | %tmp1 = load <8 x i16>* %A |
| 40 | %tmp2 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %tmp1) |
| 41 | ret <8 x i16> %tmp2 |
| 42 | } |
| 43 | |
; vcnt32: loads a <2 x i32> vector through %A, applies @llvm.ctpop.v2i32 and
; returns the result. The CHECK lines expect the same five-instruction
; sequence as the 16-bit D-register test (vcnt.8, vrev16.8, vadd.i8, vuzp.8,
; vmovl.u8), then vrev32.16 and vuzp.16 on D registers and a vmovl.u16 from a
; D register into a Q register. They are plain CHECK directives, so they
; constrain order only, not adjacency.
| 44 | define <2 x i32> @vcnt32(<2 x i32>* %A) nounwind { |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 45 | ; CHECK-LABEL: vcnt32: |
| Evan Cheng | b4eae13 | 2012-12-04 22:41:50 +0000 | [diff] [blame] | 46 | ; CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}} |
| 47 | ; CHECK: vrev16.8 {{d[0-9]+}}, {{d[0-9]+}} |
| 48 | ; CHECK: vadd.i8 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} |
| 49 | ; CHECK: vuzp.8 {{d[0-9]+}}, {{d[0-9]+}} |
| 50 | ; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}} |
| 51 | ; CHECK: vrev32.16 {{d[0-9]+}}, {{d[0-9]+}} |
| 52 | ; CHECK: vuzp.16 {{d[0-9]+}}, {{d[0-9]+}} |
| 53 | ; CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}} |
| 54 | %tmp1 = load <2 x i32>* %A |
| 55 | %tmp2 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %tmp1) |
| 56 | ret <2 x i32> %tmp2 |
| 57 | } |
| 58 | |
; vcntQ32: loads a <4 x i32> vector through %A, applies @llvm.ctpop.v4i32 and
; returns the result. The CHECK lines expect the same five-instruction
; sequence as the 16-bit Q-register test (vcnt.8, vrev16.8, vadd.i8, vuzp.8,
; vmovl.u8), then vrev32.16 and vuzp.16 on Q registers and a vmovl.u16 from a
; D register into a Q register. They are plain CHECK directives, so they
; constrain order only, not adjacency.
| 59 | define <4 x i32> @vcntQ32(<4 x i32>* %A) nounwind { |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 60 | ; CHECK-LABEL: vcntQ32: |
| Evan Cheng | b4eae13 | 2012-12-04 22:41:50 +0000 | [diff] [blame] | 61 | ; CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}} |
| 62 | ; CHECK: vrev16.8 {{q[0-9]+}}, {{q[0-9]+}} |
| 63 | ; CHECK: vadd.i8 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}} |
| 64 | ; CHECK: vuzp.8 {{q[0-9]+}}, {{q[0-9]+}} |
| 65 | ; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}} |
| 66 | ; CHECK: vrev32.16 {{q[0-9]+}}, {{q[0-9]+}} |
| 67 | ; CHECK: vuzp.16 {{q[0-9]+}}, {{q[0-9]+}} |
| 68 | ; CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}} |
| 69 | %tmp1 = load <4 x i32>* %A |
| 70 | %tmp2 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %tmp1) |
| 71 | ret <4 x i32> %tmp2 |
| 72 | } |
| 73 | |
; Declarations of the llvm.ctpop overloads called by the vcnt* tests in this
; file: 64-bit and 128-bit vectors of i8, i16 and i32 elements.
| 74 | declare <8 x i8> @llvm.ctpop.v8i8(<8 x i8>) nounwind readnone |
| 75 | declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone |
| 76 | declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>) nounwind readnone |
| 77 | declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) nounwind readnone |
| 78 | declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone |
| 79 | declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone |
| 80 | |
; vclz8: loads an <8 x i8> vector through %A, calls @llvm.ctlz.v8i8 with the
; constant i1 0 as its second argument, and returns the result. The CHECK line
; expects a vclz.i8 with D-register operands.
; NOTE(review): per the LLVM LangRef the i1 flag selects whether a zero input
; gives an undefined result; 0 presumably means "defined" - confirm.
| 81 | define <8 x i8> @vclz8(<8 x i8>* %A) nounwind { |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 82 | ;CHECK-LABEL: vclz8: |
| Evan Cheng | b4eae13 | 2012-12-04 22:41:50 +0000 | [diff] [blame] | 83 | ;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}} |
| 84 | %tmp1 = load <8 x i8>* %A |
| 85 | %tmp2 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %tmp1, i1 0) |
| 86 | ret <8 x i8> %tmp2 |
| 87 | } |
| 88 | |
; vclz16: loads a <4 x i16> vector through %A, calls @llvm.ctlz.v4i16 with the
; constant i1 0 as its second argument, and returns the result. The CHECK line
; expects a vclz.i16 with D-register operands.
| 89 | define <4 x i16> @vclz16(<4 x i16>* %A) nounwind { |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 90 | ;CHECK-LABEL: vclz16: |
| Evan Cheng | b4eae13 | 2012-12-04 22:41:50 +0000 | [diff] [blame] | 91 | ;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}} |
| 92 | %tmp1 = load <4 x i16>* %A |
| 93 | %tmp2 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %tmp1, i1 0) |
| 94 | ret <4 x i16> %tmp2 |
| 95 | } |
| 96 | |
; vclz32: loads a <2 x i32> vector through %A, calls @llvm.ctlz.v2i32 with the
; constant i1 0 as its second argument, and returns the result. The CHECK line
; expects a vclz.i32 with D-register operands.
| 97 | define <2 x i32> @vclz32(<2 x i32>* %A) nounwind { |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 98 | ;CHECK-LABEL: vclz32: |
| Evan Cheng | b4eae13 | 2012-12-04 22:41:50 +0000 | [diff] [blame] | 99 | ;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}} |
| 100 | %tmp1 = load <2 x i32>* %A |
| 101 | %tmp2 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %tmp1, i1 0) |
| 102 | ret <2 x i32> %tmp2 |
| 103 | } |
| 104 | |
; vclzQ8: loads a <16 x i8> vector through %A, calls @llvm.ctlz.v16i8 with the
; constant i1 0 as its second argument, and returns the result. The CHECK line
; expects a vclz.i8 with Q-register operands.
| 105 | define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind { |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 106 | ;CHECK-LABEL: vclzQ8: |
| Evan Cheng | b4eae13 | 2012-12-04 22:41:50 +0000 | [diff] [blame] | 107 | ;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}} |
| 108 | %tmp1 = load <16 x i8>* %A |
| 109 | %tmp2 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %tmp1, i1 0) |
| 110 | ret <16 x i8> %tmp2 |
| 111 | } |
| 112 | |
; vclzQ16: loads an <8 x i16> vector through %A, calls @llvm.ctlz.v8i16 with
; the constant i1 0 as its second argument, and returns the result. The CHECK
; line expects a vclz.i16 with Q-register operands.
| 113 | define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind { |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 114 | ;CHECK-LABEL: vclzQ16: |
| Evan Cheng | b4eae13 | 2012-12-04 22:41:50 +0000 | [diff] [blame] | 115 | ;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}} |
| 116 | %tmp1 = load <8 x i16>* %A |
| 117 | %tmp2 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %tmp1, i1 0) |
| 118 | ret <8 x i16> %tmp2 |
| 119 | } |
| 120 | |
; vclzQ32: loads a <4 x i32> vector through %A, calls @llvm.ctlz.v4i32 with
; the constant i1 0 as its second argument, and returns the result. The CHECK
; line expects a vclz.i32 with Q-register operands.
| 121 | define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind { |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 122 | ;CHECK-LABEL: vclzQ32: |
| Evan Cheng | b4eae13 | 2012-12-04 22:41:50 +0000 | [diff] [blame] | 123 | ;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}} |
| 124 | %tmp1 = load <4 x i32>* %A |
| 125 | %tmp2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %tmp1, i1 0) |
| 126 | ret <4 x i32> %tmp2 |
| 127 | } |
| 128 | |
; Declarations of the llvm.ctlz overloads called by the vclz* tests in this
; file. Each takes the vector operand plus an i1 flag. The first group is the
; 64-bit vector types, the second group the 128-bit vector types.
| 129 | declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) nounwind readnone |
| 130 | declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) nounwind readnone |
| 131 | declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone |
| 132 | |
| 133 | declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) nounwind readnone |
| 134 | declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone |
| 135 | declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone |
| 136 | |
; vclss8: loads an <8 x i8> vector through %A, calls
; @llvm.arm.neon.vcls.v8i8 and returns the result. The CHECK line pins both
; operands to D registers (the same operand pattern the vclz tests in this
; file use) so that this 64-bit test cannot be satisfied by the Q-register
; form of vcls.s8.
| 137 | define <8 x i8> @vclss8(<8 x i8>* %A) nounwind { |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 138 | ;CHECK-LABEL: vclss8: |
| Evan Cheng | b4eae13 | 2012-12-04 22:41:50 +0000 | [diff] [blame] | 139 | ;CHECK: vcls.s8 {{d[0-9]+}}, {{d[0-9]+}} |
| 140 | %tmp1 = load <8 x i8>* %A |
| 141 | %tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1) |
| 142 | ret <8 x i8> %tmp2 |
| 143 | } |
| 144 | |
; vclss16: loads a <4 x i16> vector through %A, calls
; @llvm.arm.neon.vcls.v4i16 and returns the result. The CHECK line pins both
; operands to D registers (the same operand pattern the vclz tests in this
; file use) so that this 64-bit test cannot be satisfied by the Q-register
; form of vcls.s16.
| 145 | define <4 x i16> @vclss16(<4 x i16>* %A) nounwind { |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 146 | ;CHECK-LABEL: vclss16: |
| Evan Cheng | b4eae13 | 2012-12-04 22:41:50 +0000 | [diff] [blame] | 147 | ;CHECK: vcls.s16 {{d[0-9]+}}, {{d[0-9]+}} |
| 148 | %tmp1 = load <4 x i16>* %A |
| 149 | %tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1) |
| 150 | ret <4 x i16> %tmp2 |
| 151 | } |
| 152 | |
; vclss32: loads a <2 x i32> vector through %A, calls
; @llvm.arm.neon.vcls.v2i32 and returns the result. The CHECK line pins both
; operands to D registers (the same operand pattern the vclz tests in this
; file use) so that this 64-bit test cannot be satisfied by the Q-register
; form of vcls.s32.
| 153 | define <2 x i32> @vclss32(<2 x i32>* %A) nounwind { |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 154 | ;CHECK-LABEL: vclss32: |
| Evan Cheng | b4eae13 | 2012-12-04 22:41:50 +0000 | [diff] [blame] | 155 | ;CHECK: vcls.s32 {{d[0-9]+}}, {{d[0-9]+}} |
| 156 | %tmp1 = load <2 x i32>* %A |
| 157 | %tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1) |
| 158 | ret <2 x i32> %tmp2 |
| 159 | } |
| 160 | |
; vclsQs8: loads a <16 x i8> vector through %A, calls
; @llvm.arm.neon.vcls.v16i8 and returns the result. The CHECK line pins both
; operands to Q registers (the same operand pattern the vclz tests in this
; file use) so that this 128-bit test cannot be satisfied by the D-register
; form of vcls.s8.
| 161 | define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind { |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 162 | ;CHECK-LABEL: vclsQs8: |
| Evan Cheng | b4eae13 | 2012-12-04 22:41:50 +0000 | [diff] [blame] | 163 | ;CHECK: vcls.s8 {{q[0-9]+}}, {{q[0-9]+}} |
| 164 | %tmp1 = load <16 x i8>* %A |
| 165 | %tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1) |
| 166 | ret <16 x i8> %tmp2 |
| 167 | } |
| 168 | |
; vclsQs16: loads an <8 x i16> vector through %A, calls
; @llvm.arm.neon.vcls.v8i16 and returns the result. The CHECK line pins both
; operands to Q registers (the same operand pattern the vclz tests in this
; file use) so that this 128-bit test cannot be satisfied by the D-register
; form of vcls.s16.
| 169 | define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind { |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 170 | ;CHECK-LABEL: vclsQs16: |
| Evan Cheng | b4eae13 | 2012-12-04 22:41:50 +0000 | [diff] [blame] | 171 | ;CHECK: vcls.s16 {{q[0-9]+}}, {{q[0-9]+}} |
| 172 | %tmp1 = load <8 x i16>* %A |
| 173 | %tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1) |
| 174 | ret <8 x i16> %tmp2 |
| 175 | } |
| 176 | |
; vclsQs32: loads a <4 x i32> vector through %A, calls
; @llvm.arm.neon.vcls.v4i32 and returns the result. The CHECK line pins both
; operands to Q registers (the same operand pattern the vclz tests in this
; file use) so that this 128-bit test cannot be satisfied by the D-register
; form of vcls.s32.
| 177 | define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind { |
| Stephen Lin | d24ab20 | 2013-07-14 06:24:09 +0000 | [diff] [blame] | 178 | ;CHECK-LABEL: vclsQs32: |
| Evan Cheng | b4eae13 | 2012-12-04 22:41:50 +0000 | [diff] [blame] | 179 | ;CHECK: vcls.s32 {{q[0-9]+}}, {{q[0-9]+}} |
| 180 | %tmp1 = load <4 x i32>* %A |
| 181 | %tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1) |
| 182 | ret <4 x i32> %tmp2 |
| 183 | } |
| 184 | |
; Declarations of the llvm.arm.neon.vcls overloads called by the vcls* tests
; in this file. The first group is the 64-bit vector types, the second group
; the 128-bit vector types.
| 185 | declare <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8>) nounwind readnone |
| 186 | declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) nounwind readnone |
| 187 | declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) nounwind readnone |
| 188 | |
| 189 | declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) nounwind readnone |
| 190 | declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) nounwind readnone |
| 191 | declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) nounwind readnone |