; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
|  | 2 |  | 
; Fill all eight lanes of an <8 x i8> with the scalar %A, one insertelement
; per lane. The generated assembly is expected to contain dup.8b.
define <8 x i8> @v_dup8(i8 %A) nounwind {
;CHECK-LABEL: v_dup8:
;CHECK: dup.8b
  %lane0 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
  %lane1 = insertelement <8 x i8> %lane0, i8 %A, i32 1
  %lane2 = insertelement <8 x i8> %lane1, i8 %A, i32 2
  %lane3 = insertelement <8 x i8> %lane2, i8 %A, i32 3
  %lane4 = insertelement <8 x i8> %lane3, i8 %A, i32 4
  %lane5 = insertelement <8 x i8> %lane4, i8 %A, i32 5
  %lane6 = insertelement <8 x i8> %lane5, i8 %A, i32 6
  %lane7 = insertelement <8 x i8> %lane6, i8 %A, i32 7
  ret <8 x i8> %lane7
}
|  | 16 |  | 
; Fill all four lanes of a <4 x i16> with the scalar %A, one insertelement
; per lane. The generated assembly is expected to contain dup.4h.
define <4 x i16> @v_dup16(i16 %A) nounwind {
;CHECK-LABEL: v_dup16:
;CHECK: dup.4h
  %lane0 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
  %lane1 = insertelement <4 x i16> %lane0, i16 %A, i32 1
  %lane2 = insertelement <4 x i16> %lane1, i16 %A, i32 2
  %lane3 = insertelement <4 x i16> %lane2, i16 %A, i32 3
  ret <4 x i16> %lane3
}
|  | 26 |  | 
; Fill both lanes of a <2 x i32> with the scalar %A, one insertelement per
; lane. The generated assembly is expected to contain dup.2s.
define <2 x i32> @v_dup32(i32 %A) nounwind {
;CHECK-LABEL: v_dup32:
;CHECK: dup.2s
  %lane0 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
  %lane1 = insertelement <2 x i32> %lane0, i32 %A, i32 1
  ret <2 x i32> %lane1
}
|  | 34 |  | 
; Fill both lanes of a <2 x float> with the scalar %A, one insertelement per
; lane. The generated assembly is expected to contain dup.2s.
define <2 x float> @v_dupfloat(float %A) nounwind {
;CHECK-LABEL: v_dupfloat:
;CHECK: dup.2s
  %lane0 = insertelement <2 x float> zeroinitializer, float %A, i32 0
  %lane1 = insertelement <2 x float> %lane0, float %A, i32 1
  ret <2 x float> %lane1
}
|  | 42 |  | 
; Fill all sixteen lanes of a <16 x i8> with the scalar %A, one insertelement
; per lane. The generated assembly is expected to contain dup.16b.
define <16 x i8> @v_dupQ8(i8 %A) nounwind {
;CHECK-LABEL: v_dupQ8:
;CHECK: dup.16b
  %lane0 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
  %lane1 = insertelement <16 x i8> %lane0, i8 %A, i32 1
  %lane2 = insertelement <16 x i8> %lane1, i8 %A, i32 2
  %lane3 = insertelement <16 x i8> %lane2, i8 %A, i32 3
  %lane4 = insertelement <16 x i8> %lane3, i8 %A, i32 4
  %lane5 = insertelement <16 x i8> %lane4, i8 %A, i32 5
  %lane6 = insertelement <16 x i8> %lane5, i8 %A, i32 6
  %lane7 = insertelement <16 x i8> %lane6, i8 %A, i32 7
  %lane8 = insertelement <16 x i8> %lane7, i8 %A, i32 8
  %lane9 = insertelement <16 x i8> %lane8, i8 %A, i32 9
  %lane10 = insertelement <16 x i8> %lane9, i8 %A, i32 10
  %lane11 = insertelement <16 x i8> %lane10, i8 %A, i32 11
  %lane12 = insertelement <16 x i8> %lane11, i8 %A, i32 12
  %lane13 = insertelement <16 x i8> %lane12, i8 %A, i32 13
  %lane14 = insertelement <16 x i8> %lane13, i8 %A, i32 14
  %lane15 = insertelement <16 x i8> %lane14, i8 %A, i32 15
  ret <16 x i8> %lane15
}
|  | 64 |  | 
; Fill all eight lanes of an <8 x i16> with the scalar %A, one insertelement
; per lane. The generated assembly is expected to contain dup.8h.
define <8 x i16> @v_dupQ16(i16 %A) nounwind {
;CHECK-LABEL: v_dupQ16:
;CHECK: dup.8h
  %lane0 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
  %lane1 = insertelement <8 x i16> %lane0, i16 %A, i32 1
  %lane2 = insertelement <8 x i16> %lane1, i16 %A, i32 2
  %lane3 = insertelement <8 x i16> %lane2, i16 %A, i32 3
  %lane4 = insertelement <8 x i16> %lane3, i16 %A, i32 4
  %lane5 = insertelement <8 x i16> %lane4, i16 %A, i32 5
  %lane6 = insertelement <8 x i16> %lane5, i16 %A, i32 6
  %lane7 = insertelement <8 x i16> %lane6, i16 %A, i32 7
  ret <8 x i16> %lane7
}
|  | 78 |  | 
; Fill all four lanes of a <4 x i32> with the scalar %A, one insertelement
; per lane. The generated assembly is expected to contain dup.4s.
define <4 x i32> @v_dupQ32(i32 %A) nounwind {
;CHECK-LABEL: v_dupQ32:
;CHECK: dup.4s
  %lane0 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
  %lane1 = insertelement <4 x i32> %lane0, i32 %A, i32 1
  %lane2 = insertelement <4 x i32> %lane1, i32 %A, i32 2
  %lane3 = insertelement <4 x i32> %lane2, i32 %A, i32 3
  ret <4 x i32> %lane3
}
|  | 88 |  | 
; Fill all four lanes of a <4 x float> with the scalar %A, one insertelement
; per lane. The generated assembly is expected to contain dup.4s.
define <4 x float> @v_dupQfloat(float %A) nounwind {
;CHECK-LABEL: v_dupQfloat:
;CHECK: dup.4s
  %lane0 = insertelement <4 x float> zeroinitializer, float %A, i32 0
  %lane1 = insertelement <4 x float> %lane0, float %A, i32 1
  %lane2 = insertelement <4 x float> %lane1, float %A, i32 2
  %lane3 = insertelement <4 x float> %lane2, float %A, i32 3
  ret <4 x float> %lane3
}
|  | 98 |  | 
; Check to make sure it works with shuffles, too.
|  | 100 |  | 
; Splat %A across an <8 x i8> by inserting it into lane 0 and broadcasting
; that lane with an all-zero shuffle mask. Expected to contain dup.8b.
define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
;CHECK-LABEL: v_shuffledup8:
;CHECK: dup.8b
  %seed = insertelement <8 x i8> undef, i8 %A, i32 0
  %splat = shufflevector <8 x i8> %seed, <8 x i8> undef, <8 x i32> zeroinitializer
  ret <8 x i8> %splat
}
|  | 108 |  | 
; Splat %A across a <4 x i16> by inserting it into lane 0 and broadcasting
; that lane with an all-zero shuffle mask. Expected to contain dup.4h.
define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
;CHECK-LABEL: v_shuffledup16:
;CHECK: dup.4h
  %seed = insertelement <4 x i16> undef, i16 %A, i32 0
  %splat = shufflevector <4 x i16> %seed, <4 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %splat
}
|  | 116 |  | 
; Splat %A across a <2 x i32> by inserting it into lane 0 and broadcasting
; that lane with an all-zero shuffle mask. Expected to contain dup.2s.
define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
;CHECK-LABEL: v_shuffledup32:
;CHECK: dup.2s
  %seed = insertelement <2 x i32> undef, i32 %A, i32 0
  %splat = shufflevector <2 x i32> %seed, <2 x i32> undef, <2 x i32> zeroinitializer
  ret <2 x i32> %splat
}
|  | 124 |  | 
; Splat %A across a <2 x float> by inserting it into lane 0 and broadcasting
; that lane with an all-zero shuffle mask. Expected to contain dup.2s.
define <2 x float> @v_shuffledupfloat(float %A) nounwind {
;CHECK-LABEL: v_shuffledupfloat:
;CHECK: dup.2s
  %seed = insertelement <2 x float> undef, float %A, i32 0
  %splat = shufflevector <2 x float> %seed, <2 x float> undef, <2 x i32> zeroinitializer
  ret <2 x float> %splat
}
|  | 132 |  | 
; Splat %A across a <16 x i8> by inserting it into lane 0 and broadcasting
; that lane with an all-zero shuffle mask. Expected to contain dup.16b.
define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
;CHECK-LABEL: v_shuffledupQ8:
;CHECK: dup.16b
  %seed = insertelement <16 x i8> undef, i8 %A, i32 0
  %splat = shufflevector <16 x i8> %seed, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}
|  | 140 |  | 
; Splat %A across an <8 x i16> by inserting it into lane 0 and broadcasting
; that lane with an all-zero shuffle mask. Expected to contain dup.8h.
define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
;CHECK-LABEL: v_shuffledupQ16:
;CHECK: dup.8h
  %seed = insertelement <8 x i16> undef, i16 %A, i32 0
  %splat = shufflevector <8 x i16> %seed, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}
|  | 148 |  | 
; Splat %A across a <4 x i32> by inserting it into lane 0 and broadcasting
; that lane with an all-zero shuffle mask. Expected to contain dup.4s.
define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
;CHECK-LABEL: v_shuffledupQ32:
;CHECK: dup.4s
  %seed = insertelement <4 x i32> undef, i32 %A, i32 0
  %splat = shufflevector <4 x i32> %seed, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}
|  | 156 |  | 
; Splat %A across a <4 x float> by inserting it into lane 0 and broadcasting
; that lane with an all-zero shuffle mask. Expected to contain dup.4s.
define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
;CHECK-LABEL: v_shuffledupQfloat:
;CHECK: dup.4s
  %seed = insertelement <4 x float> undef, float %A, i32 0
  %splat = shufflevector <4 x float> %seed, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
|  | 164 |  | 
; Load an <8 x i8> from %A and broadcast its lane 1 to every lane of the
; <8 x i8> result. Expected to contain dup.8b.
define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vduplane8:
;CHECK: dup.8b
  %vec = load <8 x i8>* %A
  %splat = shufflevector <8 x i8> %vec, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
  ret <8 x i8> %splat
}
|  | 172 |  | 
; Load a <4 x i16> from %A and broadcast its lane 1 to every lane of the
; <4 x i16> result. Expected to contain dup.4h.
define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vduplane16:
;CHECK: dup.4h
  %vec = load <4 x i16>* %A
  %splat = shufflevector <4 x i16> %vec, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
  ret <4 x i16> %splat
}
|  | 180 |  | 
; Load a <2 x i32> from %A and broadcast its lane 1 to both lanes of the
; <2 x i32> result. Expected to contain dup.2s.
define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vduplane32:
;CHECK: dup.2s
  %vec = load <2 x i32>* %A
  %splat = shufflevector <2 x i32> %vec, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
  ret <2 x i32> %splat
}
|  | 188 |  | 
; Load a <2 x float> from %A and broadcast its lane 1 to both lanes of the
; <2 x float> result. Expected to contain dup.2s.
define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
;CHECK-LABEL: vduplanefloat:
;CHECK: dup.2s
  %vec = load <2 x float>* %A
  %splat = shufflevector <2 x float> %vec, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
  ret <2 x float> %splat
}
|  | 196 |  | 
; Load an <8 x i8> from %A and broadcast its lane 1 into a wider <16 x i8>
; result (the shuffle mask has sixteen entries). Expected to contain dup.16b.
define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
;CHECK-LABEL: vduplaneQ8:
;CHECK: dup.16b
  %vec = load <8 x i8>* %A
  %splat = shufflevector <8 x i8> %vec, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
  ret <16 x i8> %splat
}
|  | 204 |  | 
; Load a <4 x i16> from %A and broadcast its lane 1 into a wider <8 x i16>
; result (the shuffle mask has eight entries). Expected to contain dup.8h.
define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
;CHECK-LABEL: vduplaneQ16:
;CHECK: dup.8h
  %vec = load <4 x i16>* %A
  %splat = shufflevector <4 x i16> %vec, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
  ret <8 x i16> %splat
}
|  | 212 |  | 
; Load a <2 x i32> from %A and broadcast its lane 1 into a wider <4 x i32>
; result (the shuffle mask has four entries). Expected to contain dup.4s.
define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
;CHECK-LABEL: vduplaneQ32:
;CHECK: dup.4s
  %vec = load <2 x i32>* %A
  %splat = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
  ret <4 x i32> %splat
}
|  | 220 |  | 
; Load a <2 x float> from %A and broadcast its lane 1 into a wider
; <4 x float> result (the shuffle mask has four entries). Expected to contain
; dup.4s.
define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
;CHECK-LABEL: vduplaneQfloat:
;CHECK: dup.4s
  %vec = load <2 x float>* %A
  %splat = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
  ret <4 x float> %splat
}
|  | 228 |  | 
; Broadcast lane 1 of the <2 x i64> argument to both lanes. Expected to
; contain dup.2d.
define <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
;CHECK-LABEL: foo:
;CHECK: dup.2d
entry:
  %splat = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %splat
}
|  | 236 |  | 
; Broadcast lane 0 of the <2 x i64> argument to both lanes. Expected to
; contain dup.2d.
define <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
;CHECK-LABEL: bar:
;CHECK: dup.2d
entry:
  %splat = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x i64> %splat
}
|  | 244 |  | 
; Broadcast lane 1 of the <2 x double> argument to both lanes. Expected to
; contain dup.2d.
define <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone {
;CHECK-LABEL: baz:
;CHECK: dup.2d
entry:
  %splat = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %splat
}
|  | 252 |  | 
; Broadcast lane 0 of the <2 x double> argument to both lanes. Expected to
; contain dup.2d.
define <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone {
;CHECK-LABEL: qux:
;CHECK: dup.2d
entry:
  %splat = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %splat
}
|  | 260 |  | 
; Build a <2 x i32> from two different scalars: %a in lane 0, %b in lane 1.
; The exact instruction sequence (fmov, one ins, ret) is pinned below.
define <2 x i32> @f(i32 %a, i32 %b) nounwind readnone  {
; CHECK-LABEL: f:
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ins.s v0[1], w1
; CHECK-NEXT: ret
  %with_a = insertelement <2 x i32> undef, i32 %a, i32 0
  %with_ab = insertelement <2 x i32> %with_a, i32 %b, i32 1
  ret <2 x i32> %with_ab
}
|  | 270 |  | 
; Build a <4 x i32> with lanes (%a, %b, %b, %a). The exact instruction
; sequence (fmov, three ins, ret) is pinned below.
define <4 x i32> @g(i32 %a, i32 %b) nounwind readnone  {
; CHECK-LABEL: g:
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ins.s v0[1], w1
; CHECK-NEXT: ins.s v0[2], w1
; CHECK-NEXT: ins.s v0[3], w0
; CHECK-NEXT: ret
  %lane0 = insertelement <4 x i32> undef, i32 %a, i32 0
  %lane1 = insertelement <4 x i32> %lane0, i32 %b, i32 1
  %lane2 = insertelement <4 x i32> %lane1, i32 %b, i32 2
  %lane3 = insertelement <4 x i32> %lane2, i32 %a, i32 3
  ret <4 x i32> %lane3
}
|  | 284 |  | 
; Build a <2 x i64> from two different scalars: %a in lane 0, %b in lane 1.
; The exact instruction sequence (fmov, one ins, ret) is pinned below.
define <2 x i64> @h(i64 %a, i64 %b) nounwind readnone  {
; CHECK-LABEL: h:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: ins.d v0[1], x1
; CHECK-NEXT: ret
  %with_a = insertelement <2 x i64> undef, i64 %a, i32 0
  %with_ab = insertelement <2 x i64> %with_a, i64 %b, i32 1
  ret <2 x i64> %with_ab
}
|  | 294 |  | 
; This used to be recognised as a BUILD_VECTOR implementable by dup, on the
; assumption that the single value needed was of the same type as the vector.
; That is false if the scalar corresponding to the vector type is illegal
; (e.g. a <4 x i16> BUILD_VECTOR will have an i32 as its source). In that case
; the operation is not a simple "dup vD.4h, vN.h[idx]" after all, and we
; crashed.
;
; *However*, it is a dup vD.4h, vN.h[2*idx].
define <4 x i16> @test_build_illegal(<4 x i32> %in) {
; CHECK-LABEL: test_build_illegal:
; CHECK: dup.4h v0, v0[6]
  %wide = extractelement <4 x i32> %in, i32 3
  %narrow = trunc i32 %wide to i16
  %result = insertelement <4 x i16> undef, i16 %narrow, i32 3

  ret <4 x i16> %result
}
|  | 311 |  | 
; We used to inherit an already extract_subvectored v4i16 from
; SelectionDAGBuilder here. A DUPLANE was then added on top of that, which
; prevented the formation of an indexed-by-7 MLS.
define <4 x i16> @test_high_splat(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
; CHECK-LABEL: test_high_splat:
; CHECK: mls.4h v0, v1, v2[7]
entry:
  %splat = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %prod = mul <4 x i16> %splat, %b
  %diff = sub <4 x i16> %a, %prod
  ret <4 x i16> %diff
}