; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s

; Splat of an i8 scalar into a 64-bit vector: %A is inserted into each of the
; eight lanes in turn, starting from zeroinitializer. FileCheck requires a
; dup.8b in the code emitted for this function.
define <8 x i8> @v_dup8(i8 %A) nounwind {
; CHECK-LABEL: v_dup8:
; CHECK: dup.8b
  %tmp1 = insertelement <8 x i8> zeroinitializer, i8 %A, i32 0
  %tmp2 = insertelement <8 x i8> %tmp1, i8 %A, i32 1
  %tmp3 = insertelement <8 x i8> %tmp2, i8 %A, i32 2
  %tmp4 = insertelement <8 x i8> %tmp3, i8 %A, i32 3
  %tmp5 = insertelement <8 x i8> %tmp4, i8 %A, i32 4
  %tmp6 = insertelement <8 x i8> %tmp5, i8 %A, i32 5
  %tmp7 = insertelement <8 x i8> %tmp6, i8 %A, i32 6
  %tmp8 = insertelement <8 x i8> %tmp7, i8 %A, i32 7
  ret <8 x i8> %tmp8
}

; Splat of an i16 scalar into a 64-bit vector: %A is inserted into all four
; lanes, starting from zeroinitializer. FileCheck requires a dup.4h in the
; code emitted for this function.
define <4 x i16> @v_dup16(i16 %A) nounwind {
; CHECK-LABEL: v_dup16:
; CHECK: dup.4h
  %tmp1 = insertelement <4 x i16> zeroinitializer, i16 %A, i32 0
  %tmp2 = insertelement <4 x i16> %tmp1, i16 %A, i32 1
  %tmp3 = insertelement <4 x i16> %tmp2, i16 %A, i32 2
  %tmp4 = insertelement <4 x i16> %tmp3, i16 %A, i32 3
  ret <4 x i16> %tmp4
}

; Splat of an i32 scalar into a 64-bit vector: %A is inserted into both lanes,
; starting from zeroinitializer. FileCheck requires a dup.2s in the code
; emitted for this function.
define <2 x i32> @v_dup32(i32 %A) nounwind {
; CHECK-LABEL: v_dup32:
; CHECK: dup.2s
  %tmp1 = insertelement <2 x i32> zeroinitializer, i32 %A, i32 0
  %tmp2 = insertelement <2 x i32> %tmp1, i32 %A, i32 1
  ret <2 x i32> %tmp2
}

; Splat of a float scalar into a 64-bit vector: %A is inserted into both
; lanes, starting from zeroinitializer. FileCheck requires a dup.2s in the
; code emitted for this function.
define <2 x float> @v_dupfloat(float %A) nounwind {
; CHECK-LABEL: v_dupfloat:
; CHECK: dup.2s
  %tmp1 = insertelement <2 x float> zeroinitializer, float %A, i32 0
  %tmp2 = insertelement <2 x float> %tmp1, float %A, i32 1
  ret <2 x float> %tmp2
}

; Splat of an i8 scalar into a 128-bit vector: %A is inserted into each of the
; sixteen lanes in turn, starting from zeroinitializer. FileCheck requires a
; dup.16b in the code emitted for this function.
define <16 x i8> @v_dupQ8(i8 %A) nounwind {
; CHECK-LABEL: v_dupQ8:
; CHECK: dup.16b
  %tmp1 = insertelement <16 x i8> zeroinitializer, i8 %A, i32 0
  %tmp2 = insertelement <16 x i8> %tmp1, i8 %A, i32 1
  %tmp3 = insertelement <16 x i8> %tmp2, i8 %A, i32 2
  %tmp4 = insertelement <16 x i8> %tmp3, i8 %A, i32 3
  %tmp5 = insertelement <16 x i8> %tmp4, i8 %A, i32 4
  %tmp6 = insertelement <16 x i8> %tmp5, i8 %A, i32 5
  %tmp7 = insertelement <16 x i8> %tmp6, i8 %A, i32 6
  %tmp8 = insertelement <16 x i8> %tmp7, i8 %A, i32 7
  %tmp9 = insertelement <16 x i8> %tmp8, i8 %A, i32 8
  %tmp10 = insertelement <16 x i8> %tmp9, i8 %A, i32 9
  %tmp11 = insertelement <16 x i8> %tmp10, i8 %A, i32 10
  %tmp12 = insertelement <16 x i8> %tmp11, i8 %A, i32 11
  %tmp13 = insertelement <16 x i8> %tmp12, i8 %A, i32 12
  %tmp14 = insertelement <16 x i8> %tmp13, i8 %A, i32 13
  %tmp15 = insertelement <16 x i8> %tmp14, i8 %A, i32 14
  %tmp16 = insertelement <16 x i8> %tmp15, i8 %A, i32 15
  ret <16 x i8> %tmp16
}

; Splat of an i16 scalar into a 128-bit vector: %A is inserted into each of
; the eight lanes in turn, starting from zeroinitializer. FileCheck requires a
; dup.8h in the code emitted for this function.
define <8 x i16> @v_dupQ16(i16 %A) nounwind {
; CHECK-LABEL: v_dupQ16:
; CHECK: dup.8h
  %tmp1 = insertelement <8 x i16> zeroinitializer, i16 %A, i32 0
  %tmp2 = insertelement <8 x i16> %tmp1, i16 %A, i32 1
  %tmp3 = insertelement <8 x i16> %tmp2, i16 %A, i32 2
  %tmp4 = insertelement <8 x i16> %tmp3, i16 %A, i32 3
  %tmp5 = insertelement <8 x i16> %tmp4, i16 %A, i32 4
  %tmp6 = insertelement <8 x i16> %tmp5, i16 %A, i32 5
  %tmp7 = insertelement <8 x i16> %tmp6, i16 %A, i32 6
  %tmp8 = insertelement <8 x i16> %tmp7, i16 %A, i32 7
  ret <8 x i16> %tmp8
}

; Splat of an i32 scalar into a 128-bit vector: %A is inserted into all four
; lanes, starting from zeroinitializer. FileCheck requires a dup.4s in the
; code emitted for this function.
define <4 x i32> @v_dupQ32(i32 %A) nounwind {
; CHECK-LABEL: v_dupQ32:
; CHECK: dup.4s
  %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %A, i32 0
  %tmp2 = insertelement <4 x i32> %tmp1, i32 %A, i32 1
  %tmp3 = insertelement <4 x i32> %tmp2, i32 %A, i32 2
  %tmp4 = insertelement <4 x i32> %tmp3, i32 %A, i32 3
  ret <4 x i32> %tmp4
}

; Splat of a float scalar into a 128-bit vector: %A is inserted into all four
; lanes, starting from zeroinitializer. FileCheck requires a dup.4s in the
; code emitted for this function.
define <4 x float> @v_dupQfloat(float %A) nounwind {
; CHECK-LABEL: v_dupQfloat:
; CHECK: dup.4s
  %tmp1 = insertelement <4 x float> zeroinitializer, float %A, i32 0
  %tmp2 = insertelement <4 x float> %tmp1, float %A, i32 1
  %tmp3 = insertelement <4 x float> %tmp2, float %A, i32 2
  %tmp4 = insertelement <4 x float> %tmp3, float %A, i32 3
  ret <4 x float> %tmp4
}

; Check to make sure it works with shuffles, too.

; Splat expressed as a shuffle: %A is placed in lane 0 of an undef vector and
; a zeroinitializer mask then selects lane 0 for all eight result lanes.
; FileCheck requires a dup.8b in the code emitted for this function.
define <8 x i8> @v_shuffledup8(i8 %A) nounwind {
; CHECK-LABEL: v_shuffledup8:
; CHECK: dup.8b
  %tmp1 = insertelement <8 x i8> undef, i8 %A, i32 0
  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
  ret <8 x i8> %tmp2
}

; Splat expressed as a shuffle: %A is placed in lane 0 of an undef vector and
; a zeroinitializer mask then selects lane 0 for all four result lanes.
; FileCheck requires a dup.4h in the code emitted for this function.
define <4 x i16> @v_shuffledup16(i16 %A) nounwind {
; CHECK-LABEL: v_shuffledup16:
; CHECK: dup.4h
  %tmp1 = insertelement <4 x i16> undef, i16 %A, i32 0
  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %tmp2
}

; Splat expressed as a shuffle: %A is placed in lane 0 of an undef vector and
; a zeroinitializer mask then selects lane 0 for both result lanes.
; FileCheck requires a dup.2s in the code emitted for this function.
define <2 x i32> @v_shuffledup32(i32 %A) nounwind {
; CHECK-LABEL: v_shuffledup32:
; CHECK: dup.2s
  %tmp1 = insertelement <2 x i32> undef, i32 %A, i32 0
  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
  ret <2 x i32> %tmp2
}

; Splat expressed as a shuffle: the float %A is placed in lane 0 of an undef
; vector and a zeroinitializer mask then selects lane 0 for both result lanes.
; FileCheck requires a dup.2s in the code emitted for this function.
define <2 x float> @v_shuffledupfloat(float %A) nounwind {
; CHECK-LABEL: v_shuffledupfloat:
; CHECK: dup.2s
  %tmp1 = insertelement <2 x float> undef, float %A, i32 0
  %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
  ret <2 x float> %tmp2
}

; Splat expressed as a shuffle: %A is placed in lane 0 of an undef vector and
; a zeroinitializer mask then selects lane 0 for all sixteen result lanes.
; FileCheck requires a dup.16b in the code emitted for this function.
define <16 x i8> @v_shuffledupQ8(i8 %A) nounwind {
; CHECK-LABEL: v_shuffledupQ8:
; CHECK: dup.16b
  %tmp1 = insertelement <16 x i8> undef, i8 %A, i32 0
  %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %tmp2
}

; Splat expressed as a shuffle: %A is placed in lane 0 of an undef vector and
; a zeroinitializer mask then selects lane 0 for all eight result lanes.
; FileCheck requires a dup.8h in the code emitted for this function.
define <8 x i16> @v_shuffledupQ16(i16 %A) nounwind {
; CHECK-LABEL: v_shuffledupQ16:
; CHECK: dup.8h
  %tmp1 = insertelement <8 x i16> undef, i16 %A, i32 0
  %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %tmp2
}

; Splat expressed as a shuffle: %A is placed in lane 0 of an undef vector and
; a zeroinitializer mask then selects lane 0 for all four result lanes.
; FileCheck requires a dup.4s in the code emitted for this function.
define <4 x i32> @v_shuffledupQ32(i32 %A) nounwind {
; CHECK-LABEL: v_shuffledupQ32:
; CHECK: dup.4s
  %tmp1 = insertelement <4 x i32> undef, i32 %A, i32 0
  %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %tmp2
}

; Splat expressed as a shuffle: the float %A is placed in lane 0 of an undef
; vector and a zeroinitializer mask then selects lane 0 for all four result
; lanes. FileCheck requires a dup.4s in the code emitted for this function.
define <4 x float> @v_shuffledupQfloat(float %A) nounwind {
; CHECK-LABEL: v_shuffledupQfloat:
; CHECK: dup.4s
  %tmp1 = insertelement <4 x float> undef, float %A, i32 0
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %tmp2
}

; Lane splat: loads a <8 x i8> from %A and broadcasts its lane 1 to all eight
; result lanes via a constant shuffle mask. FileCheck requires a dup.8b in the
; code emitted for this function.
define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
; CHECK-LABEL: vduplane8:
; CHECK: dup.8b
  %tmp1 = load <8 x i8>* %A
  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i8> %tmp2
}

; Lane splat: loads a <4 x i16> from %A and broadcasts its lane 1 to all four
; result lanes via a constant shuffle mask. FileCheck requires a dup.4h in the
; code emitted for this function.
define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
; CHECK-LABEL: vduplane16:
; CHECK: dup.4h
  %tmp1 = load <4 x i16>* %A
  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i16> %tmp2
}

; Lane splat: loads a <2 x i32> from %A and broadcasts its lane 1 to both
; result lanes via a constant shuffle mask. FileCheck requires a dup.2s in the
; code emitted for this function.
define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
; CHECK-LABEL: vduplane32:
; CHECK: dup.2s
  %tmp1 = load <2 x i32>* %A
  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i32> %tmp2
}

; Lane splat: loads a <2 x float> from %A and broadcasts its lane 1 to both
; result lanes via a constant shuffle mask. FileCheck requires a dup.2s in the
; code emitted for this function.
define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
; CHECK-LABEL: vduplanefloat:
; CHECK: dup.2s
  %tmp1 = load <2 x float>* %A
  %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x float> %tmp2
}

; Widening lane splat: loads a 64-bit <8 x i8> from %A and broadcasts its
; lane 1 into a 128-bit, sixteen-lane result. FileCheck requires a dup.16b in
; the code emitted for this function.
define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
; CHECK-LABEL: vduplaneQ8:
; CHECK: dup.16b
  %tmp1 = load <8 x i8>* %A
  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <16 x i8> %tmp2
}

; Widening lane splat: loads a 64-bit <4 x i16> from %A and broadcasts its
; lane 1 into a 128-bit, eight-lane result. FileCheck requires a dup.8h in the
; code emitted for this function.
define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
; CHECK-LABEL: vduplaneQ16:
; CHECK: dup.8h
  %tmp1 = load <4 x i16>* %A
  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i16> %tmp2
}

; Widening lane splat: loads a 64-bit <2 x i32> from %A and broadcasts its
; lane 1 into a 128-bit, four-lane result. FileCheck requires a dup.4s in the
; code emitted for this function.
define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
; CHECK-LABEL: vduplaneQ32:
; CHECK: dup.4s
  %tmp1 = load <2 x i32>* %A
  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %tmp2
}

; Widening lane splat: loads a 64-bit <2 x float> from %A and broadcasts its
; lane 1 into a 128-bit, four-lane result. FileCheck requires a dup.4s in the
; code emitted for this function.
define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
; CHECK-LABEL: vduplaneQfloat:
; CHECK: dup.4s
  %tmp1 = load <2 x float>* %A
  %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x float> %tmp2
}

; 64-bit element splat of the high lane: broadcasts lane 1 of the <2 x i64>
; argument to both result lanes. FileCheck requires a dup.2d in the code
; emitted for this function.
define <2 x i64> @foo(<2 x i64> %arg0_int64x1_t) nounwind readnone {
; CHECK-LABEL: foo:
; CHECK: dup.2d
entry:
  %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %0
}

; 64-bit element splat of the low lane: broadcasts lane 0 of the <2 x i64>
; argument to both result lanes. FileCheck requires a dup.2d in the code
; emitted for this function.
define <2 x i64> @bar(<2 x i64> %arg0_int64x1_t) nounwind readnone {
; CHECK-LABEL: bar:
; CHECK: dup.2d
entry:
  %0 = shufflevector <2 x i64> %arg0_int64x1_t, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x i64> %0
}

; Double-precision splat of the high lane: broadcasts lane 1 of the
; <2 x double> argument to both result lanes. FileCheck requires a dup.2d in
; the code emitted for this function.
define <2 x double> @baz(<2 x double> %arg0_int64x1_t) nounwind readnone {
; CHECK-LABEL: baz:
; CHECK: dup.2d
entry:
  %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %0
}

; Double-precision splat of the low lane: broadcasts lane 0 of the
; <2 x double> argument to both result lanes. FileCheck requires a dup.2d in
; the code emitted for this function.
define <2 x double> @qux(<2 x double> %arg0_int64x1_t) nounwind readnone {
; CHECK-LABEL: qux:
; CHECK: dup.2d
entry:
  %0 = shufflevector <2 x double> %arg0_int64x1_t, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %0
}

; Two distinct scalars, so this is not a splat: %a goes to lane 0 and %b to
; lane 1. The directives pin the exact instruction sequence that must follow
; the label: an fmov for lane 0, one ins.s for lane 1, then ret.
define <2 x i32> @f(i32 %a, i32 %b) nounwind readnone {
; CHECK-LABEL: f:
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ins.s v0[1], w1
; CHECK-NEXT: ret
  %vecinit = insertelement <2 x i32> undef, i32 %a, i32 0
  %vecinit1 = insertelement <2 x i32> %vecinit, i32 %b, i32 1
  ret <2 x i32> %vecinit1
}

; Four-lane vector built from two scalars in the pattern a, b, b, a (lanes
; 0..3). The directives pin the exact instruction sequence that must follow
; the label: an fmov for lane 0, one ins.s per remaining lane, then ret.
define <4 x i32> @g(i32 %a, i32 %b) nounwind readnone {
; CHECK-LABEL: g:
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: ins.s v0[1], w1
; CHECK-NEXT: ins.s v0[2], w1
; CHECK-NEXT: ins.s v0[3], w0
; CHECK-NEXT: ret
  %vecinit = insertelement <4 x i32> undef, i32 %a, i32 0
  %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %b, i32 2
  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %a, i32 3
  ret <4 x i32> %vecinit3
}

; Two distinct i64 scalars: %a goes to lane 0 and %b to lane 1. The directives
; pin the exact instruction sequence that must follow the label: an fmov for
; lane 0, one ins.d for lane 1, then ret.
define <2 x i64> @h(i64 %a, i64 %b) nounwind readnone {
; CHECK-LABEL: h:
; CHECK-NEXT: fmov d0, x0
; CHECK-NEXT: ins.d v0[1], x1
; CHECK-NEXT: ret
  %vecinit = insertelement <2 x i64> undef, i64 %a, i32 0
  %vecinit1 = insertelement <2 x i64> %vecinit, i64 %b, i32 1
  ret <2 x i64> %vecinit1
}

; We used to spot this as a BUILD_VECTOR implementable by dup, but assume that
; the single value needed was of the same type as the vector. This is false if
; the scalar corresponding to the vector type is illegal (e.g. a <4 x i16>
; BUILD_VECTOR will have an i32 as its source). In that case, the operation is
; not a simple "dup vD.4h, vN.h[idx]" after all, and we crashed.
;
; *However*, it is a dup vD.4h, vN.h[2*idx].
;
; Here the i32 is extracted from lane 3 of %in, so the expected halfword lane
; in the dup is 6. Only lane 3 of the result is defined; the rest is undef.
define <4 x i16> @test_build_illegal(<4 x i32> %in) {
; CHECK-LABEL: test_build_illegal:
; CHECK: dup.4h v0, v0[6]
  %val = extractelement <4 x i32> %in, i32 3
  %smallval = trunc i32 %val to i16
  %vec = insertelement <4 x i16> undef, i16 %smallval, i32 3
  ret <4 x i16> %vec
}

; We used to inherit an already extract_subvectored v4i16 from
; SelectionDAGBuilder here. We then added a DUPLANE on top of that, preventing
; the formation of an indexed-by-7 MLS.
;
; The IR splats lane 7 of the 128-bit %v into a <4 x i16>, multiplies it by %b
; and subtracts the product from %a; the expected output is a single mls.4h
; that indexes lane 7 of v2 directly.
; NOTE(review): attribute group #0 is referenced below but no definition is
; visible in this part of the file -- confirm it is defined or intentionally
; left empty.
define <4 x i16> @test_high_splat(<4 x i16> %a, <4 x i16> %b, <8 x i16> %v) #0 {
; CHECK-LABEL: test_high_splat:
; CHECK: mls.4h v0, v1, v2[7]
entry:
  %shuffle = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  %mul = mul <4 x i16> %shuffle, %b
  %sub = sub <4 x i16> %a, %mul
  ret <4 x i16> %sub
}