; RUN: llc < %s -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
;
; Verify that the DAG combiner correctly folds bitwise operations across
; shuffles, nested shuffles with undef, pairs of nested shuffles, and other
; basic and always-safe patterns. Also test that the DAG combiner will combine
; target-specific shuffle instructions where reasonable.

target triple = "x86_64-unknown-unknown"

declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8)
declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8)
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8)

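; As a reading aid for the tests below: the PSHUFD/PSHUFLW/PSHUFHW immediates
; encode two bits per destination lane, starting at bit 0. An immediate of 27
; (0b00011011) therefore selects lanes <3,2,1,0>, a full reverse, so applying
; it twice is the identity; an immediate of -28 (0xE4 = 0b11100100) selects
; lanes <0,1,2,3> and is already the identity. This is why the first few
; combine_pshufd* and combine_pshuflw* tests are expected to fold to a plain
; return.
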
define <4 x i32> @combine_pshufd1(<4 x i32> %a) {
; ALL-LABEL: combine_pshufd1:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
  %c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 27)
  ret <4 x i32> %c
}

define <4 x i32> @combine_pshufd2(<4 x i32> %a) {
; ALL-LABEL: combine_pshufd2:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b.cast, i8 -28)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 27)
  ret <4 x i32> %d
}

define <4 x i32> @combine_pshufd3(<4 x i32> %a) {
; ALL-LABEL: combine_pshufd3:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b.cast, i8 -28)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 27)
  ret <4 x i32> %d
}

define <4 x i32> @combine_pshufd4(<4 x i32> %a) {
; SSE-LABEL: combine_pshufd4:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufd4:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; AVX-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -31)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b.cast, i8 27)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 -31)
  ret <4 x i32> %d
}

define <4 x i32> @combine_pshufd5(<4 x i32> %a) {
; SSE-LABEL: combine_pshufd5:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufd5:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; AVX-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -76)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b.cast, i8 27)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 -76)
  ret <4 x i32> %d
}

define <4 x i32> @combine_pshufd6(<4 x i32> %a) {
; SSE-LABEL: combine_pshufd6:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufd6:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 0)
  %c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 8)
  ret <4 x i32> %c
}

define <8 x i16> @combine_pshuflw1(<8 x i16> %a) {
; ALL-LABEL: combine_pshuflw1:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b, i8 27)
  ret <8 x i16> %c
}

define <8 x i16> @combine_pshuflw2(<8 x i16> %a) {
; ALL-LABEL: combine_pshuflw2:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b, i8 -28)
  %d = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %c, i8 27)
  ret <8 x i16> %d
}

define <8 x i16> @combine_pshuflw3(<8 x i16> %a) {
; SSE-LABEL: combine_pshuflw3:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshuflw3:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; AVX-NEXT: retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b, i8 27)
  %d = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %c, i8 27)
  ret <8 x i16> %d
}

define <8 x i16> @combine_pshufhw1(<8 x i16> %a) {
; SSE-LABEL: combine_pshufhw1:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufhw1:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; AVX-NEXT: retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b, i8 27)
  %d = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %c, i8 27)
  ret <8 x i16> %d
}

define <4 x i32> @combine_bitwise_ops_test1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test1:
; SSE: # BB#0:
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test1:
; AVX: # BB#0:
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test2:
; SSE: # BB#0:
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test2:
; AVX: # BB#0:
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test3:
; SSE: # BB#0:
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test3:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}

define <4 x i32> @combine_bitwise_ops_test4(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test4:
; SSE: # BB#0:
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test4:
; AVX: # BB#0:
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test5(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test5:
; SSE: # BB#0:
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test5:
; AVX: # BB#0:
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test6(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test6:
; SSE: # BB#0:
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test6:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}


; Verify that DAGCombiner moves the shuffle after the xor/and/or even if the
; shuffles are not performing a swizzle operation.
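;
; The folds below rely on the identity (noted here as a reading aid): with a
; common mask M and a shared operand %c,
;   shuffle(a, c, M) op shuffle(b, c, M) == shuffle(a op b, c op c, M)
; For and/or the lanes taken from %c are unchanged (c&c == c, c|c == c), but
; for xor they become zero (c^c == 0), which is why the xor variants blend
; against a zeroed register instead of %c.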

define <4 x i32> @combine_bitwise_ops_test1b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test1b:
; SSE2: # BB#0:
; SSE2-NEXT: andps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test1b:
; SSSE3: # BB#0:
; SSSE3-NEXT: andps %xmm1, %xmm0
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test1b:
; SSE41: # BB#0:
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test1b:
; AVX1: # BB#0:
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test1b:
; AVX2: # BB#0:
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test2b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test2b:
; SSE2: # BB#0:
; SSE2-NEXT: orps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test2b:
; SSSE3: # BB#0:
; SSSE3-NEXT: orps %xmm1, %xmm0
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test2b:
; SSE41: # BB#0:
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test2b:
; AVX1: # BB#0:
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test2b:
; AVX2: # BB#0:
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test3b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test3b:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm0
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test3b:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm0
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test3b:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test3b:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test3b:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}

define <4 x i32> @combine_bitwise_ops_test4b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test4b:
; SSE2: # BB#0:
; SSE2-NEXT: andps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test4b:
; SSSE3: # BB#0:
; SSSE3-NEXT: andps %xmm1, %xmm0
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test4b:
; SSE41: # BB#0:
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test4b:
; AVX1: # BB#0:
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test4b:
; AVX2: # BB#0:
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test5b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test5b:
; SSE2: # BB#0:
; SSE2-NEXT: orps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test5b:
; SSSE3: # BB#0:
; SSSE3-NEXT: orps %xmm1, %xmm0
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test5b:
; SSE41: # BB#0:
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test5b:
; AVX1: # BB#0:
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test5b:
; AVX2: # BB#0:
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test6b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test6b:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm0
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test6b:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm0
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test6b:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test6b:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test6b:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}

define <4 x i32> @combine_bitwise_ops_test1c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test1c:
; SSE: # BB#0:
; SSE-NEXT: andps %xmm1, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test1c:
; AVX: # BB#0:
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test2c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test2c:
; SSE: # BB#0:
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test2c:
; AVX: # BB#0:
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test3c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test3c:
; SSE: # BB#0:
; SSE-NEXT: xorps %xmm1, %xmm0
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test3c:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}

define <4 x i32> @combine_bitwise_ops_test4c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test4c:
; SSE: # BB#0:
; SSE-NEXT: andps %xmm1, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test4c:
; AVX: # BB#0:
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm2[0,2],xmm0[1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test5c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test5c:
; SSE: # BB#0:
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test5c:
; AVX: # BB#0:
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm2[0,2],xmm0[1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test6c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test6c:
; SSE: # BB#0:
; SSE-NEXT: xorps %xmm1, %xmm0
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test6c:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}

define <4 x i32> @combine_nested_undef_test1(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test1:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test1:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 3, i32 1>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test2(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test2:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test2:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test3(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test3:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test3:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test4(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test4:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_nested_undef_test4:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test4:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 7, i32 1>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 4, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test5(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test5:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test5:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 4, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test6(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test6:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test6:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 4>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test7(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test7:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test7:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test8(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test8:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test8:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test9(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test9:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,2]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test9:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,2]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 2, i32 5>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test10(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test10:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test10:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 1, i32 5, i32 5>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test11(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test11:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test11:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 2, i32 5, i32 4>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 0>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test12(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test12:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_nested_undef_test12:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test12:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 0, i32 2, i32 4>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 0, i32 4>
  ret <4 x i32> %2
}

; The following pair of shuffles is folded into vector %A.
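; (Decomposed as a reading aid: %1 = <A[1], B[0], A[2], B[2]> and the outer
; mask <4,0,2,4> only reads lanes 1 and 2 of %1, so the result is
; <undef, A[1], A[2], undef>, which vector %A already satisfies.)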
define <4 x i32> @combine_nested_undef_test13(<4 x i32> %A, <4 x i32> %B) {
; ALL-LABEL: combine_nested_undef_test13:
; ALL: # BB#0:
; ALL-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 4, i32 2, i32 6>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 0, i32 2, i32 4>
  ret <4 x i32> %2
}

; The following pair of shuffles is folded into vector %B.
define <4 x i32> @combine_nested_undef_test14(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test14:
; SSE: # BB#0:
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test14:
; AVX: # BB#0:
; AVX-NEXT: vmovaps %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 4, i32 1, i32 4>
  ret <4 x i32> %2
}


; Verify that we don't optimize the following cases. We expect more than one shuffle.
;
; FIXME: Many of these already don't make sense, and the rest should stop
; making sense with the new vector shuffle lowering. Revisit at least testing for
; it.

define <4 x i32> @combine_nested_undef_test15(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test15:
; SSE: # BB#0:
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,1,0,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test15:
; AVX: # BB#0:
; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[2,0],xmm0[3,1]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 3, i32 1>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test16(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: combine_nested_undef_test16:
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_nested_undef_test16:
; SSSE3: # BB#0:
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_nested_undef_test16:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_nested_undef_test16:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test16:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; AVX2-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test17(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test17:
; SSE: # BB#0:
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[3,1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,1,0,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test17:
; AVX: # BB#0:
; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[3,1]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 3, i32 1>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test18(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test18:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test18:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[1,1,0,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test19(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test19:
; SSE: # BB#0:
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test19:
; AVX: # BB#0:
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,0,0,0]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test20(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test20:
; SSE: # BB#0:
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2],xmm1[0,0]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test20:
; AVX: # BB#0:
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2],xmm1[0,0]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 3, i32 2, i32 4, i32 4>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test21(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test21:
; SSE: # BB#0:
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[3,1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test21:
; AVX: # BB#0:
; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[3,1]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 3, i32 1>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 3>
  ret <4 x i32> %2
}


; Test that we correctly combine shuffles according to rule
;  shuffle(shuffle(x, y), undef) -> shuffle(y, undef)

define <4 x i32> @combine_nested_undef_test22(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test22:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test22:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[1,1,1,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 3>
  ret <4 x i32> %2
}
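; (In combine_nested_undef_test22 above, the inner mask <4,5,2,7> takes lanes
; 0, 1 and 3 from %B, and the outer mask <1,1,1,3> only reads those lanes, so
; the pair collapses to a single shuffle of %B with mask <1,1,1,3>, matching
; the single pshufd of xmm1 in the expected output.)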

define <4 x i32> @combine_nested_undef_test23(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test23:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test23:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test24(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test24:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test24:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,3,2,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 4>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test25(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test25:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_nested_undef_test25:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test25:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: retq
  %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 5, i32 2, i32 4>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 3, i32 1>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test26(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test26:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test26:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 2, i32 6, i32 7>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test27(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test27:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_nested_undef_test27:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test27:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: retq
  %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 2, i32 1, i32 5, i32 4>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
  ret <4 x i32> %2
}

define <4 x i32> @combine_nested_undef_test28(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test28:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test28:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 2>
  ret <4 x i32> %2
}

define <4 x float> @combine_test1(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_test1:
; SSE2: # BB#0:
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test1:
; SSSE3: # BB#0:
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test1:
; SSE41: # BB#0:
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_test1:
; AVX: # BB#0:
; AVX-NEXT: vmovaps %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %2
}

define <4 x float> @combine_test2(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_test2:
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test2:
; SSSE3: # BB#0:
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test2:
; SSE41: # BB#0:
; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_test2:
; AVX: # BB#0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x float> %2
}

define <4 x float> @combine_test3(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_test3:
; SSE: # BB#0:
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test3:
; AVX: # BB#0:
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
  ret <4 x float> %2
}

define <4 x float> @combine_test4(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_test4:
; SSE: # BB#0:
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test4:
; AVX: # BB#0:
; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  ret <4 x float> %2
}

define <4 x float> @combine_test5(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_test5:
; SSE2: # BB#0:
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm2[2,0]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test5:
; SSSE3: # BB#0:
; SSSE3-NEXT: movaps %xmm1, %xmm2
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm2[2,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
; SSSE3-NEXT: movaps %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test5:
; SSE41: # BB#0:
; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm0[1],xmm1[2,3]
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_test5:
; AVX: # BB#0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  ret <4 x float> %2
}

define <4 x i32> @combine_test6(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: combine_test6:
; SSE2: # BB#0:
; SSE2-NEXT: movaps %xmm1, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test6:
; SSSE3: # BB#0:
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test6:
; SSE41: # BB#0:
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_test6:
; AVX: # BB#0:
; AVX-NEXT: vmovaps %xmm1, %xmm0
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %2
}

define <4 x i32> @combine_test7(<4 x i32> %a, <4 x i32> %b) {
; SSE2-LABEL: combine_test7:
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_test7:
; SSSE3: # BB#0:
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_test7:
; SSE41: # BB#0:
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_test7:
; AVX1: # BB#0:
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_test7:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX2-NEXT: retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x i32> %2
}

define <4 x i32> @combine_test8(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_test8:
; SSE: # BB#0:
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test8:
; AVX: # BB#0:
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001329; AVX-NEXT: retq
1330 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
1331 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
1332 ret <4 x i32> %2
1333}
1334
1335define <4 x i32> @combine_test9(<4 x i32> %a, <4 x i32> %b) {
1336; SSE-LABEL: combine_test9:
1337; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001338; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
1339; SSE-NEXT: movdqa %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001340; SSE-NEXT: retq
1341;
1342; AVX-LABEL: combine_test9:
1343; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001344; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001345; AVX-NEXT: retq
1346 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
1347 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
1348 ret <4 x i32> %2
1349}
1350
1351define <4 x i32> @combine_test10(<4 x i32> %a, <4 x i32> %b) {
1352; SSE2-LABEL: combine_test10:
1353; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001354; SSE2-NEXT: movaps %xmm1, %xmm2
1355; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1356; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1357; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm2[2,0]
1358; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
1359; SSE2-NEXT: movaps %xmm2, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001360; SSE2-NEXT: retq
1361;
1362; SSSE3-LABEL: combine_test10:
1363; SSSE3: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001364; SSSE3-NEXT: movaps %xmm1, %xmm2
1365; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1366; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1367; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm2[2,0]
1368; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
1369; SSSE3-NEXT: movaps %xmm2, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001370; SSSE3-NEXT: retq
1371;
1372; SSE41-LABEL: combine_test10:
1373; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001374; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
1375; SSE41-NEXT: movdqa %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001376; SSE41-NEXT: retq
1377;
1378; AVX1-LABEL: combine_test10:
1379; AVX1: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001380; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001381; AVX1-NEXT: retq
1382;
1383; AVX2-LABEL: combine_test10:
1384; AVX2: # BB#0:
1385; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1386; AVX2-NEXT: retq
1387 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1388 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
1389 ret <4 x i32> %2
1390}
1391
1392define <4 x float> @combine_test11(<4 x float> %a, <4 x float> %b) {
1393; ALL-LABEL: combine_test11:
1394; ALL: # BB#0:
1395; ALL-NEXT: retq
1396 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1397 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1398 ret <4 x float> %2
1399}
1400
1401define <4 x float> @combine_test12(<4 x float> %a, <4 x float> %b) {
1402; SSE2-LABEL: combine_test12:
1403; SSE2: # BB#0:
1404; SSE2-NEXT: movss %xmm0, %xmm1
1405; SSE2-NEXT: movss %xmm0, %xmm1
1406; SSE2-NEXT: movaps %xmm1, %xmm0
1407; SSE2-NEXT: retq
1408;
1409; SSSE3-LABEL: combine_test12:
1410; SSSE3: # BB#0:
1411; SSSE3-NEXT: movss %xmm0, %xmm1
1412; SSSE3-NEXT: movss %xmm0, %xmm1
1413; SSSE3-NEXT: movaps %xmm1, %xmm0
1414; SSSE3-NEXT: retq
1415;
1416; SSE41-LABEL: combine_test12:
1417; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001418; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001419; SSE41-NEXT: movaps %xmm1, %xmm0
1420; SSE41-NEXT: retq
1421;
1422; AVX-LABEL: combine_test12:
1423; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001424; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001425; AVX-NEXT: retq
1426 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1427 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
1428 ret <4 x float> %2
1429}
1430
1431define <4 x float> @combine_test13(<4 x float> %a, <4 x float> %b) {
1432; SSE-LABEL: combine_test13:
1433; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001434; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001435; SSE-NEXT: retq
1436;
1437; AVX-LABEL: combine_test13:
1438; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001439; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001440; AVX-NEXT: retq
1441 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1442 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1443 ret <4 x float> %2
1444}
1445
1446define <4 x float> @combine_test14(<4 x float> %a, <4 x float> %b) {
1447; SSE-LABEL: combine_test14:
1448; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001449; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001450; SSE-NEXT: retq
1451;
1452; AVX-LABEL: combine_test14:
1453; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001454; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001455; AVX-NEXT: retq
1456 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 5, i32 5>
1457 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
1458 ret <4 x float> %2
1459}
1460
1461define <4 x float> @combine_test15(<4 x float> %a, <4 x float> %b) {
1462; SSE2-LABEL: combine_test15:
1463; SSE2: # BB#0:
1464; SSE2-NEXT: movaps %xmm0, %xmm2
1465; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[0,0]
1466; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,3]
1467; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[0,0]
1468; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[2,3]
1469; SSE2-NEXT: retq
1470;
1471; SSSE3-LABEL: combine_test15:
1472; SSSE3: # BB#0:
1473; SSSE3-NEXT: movaps %xmm0, %xmm2
1474; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[0,0]
1475; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,3]
1476; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[0,0]
1477; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[2,3]
1478; SSSE3-NEXT: retq
1479;
1480; SSE41-LABEL: combine_test15:
1481; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001482; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm0[1],xmm1[2,3]
1483; SSE41-NEXT: movaps %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001484; SSE41-NEXT: retq
1485;
1486; AVX-LABEL: combine_test15:
1487; AVX: # BB#0:
1488; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1489; AVX-NEXT: retq
1490 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
1491 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
1492 ret <4 x float> %2
1493}
1494
1495define <4 x i32> @combine_test16(<4 x i32> %a, <4 x i32> %b) {
1496; ALL-LABEL: combine_test16:
1497; ALL: # BB#0:
1498; ALL-NEXT: retq
1499 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1500 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1501 ret <4 x i32> %2
1502}
1503
1504define <4 x i32> @combine_test17(<4 x i32> %a, <4 x i32> %b) {
1505; SSE2-LABEL: combine_test17:
1506; SSE2: # BB#0:
1507; SSE2-NEXT: movss %xmm0, %xmm1
1508; SSE2-NEXT: movss %xmm0, %xmm1
1509; SSE2-NEXT: movaps %xmm1, %xmm0
1510; SSE2-NEXT: retq
1511;
1512; SSSE3-LABEL: combine_test17:
1513; SSSE3: # BB#0:
1514; SSSE3-NEXT: movss %xmm0, %xmm1
1515; SSSE3-NEXT: movss %xmm0, %xmm1
1516; SSSE3-NEXT: movaps %xmm1, %xmm0
1517; SSSE3-NEXT: retq
1518;
1519; SSE41-LABEL: combine_test17:
1520; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001521; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1522; SSE41-NEXT: movdqa %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001523; SSE41-NEXT: retq
1524;
Chandler Carruth99627bf2014-10-04 03:52:55 +00001525; AVX1-LABEL: combine_test17:
1526; AVX1: # BB#0:
1527; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1528; AVX1-NEXT: retq
1529;
1530; AVX2-LABEL: combine_test17:
1531; AVX2: # BB#0:
1532; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1533; AVX2-NEXT: retq
Chandler Carruth782b0a72014-10-02 07:56:47 +00001534 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1535 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
1536 ret <4 x i32> %2
1537}
1538
1539define <4 x i32> @combine_test18(<4 x i32> %a, <4 x i32> %b) {
1540; SSE-LABEL: combine_test18:
1541; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001542; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001543; SSE-NEXT: retq
1544;
1545; AVX-LABEL: combine_test18:
1546; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001547; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001548; AVX-NEXT: retq
1549 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1550 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1551 ret <4 x i32> %2
1552}
1553
1554define <4 x i32> @combine_test19(<4 x i32> %a, <4 x i32> %b) {
1555; SSE-LABEL: combine_test19:
1556; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001557; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001558; SSE-NEXT: retq
1559;
1560; AVX-LABEL: combine_test19:
1561; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001562; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001563; AVX-NEXT: retq
1564 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 5, i32 5>
1565 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
1566 ret <4 x i32> %2
1567}
1568
1569define <4 x i32> @combine_test20(<4 x i32> %a, <4 x i32> %b) {
1570; SSE2-LABEL: combine_test20:
1571; SSE2: # BB#0:
1572; SSE2-NEXT: movaps %xmm0, %xmm2
1573; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[0,0]
1574; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,3]
1575; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[0,0]
1576; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[2,3]
1577; SSE2-NEXT: retq
1578;
1579; SSSE3-LABEL: combine_test20:
1580; SSSE3: # BB#0:
1581; SSSE3-NEXT: movaps %xmm0, %xmm2
1582; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[0,0]
1583; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,3]
1584; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[0,0]
1585; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[2,3]
1586; SSSE3-NEXT: retq
1587;
1588; SSE41-LABEL: combine_test20:
1589; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001590; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
1591; SSE41-NEXT: movdqa %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001592; SSE41-NEXT: retq
1593;
1594; AVX1-LABEL: combine_test20:
1595; AVX1: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001596; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001597; AVX1-NEXT: retq
1598;
1599; AVX2-LABEL: combine_test20:
1600; AVX2: # BB#0:
1601; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1602; AVX2-NEXT: retq
1603 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
1604 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
1605 ret <4 x i32> %2
1606}
1607
1608
1609; Check some negative cases.
1610; FIXME: Do any of these really make sense? Are they redundant with the above tests?
1611
1612define <4 x float> @combine_test1b(<4 x float> %a, <4 x float> %b) {
1613; SSE2-LABEL: combine_test1b:
1614; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001615; SSE2-NEXT: movaps %xmm1, %xmm2
1616; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1617; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1618; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm2[0,0]
1619; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm2[2,0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001620; SSE2-NEXT: movaps %xmm1, %xmm0
1621; SSE2-NEXT: retq
1622;
1623; SSSE3-LABEL: combine_test1b:
1624; SSSE3: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001625; SSSE3-NEXT: movaps %xmm1, %xmm2
1626; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1627; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1628; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm2[0,0]
1629; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm2[2,0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001630; SSSE3-NEXT: movaps %xmm1, %xmm0
1631; SSSE3-NEXT: retq
1632;
1633; SSE41-LABEL: combine_test1b:
1634; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001635; SSE41-NEXT: movaps %xmm1, %xmm2
1636; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
1637; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm2[0,0]
1638; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm2[2,0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001639; SSE41-NEXT: movaps %xmm1, %xmm0
1640; SSE41-NEXT: retq
1641;
1642; AVX-LABEL: combine_test1b:
1643; AVX: # BB#0:
1644; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
1645; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
1646; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[2,0],xmm0[2,0]
1647; AVX-NEXT: retq
1648 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1649 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 0>
1650 ret <4 x float> %2
1651}
1652
1653define <4 x float> @combine_test2b(<4 x float> %a, <4 x float> %b) {
1654; SSE2-LABEL: combine_test2b:
1655; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001656; SSE2-NEXT: movaps %xmm1, %xmm2
1657; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1658; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1659; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[1,1]
Chandler Carruth0927da42014-10-05 22:57:31 +00001660; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1661; SSE2-NEXT: movaps %xmm2, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001662; SSE2-NEXT: retq
1663;
1664; SSSE3-LABEL: combine_test2b:
1665; SSSE3: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001666; SSSE3-NEXT: movaps %xmm1, %xmm2
1667; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1668; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1669; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[1,1]
Chandler Carruth0927da42014-10-05 22:57:31 +00001670; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1671; SSSE3-NEXT: movaps %xmm2, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001672; SSSE3-NEXT: retq
1673;
1674; SSE41-LABEL: combine_test2b:
1675; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001676; SSE41-NEXT: movaps %xmm1, %xmm2
1677; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
1678; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[1,1]
Chandler Carruth0927da42014-10-05 22:57:31 +00001679; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1680; SSE41-NEXT: movaps %xmm2, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001681; SSE41-NEXT: retq
1682;
1683; AVX-LABEL: combine_test2b:
1684; AVX: # BB#0:
1685; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
1686; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,1]
Chandler Carruth99627bf2014-10-04 03:52:55 +00001687; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001688; AVX-NEXT: retq
1689 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1690 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 0, i32 5>
1691 ret <4 x float> %2
1692}
1693
1694define <4 x float> @combine_test3b(<4 x float> %a, <4 x float> %b) {
1695; SSE-LABEL: combine_test3b:
1696; SSE: # BB#0:
1697; SSE-NEXT: movaps %xmm1, %xmm2
1698; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm0[3,0]
1699; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm2[0,2]
1700; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[3,3]
Chandler Carruth99627bf2014-10-04 03:52:55 +00001701; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001702; SSE-NEXT: retq
1703;
1704; AVX-LABEL: combine_test3b:
1705; AVX: # BB#0:
1706; AVX-NEXT: vshufps {{.*#+}} xmm2 = xmm1[2,0],xmm0[3,0]
1707; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm2[0,2]
1708; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[3,3]
Chandler Carruth99627bf2014-10-04 03:52:55 +00001709; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001710; AVX-NEXT: retq
1711 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 6, i32 3>
1712 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 7>
1713 ret <4 x float> %2
1714}
1715
1716define <4 x float> @combine_test4b(<4 x float> %a, <4 x float> %b) {
1717; SSE2-LABEL: combine_test4b:
1718; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001719; SSE2-NEXT: movaps %xmm1, %xmm2
1720; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1721; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1722; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[3,0]
1723; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[0,2]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001724; SSE2-NEXT: movaps %xmm1, %xmm0
1725; SSE2-NEXT: retq
1726;
1727; SSSE3-LABEL: combine_test4b:
1728; SSSE3: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001729; SSSE3-NEXT: movaps %xmm1, %xmm2
1730; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1731; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1732; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[3,0]
1733; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[0,2]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001734; SSSE3-NEXT: movaps %xmm1, %xmm0
1735; SSSE3-NEXT: retq
1736;
1737; SSE41-LABEL: combine_test4b:
1738; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001739; SSE41-NEXT: movaps %xmm1, %xmm2
1740; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
1741; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[3,0]
1742; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[0,2]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001743; SSE41-NEXT: movaps %xmm1, %xmm0
1744; SSE41-NEXT: retq
1745;
1746; AVX-LABEL: combine_test4b:
1747; AVX: # BB#0:
1748; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
1749; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
1750; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[1,1],xmm0[0,2]
1751; AVX-NEXT: retq
1752 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1753 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 5, i32 5, i32 2, i32 7>
1754 ret <4 x float> %2
1755}
1756
1757
1758; Verify that we correctly fold shuffles even when we use illegal vector types.
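; As an illustration (the masks are taken from combine_test1c below; the
; composed result is worked out here and is not spelled out in the test):
; folding <0,5,2,7> with <0,1,6,3> yields the single mask <0,5,6,7>, i.e.
; only element 0 comes from %A, which is why the SSE4.1/AVX checks expect a
; single blend once the illegal <4 x i8> operands have been zero-extended to
; 32-bit lanes (the pmovzxbd loads in those checks).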
1759
1760define <4 x i8> @combine_test1c(<4 x i8>* %a, <4 x i8>* %b) {
1761; SSE2-LABEL: combine_test1c:
1762; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001763; SSE2-NEXT: movd (%rdi), %xmm0
1764; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1765; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1766; SSE2-NEXT: movd (%rsi), %xmm1
1767; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1768; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001769; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
Chandler Carruth99627bf2014-10-04 03:52:55 +00001770; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001771; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
1772; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
1773; SSE2-NEXT: retq
1774;
1775; SSSE3-LABEL: combine_test1c:
1776; SSSE3: # BB#0:
1777; SSSE3-NEXT: movd (%rdi), %xmm0
Chandler Carruth99627bf2014-10-04 03:52:55 +00001778; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1779; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1780; SSSE3-NEXT: movd (%rsi), %xmm1
1781; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1782; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1783; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
1784; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
1785; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
1786; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001787; SSSE3-NEXT: retq
1788;
1789; SSE41-LABEL: combine_test1c:
1790; SSE41: # BB#0:
1791; SSE41-NEXT: pmovzxbd (%rdi), %xmm1
1792; SSE41-NEXT: pmovzxbd (%rsi), %xmm0
Chandler Carruth99627bf2014-10-04 03:52:55 +00001793; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001794; SSE41-NEXT: retq
1795;
Chandler Carruth99627bf2014-10-04 03:52:55 +00001796; AVX1-LABEL: combine_test1c:
1797; AVX1: # BB#0:
1798; AVX1-NEXT: vpmovzxbd (%rdi), %xmm0
1799; AVX1-NEXT: vpmovzxbd (%rsi), %xmm1
1800; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1801; AVX1-NEXT: retq
1802;
1803; AVX2-LABEL: combine_test1c:
1804; AVX2: # BB#0:
1805; AVX2-NEXT: vpmovzxbd (%rdi), %xmm0
1806; AVX2-NEXT: vpmovzxbd (%rsi), %xmm1
1807; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1808; AVX2-NEXT: retq
Chandler Carruth782b0a72014-10-02 07:56:47 +00001809 %A = load <4 x i8>* %a
1810 %B = load <4 x i8>* %b
1811 %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
1812 %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
1813 ret <4 x i8> %2
1814}
1815
1816define <4 x i8> @combine_test2c(<4 x i8>* %a, <4 x i8>* %b) {
1817; SSE2-LABEL: combine_test2c:
1818; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001819; SSE2-NEXT: movd (%rdi), %xmm0
1820; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1821; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1822; SSE2-NEXT: movd (%rsi), %xmm1
1823; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1824; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1825; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001826; SSE2-NEXT: retq
1827;
1828; SSSE3-LABEL: combine_test2c:
1829; SSSE3: # BB#0:
1830; SSSE3-NEXT: movd (%rdi), %xmm0
Chandler Carruth99627bf2014-10-04 03:52:55 +00001831; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1832; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1833; SSSE3-NEXT: movd (%rsi), %xmm1
1834; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1835; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1836; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001837; SSSE3-NEXT: retq
1838;
1839; SSE41-LABEL: combine_test2c:
1840; SSE41: # BB#0:
1841; SSE41-NEXT: pmovzxbd (%rdi), %xmm0
1842; SSE41-NEXT: pmovzxbd (%rsi), %xmm1
Chandler Carruth99627bf2014-10-04 03:52:55 +00001843; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001844; SSE41-NEXT: retq
1845;
1846; AVX-LABEL: combine_test2c:
1847; AVX: # BB#0:
1848; AVX-NEXT: vpmovzxbd (%rdi), %xmm0
1849; AVX-NEXT: vpmovzxbd (%rsi), %xmm1
Chandler Carruth99627bf2014-10-04 03:52:55 +00001850; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001851; AVX-NEXT: retq
1852 %A = load <4 x i8>* %a
1853 %B = load <4 x i8>* %b
1854 %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 1, i32 5>
1855 %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
1856 ret <4 x i8> %2
1857}
1858
1859define <4 x i8> @combine_test3c(<4 x i8>* %a, <4 x i8>* %b) {
1860; SSE2-LABEL: combine_test3c:
1861; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001862; SSE2-NEXT: movd (%rdi), %xmm1
1863; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1864; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1865; SSE2-NEXT: movd (%rsi), %xmm0
1866; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1867; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1868; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001869; SSE2-NEXT: retq
1870;
1871; SSSE3-LABEL: combine_test3c:
1872; SSSE3: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001873; SSSE3-NEXT: movd (%rdi), %xmm1
1874; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1875; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1876; SSSE3-NEXT: movd (%rsi), %xmm0
1877; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1878; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1879; SSSE3-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001880; SSSE3-NEXT: retq
1881;
1882; SSE41-LABEL: combine_test3c:
1883; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001884; SSE41-NEXT: pmovzxbd (%rdi), %xmm1
1885; SSE41-NEXT: pmovzxbd (%rsi), %xmm0
1886; SSE41-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001887; SSE41-NEXT: retq
1888;
1889; AVX-LABEL: combine_test3c:
1890; AVX: # BB#0:
1891; AVX-NEXT: vpmovzxbd (%rdi), %xmm0
1892; AVX-NEXT: vpmovzxbd (%rsi), %xmm1
Chandler Carruth99627bf2014-10-04 03:52:55 +00001893; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001894; AVX-NEXT: retq
1895 %A = load <4 x i8>* %a
1896 %B = load <4 x i8>* %b
1897 %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
1898 %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
1899 ret <4 x i8> %2
1900}
1901
1902define <4 x i8> @combine_test4c(<4 x i8>* %a, <4 x i8>* %b) {
1903; SSE2-LABEL: combine_test4c:
1904; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001905; SSE2-NEXT: movd (%rdi), %xmm1
1906; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1907; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1908; SSE2-NEXT: movd (%rsi), %xmm2
1909; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
1910; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
1911; SSE2-NEXT: movdqa %xmm2, %xmm0
1912; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
1913; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
1914; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,0],xmm0[2,0]
1915; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001916; SSE2-NEXT: retq
1917;
1918; SSSE3-LABEL: combine_test4c:
1919; SSSE3: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001920; SSSE3-NEXT: movd (%rdi), %xmm1
1921; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1922; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001923; SSSE3-NEXT: movd (%rsi), %xmm2
Chandler Carruth99627bf2014-10-04 03:52:55 +00001924; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
1925; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
1926; SSSE3-NEXT: movdqa %xmm2, %xmm0
1927; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
1928; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001929; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,0],xmm0[2,0]
1930; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0]
1931; SSSE3-NEXT: retq
1932;
1933; SSE41-LABEL: combine_test4c:
1934; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001935; SSE41-NEXT: pmovzxbd (%rdi), %xmm1
1936; SSE41-NEXT: pmovzxbd (%rsi), %xmm0
1937; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001938; SSE41-NEXT: retq
1939;
1940; AVX1-LABEL: combine_test4c:
1941; AVX1: # BB#0:
1942; AVX1-NEXT: vpmovzxbd (%rdi), %xmm0
1943; AVX1-NEXT: vpmovzxbd (%rsi), %xmm1
Chandler Carruth99627bf2014-10-04 03:52:55 +00001944; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001945; AVX1-NEXT: retq
1946;
1947; AVX2-LABEL: combine_test4c:
1948; AVX2: # BB#0:
1949; AVX2-NEXT: vpmovzxbd (%rdi), %xmm0
1950; AVX2-NEXT: vpmovzxbd (%rsi), %xmm1
1951; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1952; AVX2-NEXT: retq
1953 %A = load <4 x i8>* %a
1954 %B = load <4 x i8>* %b
1955 %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1956 %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
1957 ret <4 x i8> %2
1958}
1959
1960
1961; The following test cases are generated from this C++ code:
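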
1962;
1963;__m128 blend_01(__m128 a, __m128 b)
1964;{
1965; __m128 s = a;
1966; s = _mm_blend_ps( s, b, 1<<0 );
1967; s = _mm_blend_ps( s, b, 1<<1 );
1968; return s;
1969;}
1970;
1971;__m128 blend_02(__m128 a, __m128 b)
1972;{
1973; __m128 s = a;
1974; s = _mm_blend_ps( s, b, 1<<0 );
1975; s = _mm_blend_ps( s, b, 1<<2 );
1976; return s;
1977;}
1978;
1979;__m128 blend_123(__m128 a, __m128 b)
1980;{
1981; __m128 s = a;
1982; s = _mm_blend_ps( s, b, 1<<1 );
1983; s = _mm_blend_ps( s, b, 1<<2 );
1984; s = _mm_blend_ps( s, b, 1<<3 );
1985; return s;
1986;}
1987
1988; Ideally, we should collapse the following shuffles into a single one.
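; For instance, the two blends in blend_01 select b[0] and then b[1], which is
; the same as a single _mm_blend_ps(a, b, 0x3); likewise blend_02 is equivalent
; to a single blend with mask 0x5, and blend_123 to one with mask 0xE. (The
; collapsed masks are worked out here for illustration; they do not appear in
; the C++ above.)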
1989
1990define <4 x float> @combine_blend_01(<4 x float> %a, <4 x float> %b) {
1991; SSE2-LABEL: combine_blend_01:
1992; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001993; SSE2-NEXT: movsd %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001994; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
1995; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
1996; SSE2-NEXT: movaps %xmm1, %xmm0
1997; SSE2-NEXT: retq
1998;
1999; SSSE3-LABEL: combine_blend_01:
2000; SSSE3: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002001; SSSE3-NEXT: movsd %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00002002; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
2003; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
2004; SSSE3-NEXT: movaps %xmm1, %xmm0
2005; SSSE3-NEXT: retq
2006;
2007; SSE41-LABEL: combine_blend_01:
2008; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002009; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
2010; SSE41-NEXT: movapd %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00002011; SSE41-NEXT: retq
2012;
2013; AVX-LABEL: combine_blend_01:
2014; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002015; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00002016; AVX-NEXT: retq
2017 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
2018 %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
2019 ret <4 x float> %shuffle6
2020}
2021
2022define <4 x float> @combine_blend_02(<4 x float> %a, <4 x float> %b) {
2023; SSE2-LABEL: combine_blend_02:
2024; SSE2: # BB#0:
2025; SSE2-NEXT: movss %xmm1, %xmm0
2026; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
2027; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
2028; SSE2-NEXT: retq
2029;
2030; SSSE3-LABEL: combine_blend_02:
2031; SSSE3: # BB#0:
2032; SSSE3-NEXT: movss %xmm1, %xmm0
2033; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
2034; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
2035; SSSE3-NEXT: retq
2036;
2037; SSE41-LABEL: combine_blend_02:
2038; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002039; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
2040; SSE41-NEXT: movaps %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00002041; SSE41-NEXT: retq
2042;
2043; AVX-LABEL: combine_blend_02:
2044; AVX: # BB#0:
2045; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
2046; AVX-NEXT: retq
2047 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 undef, i32 3>
2048 %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
2049 ret <4 x float> %shuffle6
2050}
2051
2052define <4 x float> @combine_blend_123(<4 x float> %a, <4 x float> %b) {
2053; SSE2-LABEL: combine_blend_123:
2054; SSE2: # BB#0:
2055; SSE2-NEXT: movaps %xmm1, %xmm2
Chandler Carruth99627bf2014-10-04 03:52:55 +00002056; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm0[0,0]
2057; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm0[2,3]
2058; SSE2-NEXT: movsd %xmm2, %xmm1
2059; SSE2-NEXT: movaps %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00002060; SSE2-NEXT: retq
2061;
2062; SSSE3-LABEL: combine_blend_123:
2063; SSSE3: # BB#0:
2064; SSSE3-NEXT: movaps %xmm1, %xmm2
Chandler Carruth99627bf2014-10-04 03:52:55 +00002065; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm0[0,0]
2066; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm0[2,3]
2067; SSSE3-NEXT: movsd %xmm2, %xmm1
2068; SSSE3-NEXT: movaps %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00002069; SSSE3-NEXT: retq
2070;
2071; SSE41-LABEL: combine_blend_123:
2072; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002073; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00002074; SSE41-NEXT: movaps %xmm1, %xmm0
2075; SSE41-NEXT: retq
2076;
2077; AVX-LABEL: combine_blend_123:
2078; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002079; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00002080; AVX-NEXT: retq
2081 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
2082 %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
2083 %shuffle12 = shufflevector <4 x float> %shuffle6, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
2084 ret <4 x float> %shuffle12
2085}
2086
2087define <4 x i32> @combine_test_movhl_1(<4 x i32> %a, <4 x i32> %b) {
2088; SSE-LABEL: combine_test_movhl_1:
2089; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002090; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
2091; SSE-NEXT: movdqa %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00002092; SSE-NEXT: retq
2093;
2094; AVX-LABEL: combine_test_movhl_1:
2095; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002096; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00002097; AVX-NEXT: retq
2098 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 7, i32 5, i32 3>
2099 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 1, i32 0, i32 3>
2100 ret <4 x i32> %2
2101}
2102
2103define <4 x i32> @combine_test_movhl_2(<4 x i32> %a, <4 x i32> %b) {
2104; SSE-LABEL: combine_test_movhl_2:
2105; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002106; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
2107; SSE-NEXT: movdqa %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00002108; SSE-NEXT: retq
2109;
2110; AVX-LABEL: combine_test_movhl_2:
2111; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002112; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00002113; AVX-NEXT: retq
2114 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 0, i32 3, i32 6>
2115 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 3, i32 7, i32 0, i32 2>
2116 ret <4 x i32> %2
2117}
2118
2119define <4 x i32> @combine_test_movhl_3(<4 x i32> %a, <4 x i32> %b) {
2120; SSE-LABEL: combine_test_movhl_3:
2121; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002122; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
2123; SSE-NEXT: movdqa %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00002124; SSE-NEXT: retq
2125;
2126; AVX-LABEL: combine_test_movhl_3:
2127; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002128; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00002129; AVX-NEXT: retq
2130 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 6, i32 3, i32 2>
2131 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 0, i32 3, i32 2>
2132 ret <4 x i32> %2
2133}
Chandler Carruth71f41872014-10-02 08:02:34 +00002134
2135
2136; Verify that we fold shuffles according to the rule:
2137; (shuffle(shuffle A, Undef, M0), B, M1) -> (shuffle A, B, M2)
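; For example, in combine_undef_input_test1 below M0 = <4,2,3,1> and
; M1 = <4,5,1,2>; composing them gives M2 = <4,5,2,3>, i.e. the equivalent
; single shuffle
;   %2 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; which matches the blendpd in the SSE4.1 and AVX checks. (M2 is derived here
; for illustration; it is not written out in the test itself.)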
2138
2139define <4 x float> @combine_undef_input_test1(<4 x float> %a, <4 x float> %b) {
2140; SSE2-LABEL: combine_undef_input_test1:
2141; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002142; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
Chandler Carruth71f41872014-10-02 08:02:34 +00002143; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[1,2]
2144; SSE2-NEXT: movaps %xmm1, %xmm0
2145; SSE2-NEXT: retq
2146;
2147; SSSE3-LABEL: combine_undef_input_test1:
2148; SSSE3: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002149; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
Chandler Carruth71f41872014-10-02 08:02:34 +00002150; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[1,2]
2151; SSSE3-NEXT: movaps %xmm1, %xmm0
2152; SSSE3-NEXT: retq
2153;
2154; SSE41-LABEL: combine_undef_input_test1:
2155; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002156; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
2157; SSE41-NEXT: movapd %xmm1, %xmm0
Chandler Carruth71f41872014-10-02 08:02:34 +00002158; SSE41-NEXT: retq
2159;
2160; AVX-LABEL: combine_undef_input_test1:
2161; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002162; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
Chandler Carruth71f41872014-10-02 08:02:34 +00002163; AVX-NEXT: retq
2164 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
2165 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 1, i32 2>
2166 ret <4 x float> %2
2167}
2168
2169define <4 x float> @combine_undef_input_test2(<4 x float> %a, <4 x float> %b) {
2170; SSE-LABEL: combine_undef_input_test2:
2171; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002172; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth71f41872014-10-02 08:02:34 +00002173; SSE-NEXT: retq
2174;
2175; AVX-LABEL: combine_undef_input_test2:
2176; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002177; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth71f41872014-10-02 08:02:34 +00002178; AVX-NEXT: retq
2179 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
2180 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
2181 ret <4 x float> %2
2182}
2183
2184define <4 x float> @combine_undef_input_test3(<4 x float> %a, <4 x float> %b) {
2185; SSE-LABEL: combine_undef_input_test3:
2186; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002187; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth71f41872014-10-02 08:02:34 +00002188; SSE-NEXT: retq
2189;
2190; AVX-LABEL: combine_undef_input_test3:
2191; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002192; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth71f41872014-10-02 08:02:34 +00002193; AVX-NEXT: retq
2194 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
2195 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
2196 ret <4 x float> %2
2197}
2198
2199define <4 x float> @combine_undef_input_test4(<4 x float> %a, <4 x float> %b) {
2200; SSE-LABEL: combine_undef_input_test4:
2201; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002202; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
2203; SSE-NEXT: movapd %xmm1, %xmm0
Chandler Carruth71f41872014-10-02 08:02:34 +00002204; SSE-NEXT: retq
2205;
2206; AVX-LABEL: combine_undef_input_test4:
2207; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002208; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
Chandler Carruth71f41872014-10-02 08:02:34 +00002209; AVX-NEXT: retq
2210 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
2211 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
2212 ret <4 x float> %2
2213}
2214
2215define <4 x float> @combine_undef_input_test5(<4 x float> %a, <4 x float> %b) {
2216; SSE2-LABEL: combine_undef_input_test5:
2217; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002218; SSE2-NEXT: movsd %xmm0, %xmm1
2219; SSE2-NEXT: movaps %xmm1, %xmm0
Chandler Carruth71f41872014-10-02 08:02:34 +00002220; SSE2-NEXT: retq
2221;
2222; SSSE3-LABEL: combine_undef_input_test5:
2223; SSSE3: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002224; SSSE3-NEXT: movsd %xmm0, %xmm1
2225; SSSE3-NEXT: movaps %xmm1, %xmm0
Chandler Carruth71f41872014-10-02 08:02:34 +00002226; SSSE3-NEXT: retq
2227;
2228; SSE41-LABEL: combine_undef_input_test5:
2229; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002230; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
Chandler Carruth71f41872014-10-02 08:02:34 +00002231; SSE41-NEXT: retq
2232;
2233; AVX-LABEL: combine_undef_input_test5:
2234; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002235; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
Chandler Carruth71f41872014-10-02 08:02:34 +00002236; AVX-NEXT: retq
2237 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
2238 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 6, i32 7>
2239 ret <4 x float> %2
2240}
2241
2242
2243; Verify that we fold shuffles according to the rule:
2244; (shuffle(shuffle A, Undef, M0), A, M1) -> (shuffle A, Undef, M2)
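; For example, in combine_undef_input_test6 below M0 = <4,2,3,1> and
; M1 = <4,5,1,2> compose to the identity mask <0,1,2,3>, so no shuffle is
; emitted at all; in combine_undef_input_test7 the masks <6,0,1,7> and
; <1,2,4,5> compose to <0,1,0,1>, the unpcklpd/movlhps seen in the checks.
; (These composed masks are derived here for illustration only.)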
2245
2246define <4 x float> @combine_undef_input_test6(<4 x float> %a) {
2247; ALL-LABEL: combine_undef_input_test6:
2248; ALL: # BB#0:
2249; ALL-NEXT: retq
2250 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
2251 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 1, i32 2>
2252 ret <4 x float> %2
2253}
2254
2255define <4 x float> @combine_undef_input_test7(<4 x float> %a) {
2256; SSE2-LABEL: combine_undef_input_test7:
2257; SSE2: # BB#0:
2258; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2259; SSE2-NEXT: retq
2260;
2261; SSSE3-LABEL: combine_undef_input_test7:
2262; SSSE3: # BB#0:
2263; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
2264; SSSE3-NEXT: retq
2265;
2266; SSE41-LABEL: combine_undef_input_test7:
2267; SSE41: # BB#0:
2268; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
2269; SSE41-NEXT: retq
2270;
2271; AVX-LABEL: combine_undef_input_test7:
2272; AVX: # BB#0:
2273; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
2274; AVX-NEXT: retq
2275 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
2276 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
2277 ret <4 x float> %2
2278}
2279
2280define <4 x float> @combine_undef_input_test8(<4 x float> %a) {
2281; SSE2-LABEL: combine_undef_input_test8:
2282; SSE2: # BB#0:
2283; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2284; SSE2-NEXT: retq
2285;
2286; SSSE3-LABEL: combine_undef_input_test8:
2287; SSSE3: # BB#0:
2288; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
2289; SSSE3-NEXT: retq
2290;
2291; SSE41-LABEL: combine_undef_input_test8:
2292; SSE41: # BB#0:
2293; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
2294; SSE41-NEXT: retq
2295;
2296; AVX-LABEL: combine_undef_input_test8:
2297; AVX: # BB#0:
2298; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
2299; AVX-NEXT: retq
2300 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
2301 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
2302 ret <4 x float> %2
2303}
2304
2305define <4 x float> @combine_undef_input_test9(<4 x float> %a) {
2306; SSE-LABEL: combine_undef_input_test9:
2307; SSE: # BB#0:
2308; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
2309; SSE-NEXT: retq
2310;
2311; AVX-LABEL: combine_undef_input_test9:
2312; AVX: # BB#0:
2313; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
2314; AVX-NEXT: retq
2315 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
2316 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
2317 ret <4 x float> %2
2318}
2319
2320define <4 x float> @combine_undef_input_test10(<4 x float> %a) {
2321; ALL-LABEL: combine_undef_input_test10:
2322; ALL: # BB#0:
2323; ALL-NEXT: retq
2324 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
2325 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 2, i32 6, i32 7>
2326 ret <4 x float> %2
2327}
2328
2329define <4 x float> @combine_undef_input_test11(<4 x float> %a, <4 x float> %b) {
2330; SSE2-LABEL: combine_undef_input_test11:
2331; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002332; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
Chandler Carruth71f41872014-10-02 08:02:34 +00002333; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[1,2]
2334; SSE2-NEXT: movaps %xmm1, %xmm0
2335; SSE2-NEXT: retq
2336;
2337; SSSE3-LABEL: combine_undef_input_test11:
2338; SSSE3: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002339; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
Chandler Carruth71f41872014-10-02 08:02:34 +00002340; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[1,2]
2341; SSSE3-NEXT: movaps %xmm1, %xmm0
2342; SSSE3-NEXT: retq
2343;
2344; SSE41-LABEL: combine_undef_input_test11:
2345; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002346; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
2347; SSE41-NEXT: movapd %xmm1, %xmm0
Chandler Carruth71f41872014-10-02 08:02:34 +00002348; SSE41-NEXT: retq
2349;
2350; AVX-LABEL: combine_undef_input_test11:
2351; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002352; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
Chandler Carruth71f41872014-10-02 08:02:34 +00002353; AVX-NEXT: retq
2354 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
2355 %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 0, i32 1, i32 5, i32 6>
2356 ret <4 x float> %2
2357}
2358
2359define <4 x float> @combine_undef_input_test12(<4 x float> %a, <4 x float> %b) {
2360; SSE-LABEL: combine_undef_input_test12:
2361; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002362; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth71f41872014-10-02 08:02:34 +00002363; SSE-NEXT: retq
2364;
2365; AVX-LABEL: combine_undef_input_test12:
2366; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002367; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth71f41872014-10-02 08:02:34 +00002368; AVX-NEXT: retq
2369 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
2370 %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 5, i32 6, i32 0, i32 1>
2371 ret <4 x float> %2
2372}
2373
2374define <4 x float> @combine_undef_input_test13(<4 x float> %a, <4 x float> %b) {
2375; SSE-LABEL: combine_undef_input_test13:
2376; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002377; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth71f41872014-10-02 08:02:34 +00002378; SSE-NEXT: retq
2379;
2380; AVX-LABEL: combine_undef_input_test13:
2381; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002382; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth71f41872014-10-02 08:02:34 +00002383; AVX-NEXT: retq
2384 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
2385 %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 4, i32 5, i32 0, i32 5>
2386 ret <4 x float> %2
2387}
2388
2389define <4 x float> @combine_undef_input_test14(<4 x float> %a, <4 x float> %b) {
2390; SSE-LABEL: combine_undef_input_test14:
2391; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002392; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
2393; SSE-NEXT: movapd %xmm1, %xmm0
Chandler Carruth71f41872014-10-02 08:02:34 +00002394; SSE-NEXT: retq
2395;
2396; AVX-LABEL: combine_undef_input_test14:
2397; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002398; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
Chandler Carruth71f41872014-10-02 08:02:34 +00002399; AVX-NEXT: retq
2400 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
2401 %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
2402 ret <4 x float> %2
2403}
2404
2405define <4 x float> @combine_undef_input_test15(<4 x float> %a, <4 x float> %b) {
2406; SSE2-LABEL: combine_undef_input_test15:
2407; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002408; SSE2-NEXT: movsd %xmm0, %xmm1
2409; SSE2-NEXT: movaps %xmm1, %xmm0
Chandler Carruth71f41872014-10-02 08:02:34 +00002410; SSE2-NEXT: retq
2411;
2412; SSSE3-LABEL: combine_undef_input_test15:
2413; SSSE3: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002414; SSSE3-NEXT: movsd %xmm0, %xmm1
2415; SSSE3-NEXT: movaps %xmm1, %xmm0
Chandler Carruth71f41872014-10-02 08:02:34 +00002416; SSSE3-NEXT: retq
2417;
2418; SSE41-LABEL: combine_undef_input_test15:
2419; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002420; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
Chandler Carruth71f41872014-10-02 08:02:34 +00002421; SSE41-NEXT: retq
2422;
2423; AVX-LABEL: combine_undef_input_test15:
2424; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002425; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
Chandler Carruth71f41872014-10-02 08:02:34 +00002426; AVX-NEXT: retq
2427 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
2428 %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 2, i32 3>
2429 ret <4 x float> %2
2430}


; Verify that shuffles are canonicalized according to the rule:
;   shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
;
; This canonicalization allows the following combine rule to trigger:
;   (shuffle(shuffle A, Undef, M0), A, M1) -> (shuffle A, Undef, M2)
;
; As a result, all the shuffle pairs in each function below should be
; combined into a single legal shuffle operation.
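;
; For example (an illustrative sketch, not checked by FileCheck), in
; combine_undef_input_test16 below the inner mask <4, 2, 3, 1> produces
; <undef, a[2], a[3], a[1]>, and the outer mask <0, 1, 5, 3> then selects
; <a[0], a[1], %1[1], a[3]> = <a[0], a[1], a[2], a[3]>. The composed shuffle is
; therefore the identity:
;   %2 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; so both shuffles fold away entirely and the function lowers to a bare retq.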

define <4 x float> @combine_undef_input_test16(<4 x float> %a) {
; ALL-LABEL: combine_undef_input_test16:
; ALL: # BB#0:
; ALL-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 0, i32 1, i32 5, i32 3>
  ret <4 x float> %2
}

define <4 x float> @combine_undef_input_test17(<4 x float> %a) {
; SSE2-LABEL: combine_undef_input_test17:
; SSE2: # BB#0:
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test17:
; SSSE3: # BB#0:
; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test17:
; SSE41: # BB#0:
; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test17:
; AVX: # BB#0:
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 5, i32 6, i32 0, i32 1>
  ret <4 x float> %2
}

define <4 x float> @combine_undef_input_test18(<4 x float> %a) {
; SSE2-LABEL: combine_undef_input_test18:
; SSE2: # BB#0:
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test18:
; SSSE3: # BB#0:
; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test18:
; SSE41: # BB#0:
; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test18:
; AVX: # BB#0:
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 0, i32 5>
  ret <4 x float> %2
}

define <4 x float> @combine_undef_input_test19(<4 x float> %a) {
; SSE-LABEL: combine_undef_input_test19:
; SSE: # BB#0:
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test19:
; AVX: # BB#0:
; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  ret <4 x float> %2
}

define <4 x float> @combine_undef_input_test20(<4 x float> %a) {
; ALL-LABEL: combine_undef_input_test20:
; ALL: # BB#0:
; ALL-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 2, i32 3>
  ret <4 x float> %2
}

; These tests check the ability to combine away unnecessary operations feeding
; into a shuffle. The AVX cases are the important ones, as they use operations
; (here, 256-bit integer adds) which cannot always be performed natively on the
; entire vector and thus are decomposed into multiple smaller operations.
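;
; A rough sketch of the intent (illustrative only, not checked by FileCheck):
; in combine_unneeded_subvector1 below, the shuffle mask <7, 6, 5, 4, 7, 6, 5, 4>
; reads only the high 128-bit half of the add result, so the add feeding the
; low half is unneeded. On SSE that shows up as a single paddd on %xmm1; on
; AVX1, where the 256-bit integer add has to be split anyway, only the high
; half is extracted, added, and shuffled before ending up in both halves of
; the %ymm0 result.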

define <8 x i32> @combine_unneeded_subvector1(<8 x i32> %a) {
; SSE-LABEL: combine_unneeded_subvector1:
; SSE: # BB#0:
; SSE-NEXT: paddd {{.*}}(%rip), %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,2,1,0]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_unneeded_subvector1:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_unneeded_subvector1:
; AVX2: # BB#0:
; AVX2-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %b = add <8 x i32> %a, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  %c = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i32> %c
}

define <8 x i32> @combine_unneeded_subvector2(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: combine_unneeded_subvector2:
; SSE: # BB#0:
; SSE-NEXT: paddd {{.*}}(%rip), %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,2,1,0]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_unneeded_subvector2:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,2,1,0]
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_unneeded_subvector2:
; AVX2: # BB#0:
; AVX2-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <7,6,5,4,u,u,u,u>
; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: retq
  %c = add <8 x i32> %a, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  %d = shufflevector <8 x i32> %b, <8 x i32> %c, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
  ret <8 x i32> %d
}