blob: 87b05202d9af7cee00eec4db388e1f4bec623bc8 [file] [log] [blame]
; RUN: llc < %s -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
;
; Verify that the DAG combiner correctly folds bitwise operations across
; shuffles, nested shuffles with undef, pairs of nested shuffles, and other
; basic and always-safe patterns. Also test that the DAG combiner will combine
; target-specific shuffle instructions where reasonable.

target triple = "x86_64-unknown-unknown"

declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8)
declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8)
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8)
17
; Two identical self-inverse pshufd swizzles (27 = 0b00011011 = [3,2,1,0]) cancel to the identity.
define <4 x i32> @combine_pshufd1(<4 x i32> %a) {
; ALL-LABEL: combine_pshufd1:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
  %c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 27)
  ret <4 x i32> %c
}
27
; pshuflw -28 (0xe4 = [0,1,2,3]) is the identity mask, so the two pshufd 27 swizzles cancel.
define <4 x i32> @combine_pshufd2(<4 x i32> %a) {
; ALL-LABEL: combine_pshufd2:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b.cast, i8 -28)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 27)
  ret <4 x i32> %d
}
40
; pshufhw -28 (0xe4 = [0,1,2,3]) is the identity mask, so the two pshufd 27 swizzles cancel.
define <4 x i32> @combine_pshufd3(<4 x i32> %a) {
; ALL-LABEL: combine_pshufd3:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b.cast, i8 -28)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 27)
  ret <4 x i32> %d
}
53
; The two pshufd -31 swizzles cancel, leaving a single pshufhw that reverses the high words.
define <4 x i32> @combine_pshufd4(<4 x i32> %a) {
; SSE-LABEL: combine_pshufd4:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufd4:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; AVX-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -31)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b.cast, i8 27)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 -31)
  ret <4 x i32> %d
}
72
; The two pshufd -76 swizzles cancel, leaving a single pshuflw that reverses the low words.
define <4 x i32> @combine_pshufd5(<4 x i32> %a) {
; SSE-LABEL: combine_pshufd5:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufd5:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; AVX-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -76)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b.cast, i8 27)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 -76)
  ret <4 x i32> %d
}
91
; pshufd 0 splats element 0; any further swizzle of that splat folds away, leaving one splat.
define <4 x i32> @combine_pshufd6(<4 x i32> %a) {
; SSE-LABEL: combine_pshufd6:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufd6:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 0)
  %c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 8)
  ret <4 x i32> %c
}
107
; Two identical self-inverse pshuflw swizzles (27 = [3,2,1,0]) cancel to the identity.
define <8 x i16> @combine_pshuflw1(<8 x i16> %a) {
; ALL-LABEL: combine_pshuflw1:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b, i8 27)
  ret <8 x i16> %c
}
117
; pshufhw -28 (0xe4) is the identity, and the two self-inverse pshuflw 27 swizzles cancel.
define <8 x i16> @combine_pshuflw2(<8 x i16> %a) {
; ALL-LABEL: combine_pshuflw2:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b, i8 -28)
  %d = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %c, i8 27)
  ret <8 x i16> %d
}
128
; The two pshuflw 27 swizzles cancel, leaving only the pshufhw on the high words.
define <8 x i16> @combine_pshuflw3(<8 x i16> %a) {
; SSE-LABEL: combine_pshuflw3:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshuflw3:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; AVX-NEXT: retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b, i8 27)
  %d = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %c, i8 27)
  ret <8 x i16> %d
}
145
; The two pshufhw 27 swizzles cancel, leaving only the pshuflw on the low words.
define <8 x i16> @combine_pshufhw1(<8 x i16> %a) {
; SSE-LABEL: combine_pshufhw1:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufhw1:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; AVX-NEXT: retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b, i8 27)
  %d = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %c, i8 27)
  ret <8 x i16> %d
}
Chandler Carruth21105012014-10-02 07:30:24 +0000162
; Identical swizzles on both operands sink below the AND, leaving one pand + one pshufd.
define <4 x i32> @combine_bitwise_ops_test1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test1:
; SSE: # BB#0:
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test1:
; AVX: # BB#0:
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}
180
; Identical swizzles on both operands sink below the OR, leaving one por + one pshufd.
define <4 x i32> @combine_bitwise_ops_test2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test2:
; SSE: # BB#0:
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test2:
; AVX: # BB#0:
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}
198
; Identical swizzles on both operands sink below the XOR, leaving one pxor + one pshufd.
define <4 x i32> @combine_bitwise_ops_test3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test3:
; SSE: # BB#0:
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test3:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}
216
; Same AND fold when the swizzle selects only the second shuffle input (%a / %b).
define <4 x i32> @combine_bitwise_ops_test4(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test4:
; SSE: # BB#0:
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test4:
; AVX: # BB#0:
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}
234
; Same OR fold when the swizzle selects only the second shuffle input (%a / %b).
define <4 x i32> @combine_bitwise_ops_test5(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test5:
; SSE: # BB#0:
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test5:
; AVX: # BB#0:
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}
252
; Same XOR fold when the swizzle selects only the second shuffle input (%a / %b).
define <4 x i32> @combine_bitwise_ops_test6(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test6:
; SSE: # BB#0:
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test6:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}
270
271
; Verify that DAGCombiner moves the shuffle after the xor/and/or even if the
; shuffles are not performing swizzle operations.
274
; A blend-style (two-input, non-swizzle) shuffle also sinks below the AND.
define <4 x i32> @combine_bitwise_ops_test1b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test1b:
; SSE2: # BB#0:
; SSE2-NEXT: andps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test1b:
; SSSE3: # BB#0:
; SSSE3-NEXT: andps %xmm1, %xmm0
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test1b:
; SSE41: # BB#0:
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test1b:
; AVX1: # BB#0:
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test1b:
; AVX2: # BB#0:
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}
312
; A blend-style (two-input, non-swizzle) shuffle also sinks below the OR.
define <4 x i32> @combine_bitwise_ops_test2b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test2b:
; SSE2: # BB#0:
; SSE2-NEXT: orps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test2b:
; SSSE3: # BB#0:
; SSSE3-NEXT: orps %xmm1, %xmm0
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test2b:
; SSE41: # BB#0:
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test2b:
; AVX1: # BB#0:
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test2b:
; AVX2: # BB#0:
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}
350
; XOR of the two blends: the shared %c lanes cancel to zero, so a zeroed register is blended in.
define <4 x i32> @combine_bitwise_ops_test3b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test3b:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm0
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test3b:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm0
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test3b:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test3b:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test3b:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}
393
; Blend-style shuffle sinks below the AND with the %c/%a operand order reversed.
define <4 x i32> @combine_bitwise_ops_test4b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test4b:
; SSE2: # BB#0:
; SSE2-NEXT: andps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test4b:
; SSSE3: # BB#0:
; SSSE3-NEXT: andps %xmm1, %xmm0
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test4b:
; SSE41: # BB#0:
; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test4b:
; AVX1: # BB#0:
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test4b:
; AVX2: # BB#0:
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}
432
; Blend-style shuffle sinks below the OR with the %c/%a operand order reversed.
define <4 x i32> @combine_bitwise_ops_test5b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test5b:
; SSE2: # BB#0:
; SSE2-NEXT: orps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test5b:
; SSSE3: # BB#0:
; SSSE3-NEXT: orps %xmm1, %xmm0
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test5b:
; SSE41: # BB#0:
; SSE41-NEXT: por %xmm1, %xmm0
; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test5b:
; AVX1: # BB#0:
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test5b:
; AVX2: # BB#0:
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}
471
; XOR of the two blends with %c first: the shared %c lanes cancel to zero.
define <4 x i32> @combine_bitwise_ops_test6b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test6b:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm0
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test6b:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm0
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test6b:
; SSE41: # BB#0:
; SSE41-NEXT: pxor %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm1, %xmm1
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test6b:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test6b:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}
517
; Identical two-input shuffles sink below the AND, leaving one andps + one shufps.
define <4 x i32> @combine_bitwise_ops_test1c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test1c:
; SSE: # BB#0:
; SSE-NEXT: andps %xmm1, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test1c:
; AVX: # BB#0:
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}
535
; Identical two-input shuffles sink below the OR, leaving one orps + one shufps.
define <4 x i32> @combine_bitwise_ops_test2c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test2c:
; SSE: # BB#0:
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test2c:
; AVX: # BB#0:
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}
553
; XOR variant: the shared %c lanes cancel to zero, so a zeroed register feeds the shufps.
define <4 x i32> @combine_bitwise_ops_test3c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test3c:
; SSE: # BB#0:
; SSE-NEXT: xorps %xmm1, %xmm0
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test3c:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}
573
; Two-input shuffle sinks below the AND with the %c/%a operand order reversed.
define <4 x i32> @combine_bitwise_ops_test4c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test4c:
; SSE: # BB#0:
; SSE-NEXT: andps %xmm1, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test4c:
; AVX: # BB#0:
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm2[0,2],xmm0[1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}
592
; Two-input shuffle sinks below the OR with the %c/%a operand order reversed.
define <4 x i32> @combine_bitwise_ops_test5c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test5c:
; SSE: # BB#0:
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test5c:
; AVX: # BB#0:
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm2[0,2],xmm0[1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}
611
; XOR variant with %c first: the shared %c lanes cancel to zero before the shufps.
define <4 x i32> @combine_bitwise_ops_test6c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test6c:
; SSE: # BB#0:
; SSE-NEXT: xorps %xmm1, %xmm0
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test6c:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}
Chandler Carruthb2941e22014-10-02 07:42:58 +0000632
; Nested shuffles whose undef/%B lanes are never read fold to a single pshufd.
define <4 x i32> @combine_nested_undef_test1(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test1:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test1:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 3, i32 1>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
  ret <4 x i32> %2
}
647
; Nested shuffles whose undef/%B lanes are never read fold to a single pshufd.
define <4 x i32> @combine_nested_undef_test2(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test2:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test2:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
  ret <4 x i32> %2
}
662
; Nested shuffles whose undef/%B lanes are never read fold to a single pshufd.
define <4 x i32> @combine_nested_undef_test3(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test3:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test3:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
  ret <4 x i32> %2
}
677
; Folds to a splat of the low 64 bits; AVX2 can use vpbroadcastq for the same pattern.
define <4 x i32> @combine_nested_undef_test4(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test4:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_nested_undef_test4:
; AVX1: # BB#0:
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_nested_undef_test4:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 7, i32 1>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 4, i32 0, i32 3>
  ret <4 x i32> %2
}
697
; Nested shuffles whose undef/%B lanes are never read fold to a single pshufd.
define <4 x i32> @combine_nested_undef_test5(<4 x i32> %A, <4 x i32> %B) {
; SSE-LABEL: combine_nested_undef_test5:
; SSE: # BB#0:
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_nested_undef_test5:
; AVX: # BB#0:
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 4, i32 3>
  ret <4 x i32> %2
}
712
713define <4 x i32> @combine_nested_undef_test6(<4 x i32> %A, <4 x i32> %B) {
714; SSE-LABEL: combine_nested_undef_test6:
715; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +0000716; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
Chandler Carruthb2941e22014-10-02 07:42:58 +0000717; SSE-NEXT: retq
718;
719; AVX-LABEL: combine_nested_undef_test6:
720; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +0000721; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
Chandler Carruthb2941e22014-10-02 07:42:58 +0000722; AVX-NEXT: retq
723 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
724 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 4>
725 ret <4 x i32> %2
726}
727
728define <4 x i32> @combine_nested_undef_test7(<4 x i32> %A, <4 x i32> %B) {
729; SSE-LABEL: combine_nested_undef_test7:
730; SSE: # BB#0:
731; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
732; SSE-NEXT: retq
733;
734; AVX-LABEL: combine_nested_undef_test7:
735; AVX: # BB#0:
736; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
737; AVX-NEXT: retq
738 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
739 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
740 ret <4 x i32> %2
741}
742
743define <4 x i32> @combine_nested_undef_test8(<4 x i32> %A, <4 x i32> %B) {
744; SSE-LABEL: combine_nested_undef_test8:
745; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +0000746; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
Chandler Carruthb2941e22014-10-02 07:42:58 +0000747; SSE-NEXT: retq
748;
749; AVX-LABEL: combine_nested_undef_test8:
750; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +0000751; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
Chandler Carruthb2941e22014-10-02 07:42:58 +0000752; AVX-NEXT: retq
753 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
754 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
755 ret <4 x i32> %2
756}
757
758define <4 x i32> @combine_nested_undef_test9(<4 x i32> %A, <4 x i32> %B) {
759; SSE-LABEL: combine_nested_undef_test9:
760; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +0000761; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,2]
Chandler Carruthb2941e22014-10-02 07:42:58 +0000762; SSE-NEXT: retq
763;
764; AVX-LABEL: combine_nested_undef_test9:
765; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +0000766; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,2]
Chandler Carruthb2941e22014-10-02 07:42:58 +0000767; AVX-NEXT: retq
768 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 2, i32 5>
769 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
770 ret <4 x i32> %2
771}
772
773define <4 x i32> @combine_nested_undef_test10(<4 x i32> %A, <4 x i32> %B) {
774; SSE-LABEL: combine_nested_undef_test10:
775; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +0000776; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
Chandler Carruthb2941e22014-10-02 07:42:58 +0000777; SSE-NEXT: retq
778;
779; AVX-LABEL: combine_nested_undef_test10:
780; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +0000781; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
Chandler Carruthb2941e22014-10-02 07:42:58 +0000782; AVX-NEXT: retq
783 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 1, i32 5, i32 5>
784 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
785 ret <4 x i32> %2
786}
787
788define <4 x i32> @combine_nested_undef_test11(<4 x i32> %A, <4 x i32> %B) {
789; SSE-LABEL: combine_nested_undef_test11:
790; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +0000791; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,1]
Chandler Carruthb2941e22014-10-02 07:42:58 +0000792; SSE-NEXT: retq
793;
794; AVX-LABEL: combine_nested_undef_test11:
795; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +0000796; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,1]
Chandler Carruthb2941e22014-10-02 07:42:58 +0000797; AVX-NEXT: retq
798 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 2, i32 5, i32 4>
799 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 0>
800 ret <4 x i32> %2
801}
802
803define <4 x i32> @combine_nested_undef_test12(<4 x i32> %A, <4 x i32> %B) {
804; SSE-LABEL: combine_nested_undef_test12:
805; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +0000806; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
Chandler Carruthb2941e22014-10-02 07:42:58 +0000807; SSE-NEXT: retq
808;
809; AVX1-LABEL: combine_nested_undef_test12:
810; AVX1: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +0000811; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
Chandler Carruthb2941e22014-10-02 07:42:58 +0000812; AVX1-NEXT: retq
813;
814; AVX2-LABEL: combine_nested_undef_test12:
815; AVX2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +0000816; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
Chandler Carruthb2941e22014-10-02 07:42:58 +0000817; AVX2-NEXT: retq
818 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 0, i32 2, i32 4>
819 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 0, i32 4>
820 ret <4 x i32> %2
821}
822
823; The following pair of shuffles is folded into vector %A.
824define <4 x i32> @combine_nested_undef_test13(<4 x i32> %A, <4 x i32> %B) {
825; ALL-LABEL: combine_nested_undef_test13:
826; ALL: # BB#0:
827; ALL-NEXT: retq
828 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 4, i32 2, i32 6>
829 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 0, i32 2, i32 4>
830 ret <4 x i32> %2
831}
832
833; The following pair of shuffles is folded into vector %B.
834define <4 x i32> @combine_nested_undef_test14(<4 x i32> %A, <4 x i32> %B) {
835; SSE-LABEL: combine_nested_undef_test14:
836; SSE: # BB#0:
837; SSE-NEXT: movaps %xmm1, %xmm0
838; SSE-NEXT: retq
839;
840; AVX-LABEL: combine_nested_undef_test14:
841; AVX: # BB#0:
842; AVX-NEXT: vmovaps %xmm1, %xmm0
843; AVX-NEXT: retq
844 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
845 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 4, i32 1, i32 4>
846 ret <4 x i32> %2
847}
848
849
850; Verify that we don't optimize the following cases. We expect more than one shuffle.
851;
852; FIXME: Many of these already don't make sense, and the rest should stop
853; making sense with th enew vector shuffle lowering. Revisit at least testing for
854; it.
855
856define <4 x i32> @combine_nested_undef_test15(<4 x i32> %A, <4 x i32> %B) {
857; SSE-LABEL: combine_nested_undef_test15:
858; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +0000859; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
Chandler Carruthb2941e22014-10-02 07:42:58 +0000860; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,1]
861; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,1,0,3]
862; SSE-NEXT: retq
863;
864; AVX-LABEL: combine_nested_undef_test15:
865; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +0000866; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
Chandler Carruthb2941e22014-10-02 07:42:58 +0000867; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[2,0],xmm0[3,1]
868; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
869; AVX-NEXT: retq
870 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 3, i32 1>
871 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
872 ret <4 x i32> %2
873}
874
875define <4 x i32> @combine_nested_undef_test16(<4 x i32> %A, <4 x i32> %B) {
876; SSE2-LABEL: combine_nested_undef_test16:
877; SSE2: # BB#0:
878; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
Chandler Carruth99627bf2014-10-04 03:52:55 +0000879; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
880; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
Chandler Carruthb2941e22014-10-02 07:42:58 +0000881; SSE2-NEXT: retq
882;
883; SSSE3-LABEL: combine_nested_undef_test16:
884; SSSE3: # BB#0:
885; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
Chandler Carruth99627bf2014-10-04 03:52:55 +0000886; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
887; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
Chandler Carruthb2941e22014-10-02 07:42:58 +0000888; SSSE3-NEXT: retq
889;
890; SSE41-LABEL: combine_nested_undef_test16:
891; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +0000892; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
Chandler Carruthb2941e22014-10-02 07:42:58 +0000893; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
894; SSE41-NEXT: retq
895;
896; AVX1-LABEL: combine_nested_undef_test16:
897; AVX1: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +0000898; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
Chandler Carruthb2941e22014-10-02 07:42:58 +0000899; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
900; AVX1-NEXT: retq
901;
902; AVX2-LABEL: combine_nested_undef_test16:
903; AVX2: # BB#0:
904; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
905; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
906; AVX2-NEXT: retq
907 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
908 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
909 ret <4 x i32> %2
910}
911
912define <4 x i32> @combine_nested_undef_test17(<4 x i32> %A, <4 x i32> %B) {
913; SSE-LABEL: combine_nested_undef_test17:
914; SSE: # BB#0:
915; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
916; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[3,1]
917; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,1,0,3]
918; SSE-NEXT: retq
919;
920; AVX-LABEL: combine_nested_undef_test17:
921; AVX: # BB#0:
922; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
923; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[3,1]
924; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
925; AVX-NEXT: retq
926 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 3, i32 1>
927 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
928 ret <4 x i32> %2
929}
930
931define <4 x i32> @combine_nested_undef_test18(<4 x i32> %A, <4 x i32> %B) {
932; SSE-LABEL: combine_nested_undef_test18:
933; SSE: # BB#0:
934; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,3]
935; SSE-NEXT: retq
936;
937; AVX-LABEL: combine_nested_undef_test18:
938; AVX: # BB#0:
939; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[1,1,0,3]
940; AVX-NEXT: retq
941 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
942 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3>
943 ret <4 x i32> %2
944}
945
946define <4 x i32> @combine_nested_undef_test19(<4 x i32> %A, <4 x i32> %B) {
947; SSE-LABEL: combine_nested_undef_test19:
948; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +0000949; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
Chandler Carruthb2941e22014-10-02 07:42:58 +0000950; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
951; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,0,0,0]
952; SSE-NEXT: retq
953;
954; AVX-LABEL: combine_nested_undef_test19:
955; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +0000956; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
Chandler Carruthb2941e22014-10-02 07:42:58 +0000957; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
958; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,0,0,0]
959; AVX-NEXT: retq
960 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
961 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
962 ret <4 x i32> %2
963}
964
965define <4 x i32> @combine_nested_undef_test20(<4 x i32> %A, <4 x i32> %B) {
966; SSE-LABEL: combine_nested_undef_test20:
967; SSE: # BB#0:
968; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2],xmm1[0,0]
969; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
970; SSE-NEXT: retq
971;
972; AVX-LABEL: combine_nested_undef_test20:
973; AVX: # BB#0:
974; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2],xmm1[0,0]
975; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
976; AVX-NEXT: retq
977 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 3, i32 2, i32 4, i32 4>
978 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
979 ret <4 x i32> %2
980}
981
982define <4 x i32> @combine_nested_undef_test21(<4 x i32> %A, <4 x i32> %B) {
983; SSE-LABEL: combine_nested_undef_test21:
984; SSE: # BB#0:
985; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
986; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[3,1]
987; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,3]
988; SSE-NEXT: retq
989;
990; AVX-LABEL: combine_nested_undef_test21:
991; AVX: # BB#0:
992; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
993; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[3,1]
994; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
995; AVX-NEXT: retq
996 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 3, i32 1>
997 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 3>
998 ret <4 x i32> %2
999}
1000
1001
1002; Test that we correctly combine shuffles according to rule
1003; shuffle(shuffle(x, y), undef) -> shuffle(y, undef)
1004
1005define <4 x i32> @combine_nested_undef_test22(<4 x i32> %A, <4 x i32> %B) {
1006; SSE-LABEL: combine_nested_undef_test22:
1007; SSE: # BB#0:
1008; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,3]
1009; SSE-NEXT: retq
1010;
1011; AVX-LABEL: combine_nested_undef_test22:
1012; AVX: # BB#0:
1013; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[1,1,1,3]
1014; AVX-NEXT: retq
1015 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
1016 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 3>
1017 ret <4 x i32> %2
1018}
1019
1020define <4 x i32> @combine_nested_undef_test23(<4 x i32> %A, <4 x i32> %B) {
1021; SSE-LABEL: combine_nested_undef_test23:
1022; SSE: # BB#0:
1023; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,0,3]
1024; SSE-NEXT: retq
1025;
1026; AVX-LABEL: combine_nested_undef_test23:
1027; AVX: # BB#0:
1028; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,3]
1029; AVX-NEXT: retq
1030 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
1031 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 3>
1032 ret <4 x i32> %2
1033}
1034
1035define <4 x i32> @combine_nested_undef_test24(<4 x i32> %A, <4 x i32> %B) {
1036; SSE-LABEL: combine_nested_undef_test24:
1037; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001038; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,3,2,3]
Chandler Carruthb2941e22014-10-02 07:42:58 +00001039; SSE-NEXT: retq
1040;
1041; AVX-LABEL: combine_nested_undef_test24:
1042; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001043; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,3,2,3]
Chandler Carruthb2941e22014-10-02 07:42:58 +00001044; AVX-NEXT: retq
1045 %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
1046 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 4>
1047 ret <4 x i32> %2
1048}
1049
1050define <4 x i32> @combine_nested_undef_test25(<4 x i32> %A, <4 x i32> %B) {
1051; SSE-LABEL: combine_nested_undef_test25:
1052; SSE: # BB#0:
1053; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1054; SSE-NEXT: retq
1055;
Chandler Carruth99627bf2014-10-04 03:52:55 +00001056; AVX1-LABEL: combine_nested_undef_test25:
1057; AVX1: # BB#0:
1058; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1059; AVX1-NEXT: retq
1060;
1061; AVX2-LABEL: combine_nested_undef_test25:
1062; AVX2: # BB#0:
1063; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
1064; AVX2-NEXT: retq
Chandler Carruthb2941e22014-10-02 07:42:58 +00001065 %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 5, i32 2, i32 4>
1066 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 3, i32 1>
1067 ret <4 x i32> %2
1068}
1069
1070define <4 x i32> @combine_nested_undef_test26(<4 x i32> %A, <4 x i32> %B) {
1071; SSE-LABEL: combine_nested_undef_test26:
1072; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001073; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
Chandler Carruthb2941e22014-10-02 07:42:58 +00001074; SSE-NEXT: retq
1075;
1076; AVX-LABEL: combine_nested_undef_test26:
1077; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001078; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
Chandler Carruthb2941e22014-10-02 07:42:58 +00001079; AVX-NEXT: retq
1080 %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 2, i32 6, i32 7>
1081 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
1082 ret <4 x i32> %2
1083}
1084
1085define <4 x i32> @combine_nested_undef_test27(<4 x i32> %A, <4 x i32> %B) {
1086; SSE-LABEL: combine_nested_undef_test27:
1087; SSE: # BB#0:
1088; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1089; SSE-NEXT: retq
1090;
Chandler Carruth99627bf2014-10-04 03:52:55 +00001091; AVX1-LABEL: combine_nested_undef_test27:
1092; AVX1: # BB#0:
1093; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
1094; AVX1-NEXT: retq
1095;
1096; AVX2-LABEL: combine_nested_undef_test27:
1097; AVX2: # BB#0:
1098; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
1099; AVX2-NEXT: retq
Chandler Carruthb2941e22014-10-02 07:42:58 +00001100 %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 2, i32 1, i32 5, i32 4>
1101 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
1102 ret <4 x i32> %2
1103}
1104
1105define <4 x i32> @combine_nested_undef_test28(<4 x i32> %A, <4 x i32> %B) {
1106; SSE-LABEL: combine_nested_undef_test28:
1107; SSE: # BB#0:
1108; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
1109; SSE-NEXT: retq
1110;
1111; AVX-LABEL: combine_nested_undef_test28:
1112; AVX: # BB#0:
1113; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
1114; AVX-NEXT: retq
1115 %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
1116 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 2>
1117 ret <4 x i32> %2
1118}
Chandler Carruth782b0a72014-10-02 07:56:47 +00001119
1120define <4 x float> @combine_test1(<4 x float> %a, <4 x float> %b) {
1121; SSE2-LABEL: combine_test1:
1122; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001123; SSE2-NEXT: movaps %xmm1, %xmm2
1124; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1125; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1126; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,3]
1127; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001128; SSE2-NEXT: retq
1129;
1130; SSSE3-LABEL: combine_test1:
1131; SSSE3: # BB#0:
1132; SSSE3-NEXT: movaps %xmm1, %xmm0
1133; SSSE3-NEXT: retq
1134;
1135; SSE41-LABEL: combine_test1:
1136; SSE41: # BB#0:
1137; SSE41-NEXT: movaps %xmm1, %xmm0
1138; SSE41-NEXT: retq
1139;
1140; AVX-LABEL: combine_test1:
1141; AVX: # BB#0:
1142; AVX-NEXT: vmovaps %xmm1, %xmm0
1143; AVX-NEXT: retq
1144 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1145 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
1146 ret <4 x float> %2
1147}
1148
1149define <4 x float> @combine_test2(<4 x float> %a, <4 x float> %b) {
1150; SSE2-LABEL: combine_test2:
1151; SSE2: # BB#0:
1152; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
Chandler Carruth99627bf2014-10-04 03:52:55 +00001153; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001154; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
1155; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
1156; SSE2-NEXT: retq
1157;
1158; SSSE3-LABEL: combine_test2:
1159; SSSE3: # BB#0:
1160; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
Chandler Carruth99627bf2014-10-04 03:52:55 +00001161; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001162; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
1163; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
1164; SSSE3-NEXT: retq
1165;
1166; SSE41-LABEL: combine_test2:
1167; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001168; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001169; SSE41-NEXT: movaps %xmm1, %xmm0
1170; SSE41-NEXT: retq
1171;
1172; AVX-LABEL: combine_test2:
1173; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001174; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001175; AVX-NEXT: retq
1176 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
1177 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
1178 ret <4 x float> %2
1179}
1180
1181define <4 x float> @combine_test3(<4 x float> %a, <4 x float> %b) {
1182; SSE-LABEL: combine_test3:
1183; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001184; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001185; SSE-NEXT: retq
1186;
1187; AVX-LABEL: combine_test3:
1188; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001189; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001190; AVX-NEXT: retq
1191 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
1192 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
1193 ret <4 x float> %2
1194}
1195
1196define <4 x float> @combine_test4(<4 x float> %a, <4 x float> %b) {
1197; SSE-LABEL: combine_test4:
1198; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001199; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
1200; SSE-NEXT: movapd %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001201; SSE-NEXT: retq
1202;
1203; AVX-LABEL: combine_test4:
1204; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001205; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001206; AVX-NEXT: retq
1207 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
1208 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
1209 ret <4 x float> %2
1210}
1211
1212define <4 x float> @combine_test5(<4 x float> %a, <4 x float> %b) {
1213; SSE2-LABEL: combine_test5:
1214; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001215; SSE2-NEXT: movaps %xmm1, %xmm2
1216; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1217; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1218; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm2[2,0]
1219; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
1220; SSE2-NEXT: movaps %xmm2, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001221; SSE2-NEXT: retq
1222;
1223; SSSE3-LABEL: combine_test5:
1224; SSSE3: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001225; SSSE3-NEXT: movaps %xmm1, %xmm2
1226; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1227; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1228; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm2[2,0]
1229; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
1230; SSSE3-NEXT: movaps %xmm2, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001231; SSSE3-NEXT: retq
1232;
1233; SSE41-LABEL: combine_test5:
1234; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001235; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm0[1],xmm1[2,3]
1236; SSE41-NEXT: movaps %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001237; SSE41-NEXT: retq
1238;
1239; AVX-LABEL: combine_test5:
1240; AVX: # BB#0:
1241; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1242; AVX-NEXT: retq
1243 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1244 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
1245 ret <4 x float> %2
1246}
1247
1248define <4 x i32> @combine_test6(<4 x i32> %a, <4 x i32> %b) {
1249; SSE2-LABEL: combine_test6:
1250; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001251; SSE2-NEXT: movaps %xmm1, %xmm2
1252; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1253; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1254; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,3]
1255; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001256; SSE2-NEXT: retq
1257;
1258; SSSE3-LABEL: combine_test6:
1259; SSSE3: # BB#0:
1260; SSSE3-NEXT: movaps %xmm1, %xmm0
1261; SSSE3-NEXT: retq
1262;
1263; SSE41-LABEL: combine_test6:
1264; SSE41: # BB#0:
1265; SSE41-NEXT: movaps %xmm1, %xmm0
1266; SSE41-NEXT: retq
1267;
1268; AVX-LABEL: combine_test6:
1269; AVX: # BB#0:
1270; AVX-NEXT: vmovaps %xmm1, %xmm0
1271; AVX-NEXT: retq
1272 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1273 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
1274 ret <4 x i32> %2
1275}
1276
1277define <4 x i32> @combine_test7(<4 x i32> %a, <4 x i32> %b) {
1278; SSE2-LABEL: combine_test7:
1279; SSE2: # BB#0:
1280; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
Chandler Carruth99627bf2014-10-04 03:52:55 +00001281; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001282; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
1283; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
1284; SSE2-NEXT: retq
1285;
1286; SSSE3-LABEL: combine_test7:
1287; SSSE3: # BB#0:
1288; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
Chandler Carruth99627bf2014-10-04 03:52:55 +00001289; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001290; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
1291; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
1292; SSSE3-NEXT: retq
1293;
1294; SSE41-LABEL: combine_test7:
1295; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001296; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1297; SSE41-NEXT: movdqa %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001298; SSE41-NEXT: retq
1299;
Chandler Carruth99627bf2014-10-04 03:52:55 +00001300; AVX1-LABEL: combine_test7:
1301; AVX1: # BB#0:
1302; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1303; AVX1-NEXT: retq
1304;
1305; AVX2-LABEL: combine_test7:
1306; AVX2: # BB#0:
1307; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1308; AVX2-NEXT: retq
Chandler Carruth782b0a72014-10-02 07:56:47 +00001309 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
1310 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
1311 ret <4 x i32> %2
1312}
1313
1314define <4 x i32> @combine_test8(<4 x i32> %a, <4 x i32> %b) {
1315; SSE-LABEL: combine_test8:
1316; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001317; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001318; SSE-NEXT: retq
1319;
1320; AVX-LABEL: combine_test8:
1321; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001322; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001323; AVX-NEXT: retq
1324 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
1325 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
1326 ret <4 x i32> %2
1327}
1328
1329define <4 x i32> @combine_test9(<4 x i32> %a, <4 x i32> %b) {
1330; SSE-LABEL: combine_test9:
1331; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001332; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
1333; SSE-NEXT: movdqa %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001334; SSE-NEXT: retq
1335;
1336; AVX-LABEL: combine_test9:
1337; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001338; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001339; AVX-NEXT: retq
1340 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
1341 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
1342 ret <4 x i32> %2
1343}
1344
1345define <4 x i32> @combine_test10(<4 x i32> %a, <4 x i32> %b) {
1346; SSE2-LABEL: combine_test10:
1347; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001348; SSE2-NEXT: movaps %xmm1, %xmm2
1349; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1350; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1351; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm2[2,0]
1352; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
1353; SSE2-NEXT: movaps %xmm2, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001354; SSE2-NEXT: retq
1355;
1356; SSSE3-LABEL: combine_test10:
1357; SSSE3: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001358; SSSE3-NEXT: movaps %xmm1, %xmm2
1359; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1360; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1361; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,0],xmm2[2,0]
1362; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
1363; SSSE3-NEXT: movaps %xmm2, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001364; SSSE3-NEXT: retq
1365;
1366; SSE41-LABEL: combine_test10:
1367; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001368; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
1369; SSE41-NEXT: movdqa %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001370; SSE41-NEXT: retq
1371;
1372; AVX1-LABEL: combine_test10:
1373; AVX1: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001374; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001375; AVX1-NEXT: retq
1376;
1377; AVX2-LABEL: combine_test10:
1378; AVX2: # BB#0:
1379; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1380; AVX2-NEXT: retq
1381 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1382 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
1383 ret <4 x i32> %2
1384}
1385
1386define <4 x float> @combine_test11(<4 x float> %a, <4 x float> %b) {
1387; ALL-LABEL: combine_test11:
1388; ALL: # BB#0:
1389; ALL-NEXT: retq
1390 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1391 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1392 ret <4 x float> %2
1393}
1394
1395define <4 x float> @combine_test12(<4 x float> %a, <4 x float> %b) {
; The two shuffles compose to <a0,b1,b2,b3>, i.e. a single "insert element 0 of
; %a into %b" blend (blendps on SSE4.1+). Note the SSE2/SSSE3 expected output
; still contains a redundant duplicated movss.
1396; SSE2-LABEL: combine_test12:
1397; SSE2: # BB#0:
1398; SSE2-NEXT: movss %xmm0, %xmm1
1399; SSE2-NEXT: movss %xmm0, %xmm1
1400; SSE2-NEXT: movaps %xmm1, %xmm0
1401; SSE2-NEXT: retq
1402;
1403; SSSE3-LABEL: combine_test12:
1404; SSSE3: # BB#0:
1405; SSSE3-NEXT: movss %xmm0, %xmm1
1406; SSSE3-NEXT: movss %xmm0, %xmm1
1407; SSSE3-NEXT: movaps %xmm1, %xmm0
1408; SSSE3-NEXT: retq
1409;
1410; SSE41-LABEL: combine_test12:
1411; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001412; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001413; SSE41-NEXT: movaps %xmm1, %xmm0
1414; SSE41-NEXT: retq
1415;
1416; AVX-LABEL: combine_test12:
1417; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001418; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001419; AVX-NEXT: retq
1420 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1421 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
1422 ret <4 x float> %2
1423}
1424
1425define <4 x float> @combine_test13(<4 x float> %a, <4 x float> %b) {
; The shuffle pair composes to <a0,a1,b0,b1>, which is exactly the low-half
; interleave, so a single unpcklpd is expected.
1426; SSE-LABEL: combine_test13:
1427; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001428; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001429; SSE-NEXT: retq
1430;
1431; AVX-LABEL: combine_test13:
1432; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001433; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001434; AVX-NEXT: retq
1435 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1436 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1437 ret <4 x float> %2
1438}
1439
1440define <4 x float> @combine_test14(<4 x float> %a, <4 x float> %b) {
; The shuffle pair composes to <a2,a3,b2,b3> (high halves of both inputs), so
; a single unpckhpd is expected.
1441; SSE-LABEL: combine_test14:
1442; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001443; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001444; SSE-NEXT: retq
1445;
1446; AVX-LABEL: combine_test14:
1447; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001448; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001449; AVX-NEXT: retq
1450 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 5, i32 5>
1451 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
1452 ret <4 x float> %2
1453}
1454
1455define <4 x float> @combine_test15(<4 x float> %a, <4 x float> %b) {
; Composes to <b0,a1,b2,b3>: a single blend that takes only lane 1 from %a.
; SSE4.1/AVX expect one blendps; pre-SSE4.1 has no blend instruction, so a
; shufps chain is still emitted.
1456; SSE2-LABEL: combine_test15:
1457; SSE2: # BB#0:
1458; SSE2-NEXT: movaps %xmm0, %xmm2
1459; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[0,0]
1460; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,3]
1461; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[0,0]
1462; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[2,3]
1463; SSE2-NEXT: retq
1464;
1465; SSSE3-LABEL: combine_test15:
1466; SSSE3: # BB#0:
1467; SSSE3-NEXT: movaps %xmm0, %xmm2
1468; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[0,0]
1469; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,3]
1470; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[0,0]
1471; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[2,3]
1472; SSSE3-NEXT: retq
1473;
1474; SSE41-LABEL: combine_test15:
1475; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001476; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm0[1],xmm1[2,3]
1477; SSE41-NEXT: movaps %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001478; SSE41-NEXT: retq
1479;
1480; AVX-LABEL: combine_test15:
1481; AVX: # BB#0:
1482; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1483; AVX-NEXT: retq
1484 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
1485 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
1486 ret <4 x float> %2
1487}
1488
1489define <4 x i32> @combine_test16(<4 x i32> %a, <4 x i32> %b) {
; Integer version of combine_test11: the two <4,1,6,3> shuffles cancel back to
; %a, so no shuffle instructions are expected.
1490; ALL-LABEL: combine_test16:
1491; ALL: # BB#0:
1492; ALL-NEXT: retq
1493 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1494 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1495 ret <4 x i32> %2
1496}
1497
1498define <4 x i32> @combine_test17(<4 x i32> %a, <4 x i32> %b) {
; Integer version of combine_test12: composes to <a0,b1,b2,b3> and should
; become a single pblendw (pblendd on AVX2). SSE2/SSSE3 expected output still
; shows a duplicated movss.
1499; SSE2-LABEL: combine_test17:
1500; SSE2: # BB#0:
1501; SSE2-NEXT: movss %xmm0, %xmm1
1502; SSE2-NEXT: movss %xmm0, %xmm1
1503; SSE2-NEXT: movaps %xmm1, %xmm0
1504; SSE2-NEXT: retq
1505;
1506; SSSE3-LABEL: combine_test17:
1507; SSSE3: # BB#0:
1508; SSSE3-NEXT: movss %xmm0, %xmm1
1509; SSSE3-NEXT: movss %xmm0, %xmm1
1510; SSSE3-NEXT: movaps %xmm1, %xmm0
1511; SSSE3-NEXT: retq
1512;
1513; SSE41-LABEL: combine_test17:
1514; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001515; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1516; SSE41-NEXT: movdqa %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001517; SSE41-NEXT: retq
1518;
Chandler Carruth99627bf2014-10-04 03:52:55 +00001519; AVX1-LABEL: combine_test17:
1520; AVX1: # BB#0:
1521; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1522; AVX1-NEXT: retq
1523;
1524; AVX2-LABEL: combine_test17:
1525; AVX2: # BB#0:
1526; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1527; AVX2-NEXT: retq
Chandler Carruth782b0a72014-10-02 07:56:47 +00001528 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1529 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
1530 ret <4 x i32> %2
1531}
1532
1533define <4 x i32> @combine_test18(<4 x i32> %a, <4 x i32> %b) {
; Integer version of combine_test13: composes to <a0,a1,b0,b1>, expected to
; fold to a single punpcklqdq.
1534; SSE-LABEL: combine_test18:
1535; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001536; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001537; SSE-NEXT: retq
1538;
1539; AVX-LABEL: combine_test18:
1540; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001541; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001542; AVX-NEXT: retq
1543 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
1544 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1545 ret <4 x i32> %2
1546}
1547
1548define <4 x i32> @combine_test19(<4 x i32> %a, <4 x i32> %b) {
; Integer version of combine_test14: composes to <a2,a3,b2,b3>, expected to
; fold to a single punpckhqdq.
1549; SSE-LABEL: combine_test19:
1550; SSE: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001551; SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001552; SSE-NEXT: retq
1553;
1554; AVX-LABEL: combine_test19:
1555; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001556; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001557; AVX-NEXT: retq
1558 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 5, i32 5>
1559 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
1560 ret <4 x i32> %2
1561}
1562
1563define <4 x i32> @combine_test20(<4 x i32> %a, <4 x i32> %b) {
; Integer version of combine_test15: composes to <b0,a1,b2,b3>, a single blend
; taking lane 1 from %a (pblendw/vpblendd). Pre-SSE4.1 still emits a shufps
; chain since no integer blend exists there.
1564; SSE2-LABEL: combine_test20:
1565; SSE2: # BB#0:
1566; SSE2-NEXT: movaps %xmm0, %xmm2
1567; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[0,0]
1568; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,3]
1569; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[0,0]
1570; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[2,3]
1571; SSE2-NEXT: retq
1572;
1573; SSSE3-LABEL: combine_test20:
1574; SSSE3: # BB#0:
1575; SSSE3-NEXT: movaps %xmm0, %xmm2
1576; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[0,0]
1577; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,3]
1578; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[0,0]
1579; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[2,3]
1580; SSSE3-NEXT: retq
1581;
1582; SSE41-LABEL: combine_test20:
1583; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001584; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
1585; SSE41-NEXT: movdqa %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001586; SSE41-NEXT: retq
1587;
1588; AVX1-LABEL: combine_test20:
1589; AVX1: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001590; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001591; AVX1-NEXT: retq
1592;
1593; AVX2-LABEL: combine_test20:
1594; AVX2: # BB#0:
1595; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1596; AVX2-NEXT: retq
1597 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
1598 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
1599 ret <4 x i32> %2
1600}
1601
1602
1603; Check some negative cases.
1604; FIXME: Do any of these really make sense? Are they redundant with the above tests?
1605
1606define <4 x float> @combine_test1b(<4 x float> %a, <4 x float> %b) {
; Negative case: the second mask <0,5,2,0> uses element 0 twice, so the pair
; cannot collapse into a single blend; a blend plus extra shufps remain.
1607; SSE2-LABEL: combine_test1b:
1608; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001609; SSE2-NEXT: movaps %xmm1, %xmm2
1610; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1611; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1612; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm2[0,0]
1613; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm2[2,0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001614; SSE2-NEXT: movaps %xmm1, %xmm0
1615; SSE2-NEXT: retq
1616;
1617; SSSE3-LABEL: combine_test1b:
1618; SSSE3: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001619; SSSE3-NEXT: movaps %xmm1, %xmm2
1620; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1621; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1622; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm2[0,0]
1623; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm2[2,0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001624; SSSE3-NEXT: movaps %xmm1, %xmm0
1625; SSSE3-NEXT: retq
1626;
1627; SSE41-LABEL: combine_test1b:
1628; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001629; SSE41-NEXT: movaps %xmm1, %xmm2
1630; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
1631; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm2[0,0]
1632; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm2[2,0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001633; SSE41-NEXT: movaps %xmm1, %xmm0
1634; SSE41-NEXT: retq
1635;
1636; AVX-LABEL: combine_test1b:
1637; AVX: # BB#0:
1638; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
1639; AVX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
1640; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[2,0],xmm0[2,0]
1641; AVX-NEXT: retq
1642 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1643 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 0>
1644 ret <4 x float> %2
1645}
1646
1647define <4 x float> @combine_test2b(<4 x float> %a, <4 x float> %b) {
; Negative case: the second mask <0,5,0,5> duplicates the same lane pair, so
; the shuffles cannot be folded into one blend; shufps/pshufd remain.
1648; SSE2-LABEL: combine_test2b:
1649; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001650; SSE2-NEXT: movaps %xmm1, %xmm2
1651; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1652; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1653; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[1,1]
1654; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001655; SSE2-NEXT: retq
1656;
1657; SSSE3-LABEL: combine_test2b:
1658; SSSE3: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001659; SSSE3-NEXT: movaps %xmm1, %xmm2
1660; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1661; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1662; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[1,1]
1663; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001664; SSSE3-NEXT: retq
1665;
1666; SSE41-LABEL: combine_test2b:
1667; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001668; SSE41-NEXT: movaps %xmm1, %xmm2
1669; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
1670; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[1,1]
1671; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001672; SSE41-NEXT: retq
1673;
1674; AVX-LABEL: combine_test2b:
1675; AVX: # BB#0:
1676; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
1677; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,1]
Chandler Carruth99627bf2014-10-04 03:52:55 +00001678; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001679; AVX-NEXT: retq
1680 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1681 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 0, i32 5>
1682 ret <4 x float> %2
1683}
1684
1685define <4 x float> @combine_test3b(<4 x float> %a, <4 x float> %b) {
; Negative case: both masks repeat lanes (<0,0,6,3> then <0,7,2,7>), so the
; pair stays a multi-instruction shufps sequence rather than one shuffle.
1686; SSE-LABEL: combine_test3b:
1687; SSE: # BB#0:
1688; SSE-NEXT: movaps %xmm1, %xmm2
1689; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm0[3,0]
1690; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm2[0,2]
1691; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[3,3]
Chandler Carruth99627bf2014-10-04 03:52:55 +00001692; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001693; SSE-NEXT: retq
1694;
1695; AVX-LABEL: combine_test3b:
1696; AVX: # BB#0:
1697; AVX-NEXT: vshufps {{.*#+}} xmm2 = xmm1[2,0],xmm0[3,0]
1698; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm2[0,2]
1699; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[3,3]
Chandler Carruth99627bf2014-10-04 03:52:55 +00001700; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001701; AVX-NEXT: retq
1702 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 6, i32 3>
1703 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 7>
1704 ret <4 x float> %2
1705}
1706
1707define <4 x float> @combine_test4b(<4 x float> %a, <4 x float> %b) {
; Negative case: the second mask <5,5,2,7> duplicates b1, so the combined
; shuffle cannot become a single blend; blend plus shufps remain.
1708; SSE2-LABEL: combine_test4b:
1709; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001710; SSE2-NEXT: movaps %xmm1, %xmm2
1711; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1712; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1713; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[3,0]
1714; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[0,2]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001715; SSE2-NEXT: movaps %xmm1, %xmm0
1716; SSE2-NEXT: retq
1717;
1718; SSSE3-LABEL: combine_test4b:
1719; SSSE3: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001720; SSSE3-NEXT: movaps %xmm1, %xmm2
1721; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
1722; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
1723; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[3,0]
1724; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[0,2]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001725; SSSE3-NEXT: movaps %xmm1, %xmm0
1726; SSSE3-NEXT: retq
1727;
1728; SSE41-LABEL: combine_test4b:
1729; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001730; SSE41-NEXT: movaps %xmm1, %xmm2
1731; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
1732; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[3,0]
1733; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[0,2]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001734; SSE41-NEXT: movaps %xmm1, %xmm0
1735; SSE41-NEXT: retq
1736;
1737; AVX-LABEL: combine_test4b:
1738; AVX: # BB#0:
1739; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
1740; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
1741; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[1,1],xmm0[0,2]
1742; AVX-NEXT: retq
1743 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1744 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 5, i32 5, i32 2, i32 7>
1745 ret <4 x float> %2
1746}
1747
1748
1749; Verify that we correctly fold shuffles even when we use illegal vector types.
1750
1751define <4 x i8> @combine_test1c(<4 x i8>* %a, <4 x i8>* %b) {
; Illegal-type (<4 x i8>) version of the blend fold: after type legalization
; the shuffle pair composes to <A0,B1,B2,B3> and should still become a single
; pblendw/vpblendd on SSE4.1+; SSE2/SSSE3 legalize via punpck + shufps chains.
1752; SSE2-LABEL: combine_test1c:
1753; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001754; SSE2-NEXT: movd (%rdi), %xmm0
1755; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1756; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1757; SSE2-NEXT: movd (%rsi), %xmm1
1758; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1759; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001760; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
Chandler Carruth99627bf2014-10-04 03:52:55 +00001761; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001762; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
1763; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
1764; SSE2-NEXT: retq
1765;
1766; SSSE3-LABEL: combine_test1c:
1767; SSSE3: # BB#0:
1768; SSSE3-NEXT: movd (%rdi), %xmm0
Chandler Carruth99627bf2014-10-04 03:52:55 +00001769; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1770; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1771; SSSE3-NEXT: movd (%rsi), %xmm1
1772; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1773; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1774; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
1775; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
1776; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
1777; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001778; SSSE3-NEXT: retq
1779;
1780; SSE41-LABEL: combine_test1c:
1781; SSE41: # BB#0:
1782; SSE41-NEXT: pmovzxbd (%rdi), %xmm1
1783; SSE41-NEXT: pmovzxbd (%rsi), %xmm0
Chandler Carruth99627bf2014-10-04 03:52:55 +00001784; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001785; SSE41-NEXT: retq
1786;
Chandler Carruth99627bf2014-10-04 03:52:55 +00001787; AVX1-LABEL: combine_test1c:
1788; AVX1: # BB#0:
1789; AVX1-NEXT: vpmovzxbd (%rdi), %xmm0
1790; AVX1-NEXT: vpmovzxbd (%rsi), %xmm1
1791; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
1792; AVX1-NEXT: retq
1793;
1794; AVX2-LABEL: combine_test1c:
1795; AVX2: # BB#0:
1796; AVX2-NEXT: vpmovzxbd (%rdi), %xmm0
1797; AVX2-NEXT: vpmovzxbd (%rsi), %xmm1
1798; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
1799; AVX2-NEXT: retq
Chandler Carruth782b0a72014-10-02 07:56:47 +00001800 %A = load <4 x i8>* %a
1801 %B = load <4 x i8>* %b
1802 %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
1803 %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
1804 ret <4 x i8> %2
1805}
1806
1807define <4 x i8> @combine_test2c(<4 x i8>* %a, <4 x i8>* %b) {
; Illegal-type version of a low-half interleave: the shuffle pair composes to
; <A0,A1,B0,B1> and should fold to a single punpcklqdq after legalization.
1808; SSE2-LABEL: combine_test2c:
1809; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001810; SSE2-NEXT: movd (%rdi), %xmm0
1811; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1812; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1813; SSE2-NEXT: movd (%rsi), %xmm1
1814; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1815; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1816; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001817; SSE2-NEXT: retq
1818;
1819; SSSE3-LABEL: combine_test2c:
1820; SSSE3: # BB#0:
1821; SSSE3-NEXT: movd (%rdi), %xmm0
Chandler Carruth99627bf2014-10-04 03:52:55 +00001822; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1823; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1824; SSSE3-NEXT: movd (%rsi), %xmm1
1825; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1826; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1827; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001828; SSSE3-NEXT: retq
1829;
1830; SSE41-LABEL: combine_test2c:
1831; SSE41: # BB#0:
1832; SSE41-NEXT: pmovzxbd (%rdi), %xmm0
1833; SSE41-NEXT: pmovzxbd (%rsi), %xmm1
Chandler Carruth99627bf2014-10-04 03:52:55 +00001834; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001835; SSE41-NEXT: retq
1836;
1837; AVX-LABEL: combine_test2c:
1838; AVX: # BB#0:
1839; AVX-NEXT: vpmovzxbd (%rdi), %xmm0
1840; AVX-NEXT: vpmovzxbd (%rsi), %xmm1
Chandler Carruth99627bf2014-10-04 03:52:55 +00001841; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001842; AVX-NEXT: retq
1843 %A = load <4 x i8>* %a
1844 %B = load <4 x i8>* %b
1845 %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 1, i32 5>
1846 %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
1847 ret <4 x i8> %2
1848}
1849
1850define <4 x i8> @combine_test3c(<4 x i8>* %a, <4 x i8>* %b) {
; Illegal-type version of the high-half interleave: the pair composes to
; <A2,A3,B2,B3> and should fold to a single punpckhqdq after legalization.
1851; SSE2-LABEL: combine_test3c:
1852; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001853; SSE2-NEXT: movd (%rdi), %xmm1
1854; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1855; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1856; SSE2-NEXT: movd (%rsi), %xmm0
1857; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1858; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1859; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001860; SSE2-NEXT: retq
1861;
1862; SSSE3-LABEL: combine_test3c:
1863; SSSE3: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001864; SSSE3-NEXT: movd (%rdi), %xmm1
1865; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1866; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1867; SSSE3-NEXT: movd (%rsi), %xmm0
1868; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1869; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
1870; SSSE3-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001871; SSSE3-NEXT: retq
1872;
1873; SSE41-LABEL: combine_test3c:
1874; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001875; SSE41-NEXT: pmovzxbd (%rdi), %xmm1
1876; SSE41-NEXT: pmovzxbd (%rsi), %xmm0
1877; SSE41-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001878; SSE41-NEXT: retq
1879;
1880; AVX-LABEL: combine_test3c:
1881; AVX: # BB#0:
1882; AVX-NEXT: vpmovzxbd (%rdi), %xmm0
1883; AVX-NEXT: vpmovzxbd (%rsi), %xmm1
Chandler Carruth99627bf2014-10-04 03:52:55 +00001884; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001885; AVX-NEXT: retq
1886 %A = load <4 x i8>* %a
1887 %B = load <4 x i8>* %b
1888 %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
1889 %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
1890 ret <4 x i8> %2
1891}
1892
1893define <4 x i8> @combine_test4c(<4 x i8>* %a, <4 x i8>* %b) {
; Illegal-type version of the blend fold: composes to <B0,A1,B2,B3> and should
; become a single pblendw/vpblendd on SSE4.1+; SSE2/SSSE3 legalize with
; punpck + shufps sequences instead.
1894; SSE2-LABEL: combine_test4c:
1895; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001896; SSE2-NEXT: movd (%rdi), %xmm1
1897; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1898; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1899; SSE2-NEXT: movd (%rsi), %xmm2
1900; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
1901; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
1902; SSE2-NEXT: movdqa %xmm2, %xmm0
1903; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
1904; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
1905; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,0],xmm0[2,0]
1906; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001907; SSE2-NEXT: retq
1908;
1909; SSSE3-LABEL: combine_test4c:
1910; SSSE3: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001911; SSSE3-NEXT: movd (%rdi), %xmm1
1912; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1913; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001914; SSSE3-NEXT: movd (%rsi), %xmm2
Chandler Carruth99627bf2014-10-04 03:52:55 +00001915; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
1916; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
1917; SSSE3-NEXT: movdqa %xmm2, %xmm0
1918; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
1919; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001920; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,0],xmm0[2,0]
1921; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0]
1922; SSSE3-NEXT: retq
1923;
1924; SSE41-LABEL: combine_test4c:
1925; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001926; SSE41-NEXT: pmovzxbd (%rdi), %xmm1
1927; SSE41-NEXT: pmovzxbd (%rsi), %xmm0
1928; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001929; SSE41-NEXT: retq
1930;
1931; AVX1-LABEL: combine_test4c:
1932; AVX1: # BB#0:
1933; AVX1-NEXT: vpmovzxbd (%rdi), %xmm0
1934; AVX1-NEXT: vpmovzxbd (%rsi), %xmm1
Chandler Carruth99627bf2014-10-04 03:52:55 +00001935; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
Chandler Carruth782b0a72014-10-02 07:56:47 +00001936; AVX1-NEXT: retq
1937;
1938; AVX2-LABEL: combine_test4c:
1939; AVX2: # BB#0:
1940; AVX2-NEXT: vpmovzxbd (%rdi), %xmm0
1941; AVX2-NEXT: vpmovzxbd (%rsi), %xmm1
1942; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
1943; AVX2-NEXT: retq
1944 %A = load <4 x i8>* %a
1945 %B = load <4 x i8>* %b
1946 %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
1947 %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
1948 ret <4 x i8> %2
1949}
1950
1951
1952; The following test cases are generated from this C++ code
1953;
1954;__m128 blend_01(__m128 a, __m128 b)
1955;{
1956; __m128 s = a;
1957; s = _mm_blend_ps( s, b, 1<<0 );
1958; s = _mm_blend_ps( s, b, 1<<1 );
1959; return s;
1960;}
1961;
1962;__m128 blend_02(__m128 a, __m128 b)
1963;{
1964; __m128 s = a;
1965; s = _mm_blend_ps( s, b, 1<<0 );
1966; s = _mm_blend_ps( s, b, 1<<2 );
1967; return s;
1968;}
1969;
1970;__m128 blend_123(__m128 a, __m128 b)
1971;{
1972; __m128 s = a;
1973; s = _mm_blend_ps( s, b, 1<<1 );
1974; s = _mm_blend_ps( s, b, 1<<2 );
1975; s = _mm_blend_ps( s, b, 1<<3 );
1976; return s;
1977;}
1978
1979; Ideally, we should collapse the following shuffles into a single one.
1980
1981define <4 x float> @combine_blend_01(<4 x float> %a, <4 x float> %b) {
; Two single-lane _mm_blend_ps calls (lane 0, then lane 1) that together take
; the whole low 64-bit half from %b; SSE4.1/AVX expect one blendpd, while the
; SSE2/SSSE3 output still carries extra shufps after the movsd.
1982; SSE2-LABEL: combine_blend_01:
1983; SSE2: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001984; SSE2-NEXT: movsd %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001985; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
1986; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
1987; SSE2-NEXT: movaps %xmm1, %xmm0
1988; SSE2-NEXT: retq
1989;
1990; SSSE3-LABEL: combine_blend_01:
1991; SSSE3: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00001992; SSSE3-NEXT: movsd %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00001993; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,0],xmm0[0,0]
1994; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
1995; SSSE3-NEXT: movaps %xmm1, %xmm0
1996; SSSE3-NEXT: retq
1997;
1998; SSE41-LABEL: combine_blend_01:
1999; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002000; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
2001; SSE41-NEXT: movapd %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00002002; SSE41-NEXT: retq
2003;
2004; AVX-LABEL: combine_blend_01:
2005; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002006; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
Chandler Carruth782b0a72014-10-02 07:56:47 +00002007; AVX-NEXT: retq
2008 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
2009 %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
2010 ret <4 x float> %shuffle6
2011}
2012
2013define <4 x float> @combine_blend_02(<4 x float> %a, <4 x float> %b) {
; Two single-lane blends (lanes 0 and 2) that should merge into one blendps
; taking lanes 0 and 2 from %b; the undef in the first mask lets the combiner
; see through the intermediate shuffle.
2014; SSE2-LABEL: combine_blend_02:
2015; SSE2: # BB#0:
2016; SSE2-NEXT: movss %xmm1, %xmm0
2017; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
2018; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
2019; SSE2-NEXT: retq
2020;
2021; SSSE3-LABEL: combine_blend_02:
2022; SSSE3: # BB#0:
2023; SSSE3-NEXT: movss %xmm1, %xmm0
2024; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
2025; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
2026; SSSE3-NEXT: retq
2027;
2028; SSE41-LABEL: combine_blend_02:
2029; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002030; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
2031; SSE41-NEXT: movaps %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00002032; SSE41-NEXT: retq
2033;
2034; AVX-LABEL: combine_blend_02:
2035; AVX: # BB#0:
2036; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
2037; AVX-NEXT: retq
2038 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 undef, i32 3>
2039 %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
2040 ret <4 x float> %shuffle6
2041}
2042
2043define <4 x float> @combine_blend_123(<4 x float> %a, <4 x float> %b) {
; Three chained single-lane blends (lanes 1, 2, 3) that should merge into one
; blend keeping only lane 0 of %a; SSE4.1/AVX expect a single blendps, while
; SSE2/SSSE3 still emit shufps + movsd.
2044; SSE2-LABEL: combine_blend_123:
2045; SSE2: # BB#0:
2046; SSE2-NEXT: movaps %xmm1, %xmm2
Chandler Carruth99627bf2014-10-04 03:52:55 +00002047; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm0[0,0]
2048; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm0[2,3]
2049; SSE2-NEXT: movsd %xmm2, %xmm1
2050; SSE2-NEXT: movaps %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00002051; SSE2-NEXT: retq
2052;
2053; SSSE3-LABEL: combine_blend_123:
2054; SSSE3: # BB#0:
2055; SSSE3-NEXT: movaps %xmm1, %xmm2
Chandler Carruth99627bf2014-10-04 03:52:55 +00002056; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,0],xmm0[0,0]
2057; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm0[2,3]
2058; SSSE3-NEXT: movsd %xmm2, %xmm1
2059; SSSE3-NEXT: movaps %xmm1, %xmm0
Chandler Carruth782b0a72014-10-02 07:56:47 +00002060; SSSE3-NEXT: retq
2061;
2062; SSE41-LABEL: combine_blend_123:
2063; SSE41: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002064; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00002065; SSE41-NEXT: movaps %xmm1, %xmm0
2066; SSE41-NEXT: retq
2067;
2068; AVX-LABEL: combine_blend_123:
2069; AVX: # BB#0:
Chandler Carruth99627bf2014-10-04 03:52:55 +00002070; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
Chandler Carruth782b0a72014-10-02 07:56:47 +00002071; AVX-NEXT: retq
2072 %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
2073 %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
2074 %shuffle12 = shufflevector <4 x float> %shuffle6, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
2075 ret <4 x float> %shuffle12
2076}
2077
; The shuffle pair should combine into a single punpckhqdq of %b and %a.
define <4 x i32> @combine_test_movhl_1(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_test_movhl_1:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test_movhl_1:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 7, i32 5, i32 3>
  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 1, i32 0, i32 3>
  ret <4 x i32> %2
}
2093
; Same combine as movhl_1 reached through a different pair of masks.
define <4 x i32> @combine_test_movhl_2(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_test_movhl_2:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test_movhl_2:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 0, i32 3, i32 6>
  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 3, i32 7, i32 0, i32 2>
  ret <4 x i32> %2
}
2109
; Same combine as movhl_1 reached through yet another pair of masks.
define <4 x i32> @combine_test_movhl_3(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_test_movhl_3:
; SSE: # BB#0:
; SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_test_movhl_3:
; AVX: # BB#0:
; AVX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 6, i32 3, i32 2>
  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 0, i32 3, i32 2>
  ret <4 x i32> %2
}


; Verify that we fold shuffles according to rule:
; (shuffle(shuffle A, Undef, M0), B, M1) -> (shuffle A, B, M2)

; Undef-fed shuffle pair should fold to a single blendpd on SSE4.1/AVX.
define <4 x float> @combine_undef_input_test1(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_undef_input_test1:
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[1,2]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test1:
; SSSE3: # BB#0:
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[1,2]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test1:
; SSE41: # BB#0:
; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
; SSE41-NEXT: movapd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test1:
; AVX: # BB#0:
; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 1, i32 2>
  ret <4 x float> %2
}
2159
; Undef-fed shuffle pair should fold to a single unpcklpd of %a and %b.
define <4 x float> @combine_undef_input_test2(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_undef_input_test2:
; SSE: # BB#0:
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test2:
; AVX: # BB#0:
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
  ret <4 x float> %2
}
2174
; Same fold as test2 reached through different masks.
define <4 x float> @combine_undef_input_test3(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_undef_input_test3:
; SSE: # BB#0:
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test3:
; AVX: # BB#0:
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
  ret <4 x float> %2
}
2189
; Undef-fed shuffle pair should fold to a single unpckhpd of %b and %a.
define <4 x float> @combine_undef_input_test4(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_undef_input_test4:
; SSE: # BB#0:
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test4:
; AVX: # BB#0:
; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  ret <4 x float> %2
}
2205
; Should fold to a low-element merge: movsd pre-SSE4.1, blendpd afterwards.
define <4 x float> @combine_undef_input_test5(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_undef_input_test5:
; SSE2: # BB#0:
; SSE2-NEXT: movsd %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test5:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd %xmm0, %xmm1
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test5:
; SSE41: # BB#0:
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test5:
; AVX: # BB#0:
; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 6, i32 7>
  ret <4 x float> %2
}


; Verify that we fold shuffles according to rule:
; (shuffle(shuffle A, Undef, M0), A, M1) -> (shuffle A, Undef, M2)

; The shuffle pair is an identity of %a; no instructions should be emitted.
define <4 x float> @combine_undef_input_test6(<4 x float> %a) {
; ALL-LABEL: combine_undef_input_test6:
; ALL: # BB#0:
; ALL-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 1, i32 2>
  ret <4 x float> %2
}
2245
; Should fold to a single low-half duplicate (movlhps/unpcklpd of %a with itself).
define <4 x float> @combine_undef_input_test7(<4 x float> %a) {
; SSE2-LABEL: combine_undef_input_test7:
; SSE2: # BB#0:
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test7:
; SSSE3: # BB#0:
; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test7:
; SSE41: # BB#0:
; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test7:
; AVX: # BB#0:
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
  ret <4 x float> %2
}
2270
; Same low-half duplicate fold as test7 via different masks.
define <4 x float> @combine_undef_input_test8(<4 x float> %a) {
; SSE2-LABEL: combine_undef_input_test8:
; SSE2: # BB#0:
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test8:
; SSSE3: # BB#0:
; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test8:
; SSE41: # BB#0:
; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test8:
; AVX: # BB#0:
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
  ret <4 x float> %2
}
2295
; Should fold to a single high-half duplicate (movhlps of %a with itself).
define <4 x float> @combine_undef_input_test9(<4 x float> %a) {
; SSE-LABEL: combine_undef_input_test9:
; SSE: # BB#0:
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test9:
; AVX: # BB#0:
; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
  ret <4 x float> %2
}
2310
; The shuffle pair is an identity of %a; no instructions should be emitted.
define <4 x float> @combine_undef_input_test10(<4 x float> %a) {
; ALL-LABEL: combine_undef_input_test10:
; ALL: # BB#0:
; ALL-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 2, i32 6, i32 7>
  ret <4 x float> %2
}
2319
; Mirror of test1 with the operand order of the outer shuffle swapped;
; should still fold to a single blendpd on SSE4.1/AVX.
define <4 x float> @combine_undef_input_test11(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_undef_input_test11:
; SSE2: # BB#0:
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[1,2]
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test11:
; SSSE3: # BB#0:
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[1,2]
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test11:
; SSE41: # BB#0:
; SSE41-NEXT: blendpd {{.*#+}} xmm1 = xmm1[0],xmm0[1]
; SSE41-NEXT: movapd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test11:
; AVX: # BB#0:
; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 0, i32 1, i32 5, i32 6>
  ret <4 x float> %2
}
2349
; Mirror of test2 with swapped outer operands; folds to a single unpcklpd.
define <4 x float> @combine_undef_input_test12(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_undef_input_test12:
; SSE: # BB#0:
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test12:
; AVX: # BB#0:
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 5, i32 6, i32 0, i32 1>
  ret <4 x float> %2
}
2364
; Mirror of test3 with swapped outer operands; folds to a single unpcklpd.
define <4 x float> @combine_undef_input_test13(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_undef_input_test13:
; SSE: # BB#0:
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test13:
; AVX: # BB#0:
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 4, i32 5, i32 0, i32 5>
  ret <4 x float> %2
}
2379
; Mirror of test4 with swapped outer operands; folds to a single unpckhpd.
define <4 x float> @combine_undef_input_test14(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: combine_undef_input_test14:
; SSE: # BB#0:
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test14:
; AVX: # BB#0:
; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  ret <4 x float> %2
}
2395
; Mirror of test5 with swapped outer operands; folds to movsd/blendpd.
define <4 x float> @combine_undef_input_test15(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: combine_undef_input_test15:
; SSE2: # BB#0:
; SSE2-NEXT: movsd %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test15:
; SSSE3: # BB#0:
; SSSE3-NEXT: movsd %xmm0, %xmm1
; SSSE3-NEXT: movaps %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test15:
; SSE41: # BB#0:
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test15:
; AVX: # BB#0:
; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 2, i32 3>
  ret <4 x float> %2
}


; Verify that shuffles are canonicalized according to rules:
; shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
;
; This allows to trigger the following combine rule:
; (shuffle(shuffle A, Undef, M0), A, M1) -> (shuffle A, Undef, M2)
;
; As a result, all the shuffle pairs in each function below should be
; combined into a single legal shuffle operation.

; After canonicalization the pair is an identity of %a; nothing is emitted.
define <4 x float> @combine_undef_input_test16(<4 x float> %a) {
; ALL-LABEL: combine_undef_input_test16:
; ALL: # BB#0:
; ALL-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 0, i32 1, i32 5, i32 3>
  ret <4 x float> %2
}
2441
; After canonicalization this folds to a single low-half duplicate of %a.
define <4 x float> @combine_undef_input_test17(<4 x float> %a) {
; SSE2-LABEL: combine_undef_input_test17:
; SSE2: # BB#0:
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test17:
; SSSE3: # BB#0:
; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test17:
; SSE41: # BB#0:
; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test17:
; AVX: # BB#0:
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 5, i32 6, i32 0, i32 1>
  ret <4 x float> %2
}
2466
; Same low-half duplicate fold as test17 via different masks.
define <4 x float> @combine_undef_input_test18(<4 x float> %a) {
; SSE2-LABEL: combine_undef_input_test18:
; SSE2: # BB#0:
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_undef_input_test18:
; SSSE3: # BB#0:
; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_undef_input_test18:
; SSE41: # BB#0:
; SSE41-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test18:
; AVX: # BB#0:
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 0, i32 5>
  ret <4 x float> %2
}
2491
; After canonicalization this folds to a single high-half duplicate of %a.
define <4 x float> @combine_undef_input_test19(<4 x float> %a) {
; SSE-LABEL: combine_undef_input_test19:
; SSE: # BB#0:
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_undef_input_test19:
; AVX: # BB#0:
; AVX-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
; AVX-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
  ret <4 x float> %2
}
2506
; After canonicalization the pair is an identity of %a; nothing is emitted.
define <4 x float> @combine_undef_input_test20(<4 x float> %a) {
; ALL-LABEL: combine_undef_input_test20:
; ALL: # BB#0:
; ALL-NEXT: retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 2, i32 3>
  ret <4 x float> %2
}

; These tests are designed to test the ability to combine away unnecessary
; operations feeding into a shuffle. The AVX cases are the important ones as
; they leverage operations which cannot be done naturally on the entire vector
; and thus are decomposed into multiple smaller operations.

; Only the high 128-bit half of %b is used by the shuffle, so the add on the
; low half should be elided (AVX1 operates on a single extracted xmm).
define <8 x i32> @combine_unneeded_subvector1(<8 x i32> %a) {
; SSE-LABEL: combine_unneeded_subvector1:
; SSE: # BB#0:
; SSE-NEXT: paddd {{.*}}(%rip), %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,2,1,0]
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_unneeded_subvector1:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_unneeded_subvector1:
; AVX2: # BB#0:
; AVX2-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %b = add <8 x i32> %a, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  %c = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i32> %c
}
2547
; Two-input variant: the shuffle only reads the high halves of %b and %c, so
; the add on the low half of %a should again be elided.
define <8 x i32> @combine_unneeded_subvector2(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: combine_unneeded_subvector2:
; SSE: # BB#0:
; SSE-NEXT: paddd {{.*}}(%rip), %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[3,2,1,0]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_unneeded_subvector2:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,2,1,0]
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_unneeded_subvector2:
; AVX2: # BB#0:
; AVX2-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <7,6,5,4,u,u,u,u>
; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: retq
  %c = add <8 x i32> %a, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  %d = shufflevector <8 x i32> %b, <8 x i32> %c, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
  ret <8 x i32> %d
}