; RUN: llc < %s -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
;
; Verify that the DAG combiner correctly folds bitwise operations across
; shuffles, nested shuffles with undef, pairs of nested shuffles, and other
; basic and always-safe patterns. Also test that the DAG combiner will combine
; target-specific shuffle instructions where reasonable.

target triple = "x86_64-unknown-unknown"

declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8)
declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8)
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8)

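; Note: a shuffle immediate of 27 (0b00011011) selects elements <3,2,1,0>,
; i.e. a full reversal. Applying the same reversal twice is the identity,
; which is why several tests below expect no shuffle instructions at all.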
define <4 x i32> @combine_pshufd1(<4 x i32> %a) {
; ALL-LABEL: combine_pshufd1:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
  %c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 27)
  ret <4 x i32> %c
}

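; Note: a shuffle immediate of -28 (0xE4 = 0b11100100) selects elements
; <0,1,2,3>, the identity permutation, so the pshuflw/pshufhw sandwiched
; between the two reversing pshufds in the next two tests is a no-op.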
define <4 x i32> @combine_pshufd2(<4 x i32> %a) {
; ALL-LABEL: combine_pshufd2:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b.cast, i8 -28)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 27)
  ret <4 x i32> %d
}

define <4 x i32> @combine_pshufd3(<4 x i32> %a) {
; ALL-LABEL: combine_pshufd3:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b.cast, i8 -28)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 27)
  ret <4 x i32> %d
}

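; Note: immediate -31 (0xE1) encodes the dword shuffle <1,0,2,3>, which only
; permutes the low 64 bits, while pshufhw only touches the high 64 bits; the
; two commute, so the outer pair of identical pshufds cancels and a single
; pshufhw is expected to remain.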
define <4 x i32> @combine_pshufd4(<4 x i32> %a) {
; SSE-LABEL: combine_pshufd4:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufd4:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; AVX-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -31)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b.cast, i8 27)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 -31)
  ret <4 x i32> %d
}

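; Note: immediate -76 (0xB4) encodes the dword shuffle <0,1,3,2>, the
; high-half counterpart of the previous test: it commutes with pshuflw, so
; only a single pshuflw is expected to survive.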
define <4 x i32> @combine_pshufd5(<4 x i32> %a) {
; SSE-LABEL: combine_pshufd5:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufd5:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; AVX-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -76)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b.cast, i8 27)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 -76)
  ret <4 x i32> %d
}

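; Note: immediate 0 broadcasts element 0; any shuffle of a splat is still
; the same splat, so the pair is expected to fold to a single broadcast.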
define <4 x i32> @combine_pshufd6(<4 x i32> %a) {
; SSE-LABEL: combine_pshufd6:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufd6:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 0)
  %c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 8)
  ret <4 x i32> %c
}

define <8 x i16> @combine_pshuflw1(<8 x i16> %a) {
; ALL-LABEL: combine_pshuflw1:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b, i8 27)
  ret <8 x i16> %c
}

define <8 x i16> @combine_pshuflw2(<8 x i16> %a) {
; ALL-LABEL: combine_pshuflw2:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b, i8 -28)
  %d = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %c, i8 27)
  ret <8 x i16> %d
}

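; As with combine_pshufd4/5 above, pshuflw and pshufhw operate on disjoint
; halves of the vector, so in the next two tests the identical outer
; shuffles cancel around the inner one.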
define <8 x i16> @combine_pshuflw3(<8 x i16> %a) {
; SSE-LABEL: combine_pshuflw3:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshuflw3:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; AVX-NEXT: retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b, i8 27)
  %d = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %c, i8 27)
  ret <8 x i16> %d
}

define <8 x i16> @combine_pshufhw1(<8 x i16> %a) {
; SSE-LABEL: combine_pshufhw1:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufhw1:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; AVX-NEXT: retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b, i8 27)
  %d = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %c, i8 27)
  ret <8 x i16> %d
}

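; In the following six tests both shuffles use the same mask and take all of
; their elements from just one input (%a or %b); the %c operand never
; contributes a lane. The combiner can therefore sink the shuffle below the
; bitwise op, rewriting shuffle(a) OP shuffle(b) into shuffle(a OP b).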
define <4 x i32> @combine_bitwise_ops_test1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test1:
; SSE: # BB#0:
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test1:
; AVX: # BB#0:
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test2:
; SSE: # BB#0:
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test2:
; AVX: # BB#0:
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test3:
; SSE: # BB#0:
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test3:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}

define <4 x i32> @combine_bitwise_ops_test4(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test4:
; SSE: # BB#0:
; SSE-NEXT: pand %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test4:
; AVX: # BB#0:
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test5(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test5:
; SSE: # BB#0:
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test5:
; AVX: # BB#0:
; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test6(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test6:
; SSE: # BB#0:
; SSE-NEXT: pxor %xmm1, %xmm0
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test6:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}


; Verify that the DAG combiner moves the shuffle after the xor/and/or even if
; the shuffles are not performing a swizzle operation.
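; Lanes taken from the shared %c operand are idempotent under and/or
; (c & c == c, c | c == c), so the blend with %c survives unchanged; under
; xor they cancel to zero (c ^ c == 0), which is why the xor tests below
; blend with a freshly zeroed register instead.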

define <4 x i32> @combine_bitwise_ops_test1b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test1b:
; SSE2: # BB#0:
; SSE2-NEXT: andps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test1b:
; SSSE3: # BB#0:
; SSSE3-NEXT: andps %xmm1, %xmm0
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test1b:
; SSE41: # BB#0:
; SSE41-NEXT: andps %xmm1, %xmm0
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test1b:
; AVX1: # BB#0:
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test1b:
; AVX2: # BB#0:
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test2b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test2b:
; SSE2: # BB#0:
; SSE2-NEXT: orps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test2b:
; SSSE3: # BB#0:
; SSSE3-NEXT: orps %xmm1, %xmm0
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test2b:
; SSE41: # BB#0:
; SSE41-NEXT: orps %xmm1, %xmm0
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test2b:
; AVX1: # BB#0:
; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test2b:
; AVX2: # BB#0:
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test3b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test3b:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm0
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test3b:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm0
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test3b:
; SSE41: # BB#0:
; SSE41-NEXT: xorps %xmm1, %xmm0
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test3b:
; AVX1: # BB#0:
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test3b:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}

define <4 x i32> @combine_bitwise_ops_test4b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test4b:
; SSE2: # BB#0:
; SSE2-NEXT: andps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test4b:
; SSSE3: # BB#0:
; SSSE3-NEXT: andps %xmm1, %xmm0
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test4b:
; SSE41: # BB#0:
; SSE41-NEXT: andps %xmm1, %xmm0
; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; SSE41-NEXT: movaps %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test4b:
; AVX1: # BB#0:
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test4b:
; AVX2: # BB#0:
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test5b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test5b:
; SSE2: # BB#0:
; SSE2-NEXT: orps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test5b:
; SSSE3: # BB#0:
; SSSE3-NEXT: orps %xmm1, %xmm0
; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,2,1,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test5b:
; SSE41: # BB#0:
; SSE41-NEXT: orps %xmm1, %xmm0
; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; SSE41-NEXT: movaps %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test5b:
; AVX1: # BB#0:
; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test5b:
; AVX2: # BB#0:
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test6b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE2-LABEL: combine_bitwise_ops_test6b:
; SSE2: # BB#0:
; SSE2-NEXT: xorps %xmm1, %xmm0
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,1,3]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: combine_bitwise_ops_test6b:
; SSSE3: # BB#0:
; SSSE3-NEXT: xorps %xmm1, %xmm0
; SSSE3-NEXT: xorps %xmm1, %xmm1
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,1,3]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_bitwise_ops_test6b:
; SSE41: # BB#0:
; SSE41-NEXT: xorps %xmm1, %xmm0
; SSE41-NEXT: xorps %xmm1, %xmm1
; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_bitwise_ops_test6b:
; AVX1: # BB#0:
; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_bitwise_ops_test6b:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
; AVX2-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}

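; Same pattern again, but with a mask that takes two elements from each
; operand, so after the combine a single shufps (rather than a blend) is
; expected to perform the remaining shuffle.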
define <4 x i32> @combine_bitwise_ops_test1c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test1c:
; SSE: # BB#0:
; SSE-NEXT: andps %xmm1, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test1c:
; AVX: # BB#0:
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test2c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test2c:
; SSE: # BB#0:
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test2c:
; AVX: # BB#0:
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test3c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test3c:
; SSE: # BB#0:
; SSE-NEXT: xorps %xmm1, %xmm0
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test3c:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}

define <4 x i32> @combine_bitwise_ops_test4c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test4c:
; SSE: # BB#0:
; SSE-NEXT: andps %xmm1, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test4c:
; AVX: # BB#0:
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm2[0,2],xmm0[1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %and = and <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %and
}

define <4 x i32> @combine_bitwise_ops_test5c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test5c:
; SSE: # BB#0:
; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test5c:
; AVX: # BB#0:
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm2[0,2],xmm0[1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}

define <4 x i32> @combine_bitwise_ops_test6c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_bitwise_ops_test6c:
; SSE: # BB#0:
; SSE-NEXT: xorps %xmm1, %xmm0
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_bitwise_ops_test6c:
; AVX: # BB#0:
; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[1,3]
; AVX-NEXT: retq
  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
  %xor = xor <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %xor
}