; RUN: llc < %s -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
;
; Verify that the DAG combiner correctly folds bitwise operations across
; shuffles, nested shuffles with undef, pairs of nested shuffles, and other
; basic and always-safe patterns. Also test that the DAG combiner will combine
; target-specific shuffle instructions where reasonable.

target triple = "x86_64-unknown-unknown"

declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8)
declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8)
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8)

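; Immediate 27 (0b00011011) reverses the four dwords; applying it twice is the
; identity shuffle, so both pshufds should be combined away.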
define <4 x i32> @combine_pshufd1(<4 x i32> %a) {
; ALL-LABEL: combine_pshufd1:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
  %c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 27)
  ret <4 x i32> %c
}

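; The pshuflw in the middle uses immediate -28 (0xe4), the identity order for
; the low words, so the two reversing pshufds still cancel and nothing remains.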
define <4 x i32> @combine_pshufd2(<4 x i32> %a) {
; ALL-LABEL: combine_pshufd2:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b.cast, i8 -28)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 27)
  ret <4 x i32> %d
}

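; Same as combine_pshufd2, but with an identity pshufhw (0xe4) between the two
; reversing pshufds; the whole chain should still fold away.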
define <4 x i32> @combine_pshufd3(<4 x i32> %a) {
; ALL-LABEL: combine_pshufd3:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b.cast, i8 -28)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 27)
  ret <4 x i32> %d
}

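; Immediate -31 (0xe1) swaps the low two dwords and leaves the high half alone,
; so the two pshufds cancel around the pshufhw and only a single reversal of
; the high words should survive.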
define <4 x i32> @combine_pshufd4(<4 x i32> %a) {
; SSE-LABEL: combine_pshufd4:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufd4:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; AVX-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -31)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b.cast, i8 27)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 -31)
  ret <4 x i32> %d
}

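; Immediate -76 (0xb4) swaps the high two dwords, so the two pshufds cancel
; around the pshuflw and only a single reversal of the low words should survive.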
define <4 x i32> @combine_pshufd5(<4 x i32> %a) {
; SSE-LABEL: combine_pshufd5:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufd5:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; AVX-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -76)
  %b.cast = bitcast <4 x i32> %b to <8 x i16>
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b.cast, i8 27)
  %c.cast = bitcast <8 x i16> %c to <4 x i32>
  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 -76)
  ret <4 x i32> %d
}

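; The first pshufd (immediate 0) splats dword 0; any further shuffle of that
; splat is the same splat, so this should fold to a single broadcast pshufd.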
define <4 x i32> @combine_pshufd6(<4 x i32> %a) {
; SSE-LABEL: combine_pshufd6:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufd6:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: retq
entry:
  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 0)
  %c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 8)
  ret <4 x i32> %c
}

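; pshuflw with immediate 27 reverses the low four words; doing it twice is the
; identity, so both shuffles should be eliminated.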
define <8 x i16> @combine_pshuflw1(<8 x i16> %a) {
; ALL-LABEL: combine_pshuflw1:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b, i8 27)
  ret <8 x i16> %c
}

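; The pshufhw in the middle uses the identity immediate -28 (0xe4), so the two
; reversing pshuflws still cancel and the sequence folds to nothing.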
define <8 x i16> @combine_pshuflw2(<8 x i16> %a) {
; ALL-LABEL: combine_pshuflw2:
; ALL: # BB#0: # %entry
; ALL-NEXT: retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b, i8 -28)
  %d = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %c, i8 27)
  ret <8 x i16> %d
}

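; The two pshuflws cancel each other and commute with the pshufhw since they
; touch disjoint halves, so only the reversal of the high words should remain.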
define <8 x i16> @combine_pshuflw3(<8 x i16> %a) {
; SSE-LABEL: combine_pshuflw3:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshuflw3:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; AVX-NEXT: retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b, i8 27)
  %d = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %c, i8 27)
  ret <8 x i16> %d
}

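; Mirror of combine_pshuflw3: the two pshufhws cancel and only the reversal of
; the low words from the pshuflw should remain.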
define <8 x i16> @combine_pshufhw1(<8 x i16> %a) {
; SSE-LABEL: combine_pshufhw1:
; SSE: # BB#0: # %entry
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE-NEXT: retq
;
; AVX-LABEL: combine_pshufhw1:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; AVX-NEXT: retq
entry:
  %b = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27)
  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b, i8 27)
  %d = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %c, i8 27)
  ret <8 x i16> %d
}