; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64

declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)

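; Every byte the pshufb produces below is overwritten with zero by the
; following shuffle, so each chain folds to a plain zero vector.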
define <32 x i8> @combine_pshufb_pslldq(<32 x i8> %a0) {
; X32-LABEL: combine_pshufb_pslldq:
; X32:       # BB#0:
; X32-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: combine_pshufb_pslldq:
; X64:       # BB#0:
; X64-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <32 x i8> %2
}

define <32 x i8> @combine_pshufb_psrldq(<32 x i8> %a0) {
; X32-LABEL: combine_pshufb_psrldq:
; X32:       # BB#0:
; X32-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: combine_pshufb_psrldq:
; X64:       # BB#0:
; X64-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
  %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
  ret <32 x i8> %2
}

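; A vpermd/vpermps whose result is only reshuffled bytewise folds into a
; single vpshufb.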
define <32 x i8> @combine_pshufb_vpermd(<8 x i32> %a) {
; X32-LABEL: combine_pshufb_vpermd:
; X32:       # BB#0:
; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; X32-NEXT:    retl
;
; X64-LABEL: combine_pshufb_vpermd:
; X64:       # BB#0:
; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; X64-NEXT:    retq
  %tmp0 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)
  %tmp1 = bitcast <8 x i32> %tmp0 to <32 x i8>
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
  ret <32 x i8> %tmp2
}

define <32 x i8> @combine_pshufb_vpermps(<8 x float> %a) {
; X32-LABEL: combine_pshufb_vpermps:
; X32:       # BB#0:
; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; X32-NEXT:    retl
;
; X64-LABEL: combine_pshufb_vpermps:
; X64:       # BB#0:
; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; X64-NEXT:    retq
  %tmp0 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)
  %tmp1 = bitcast <8 x float> %tmp0 to <32 x i8>
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
  ret <32 x i8> %tmp2
}

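; Reversing the i64 elements and then zeroing the upper 16 bytes is matched
; as a vperm2i128 with a zeroed source lane.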
define <4 x i64> @combine_permq_pshufb_as_vperm2i128(<4 x i64> %a0) {
; X32-LABEL: combine_permq_pshufb_as_vperm2i128:
; X32:       # BB#0:
; X32-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; X32-NEXT:    vpaddq {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: combine_permq_pshufb_as_vperm2i128:
; X64:       # BB#0:
; X64-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; X64-NEXT:    vpaddq {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255>)
  %4 = bitcast <32 x i8> %3 to <4 x i64>
  %5 = add <4 x i64> %4, <i64 1, i64 1, i64 3, i64 3>
  ret <4 x i64> %5
}

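; Shuffle chains feeding a variable permute combine into one vpermd/vpermps
; with a constant index vector.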
define <8 x i32> @combine_as_vpermd(<8 x i32> %a0) {
; X32-LABEL: combine_as_vpermd:
; X32:       # BB#0:
; X32-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,5,4,5,6,7,0,7]
; X32-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: combine_as_vpermd:
; X64:       # BB#0:
; X64-NEXT:    vmovdqa {{.*#+}} ymm1 = [4,5,4,5,6,7,0,7]
; X64-NEXT:    vpermd %ymm0, %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6>)
  %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 0, i32 8, i32 9, i32 1, i32 15, i32 14, i32 4, i32 3>
  ret <8 x i32> %3
}

define <8 x float> @combine_as_vpermps(<8 x float> %a0) {
; X32-LABEL: combine_as_vpermps:
; X32:       # BB#0:
; X32-NEXT:    vmovaps {{.*#+}} ymm1 = <6,4,7,5,1,u,4,7>
; X32-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: combine_as_vpermps:
; X64:       # BB#0:
; X64-NEXT:    vmovaps {{.*#+}} ymm1 = <6,4,7,5,1,u,4,7>
; X64-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %2 = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 1, i32 undef, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>)
  %3 = shufflevector <8 x float> %1, <8 x float> %2, <8 x i32> <i32 15, i32 0, i32 14, i32 1, i32 8, i32 9, i32 4, i32 3>
  ret <8 x float> %3
}

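; Swapping the i64 halves of each 128-bit lane and then zeroing the upper
; lane reduces to a vpblendd with zero.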
define <32 x i8> @combine_permq_pshufb_as_vpblendd(<4 x i64> %a0) {
; X32-LABEL: combine_permq_pshufb_as_vpblendd:
; X32:       # BB#0:
; X32-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; X32-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X32-NEXT:    retl
;
; X64-LABEL: combine_permq_pshufb_as_vpblendd:
; X64:       # BB#0:
; X64-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; X64-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X64-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255>)
  ret <32 x i8> %3
}

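; Splat shuffle masks are recognized as broadcasts of the low element.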
define <16 x i8> @combine_pshufb_as_vpbroadcastb128(<16 x i8> %a) {
; X32-LABEL: combine_pshufb_as_vpbroadcastb128:
; X32:       # BB#0:
; X32-NEXT:    vpbroadcastb %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: combine_pshufb_as_vpbroadcastb128:
; X64:       # BB#0:
; X64-NEXT:    vpbroadcastb %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> zeroinitializer)
  ret <16 x i8> %1
}

define <32 x i8> @combine_pshufb_as_vpbroadcastb256(<2 x i64> %a) {
; X32-LABEL: combine_pshufb_as_vpbroadcastb256:
; X32:       # BB#0:
; X32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X32-NEXT:    vpbroadcastb %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: combine_pshufb_as_vpbroadcastb256:
; X64:       # BB#0:
; X64-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X64-NEXT:    vpbroadcastb %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> zeroinitializer)
  %4 = bitcast <32 x i8> %3 to <8 x i32>
  %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer)
  %6 = bitcast <8 x i32> %5 to <32 x i8>
  ret <32 x i8> %6
}

define <16 x i8> @combine_pshufb_as_vpbroadcastw128(<16 x i8> %a) {
; X32-LABEL: combine_pshufb_as_vpbroadcastw128:
; X32:       # BB#0:
; X32-NEXT:    vpbroadcastw %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: combine_pshufb_as_vpbroadcastw128:
; X64:       # BB#0:
; X64-NEXT:    vpbroadcastw %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
  ret <16 x i8> %1
}

define <32 x i8> @combine_pshufb_as_vpbroadcastw256(<2 x i64> %a) {
; X32-LABEL: combine_pshufb_as_vpbroadcastw256:
; X32:       # BB#0:
; X32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X32-NEXT:    vpbroadcastw %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: combine_pshufb_as_vpbroadcastw256:
; X64:       # BB#0:
; X64-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X64-NEXT:    vpbroadcastw %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
  %4 = bitcast <32 x i8> %3 to <8 x i32>
  %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer)
  %6 = bitcast <8 x i32> %5 to <32 x i8>
  ret <32 x i8> %6
}

define <16 x i8> @combine_pshufb_as_vpbroadcastd128(<16 x i8> %a) {
; X32-LABEL: combine_pshufb_as_vpbroadcastd128:
; X32:       # BB#0:
; X32-NEXT:    vpbroadcastd %xmm0, %xmm0
; X32-NEXT:    vpaddb {{\.LCPI.*}}, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: combine_pshufb_as_vpbroadcastd128:
; X64:       # BB#0:
; X64-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %2 = add <16 x i8> %1, <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>
  ret <16 x i8> %2
}

define <8 x i32> @combine_permd_as_vpbroadcastd256(<4 x i32> %a) {
; X32-LABEL: combine_permd_as_vpbroadcastd256:
; X32:       # BB#0:
; X32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X32-NEXT:    vpbroadcastd %xmm0, %ymm0
; X32-NEXT:    vpaddd {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: combine_permd_as_vpbroadcastd256:
; X64:       # BB#0:
; X64-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X64-NEXT:    vpbroadcastd %xmm0, %ymm0
; X64-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> zeroinitializer)
  %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %3
}

define <16 x i8> @combine_pshufb_as_vpbroadcastq128(<16 x i8> %a) {
; X32-LABEL: combine_pshufb_as_vpbroadcastq128:
; X32:       # BB#0:
; X32-NEXT:    vpbroadcastq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: combine_pshufb_as_vpbroadcastq128:
; X64:       # BB#0:
; X64-NEXT:    vpbroadcastq %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  ret <16 x i8> %1
}

define <8 x i32> @combine_permd_as_vpbroadcastq256(<4 x i32> %a) {
; X32-LABEL: combine_permd_as_vpbroadcastq256:
; X32:       # BB#0:
; X32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X32-NEXT:    vpbroadcastq %xmm0, %ymm0
; X32-NEXT:    vpaddd {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: combine_permd_as_vpbroadcastq256:
; X64:       # BB#0:
; X64-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X64-NEXT:    vpbroadcastq %xmm0, %ymm0
; X64-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %3
}

define <4 x float> @combine_pshufb_as_vpbroadcastss128(<4 x float> %a) {
; X32-LABEL: combine_pshufb_as_vpbroadcastss128:
; X32:       # BB#0:
; X32-NEXT:    vbroadcastss %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: combine_pshufb_as_vpbroadcastss128:
; X64:       # BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = bitcast <4 x float> %a to <16 x i8>
  %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %3 = bitcast <16 x i8> %2 to <4 x float>
  ret <4 x float> %3
}

define <8 x float> @combine_permps_as_vpbroadcastss256(<4 x float> %a) {
; X32-LABEL: combine_permps_as_vpbroadcastss256:
; X32:       # BB#0:
; X32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X32-NEXT:    vbroadcastss %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: combine_permps_as_vpbroadcastss256:
; X64:       # BB#0:
; X64-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %1, <8 x i32> zeroinitializer)
  ret <8 x float> %2
}

define <4 x double> @combine_permps_as_vpbroadcastsd256(<2 x double> %a) {
; X32-LABEL: combine_permps_as_vpbroadcastsd256:
; X32:       # BB#0:
; X32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: combine_permps_as_vpbroadcastsd256:
; X64:       # BB#0:
; X64-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x double> %1 to <8 x float>
  %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %4 = bitcast <8 x float> %3 to <4 x double>
  ret <4 x double> %4
}

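; An explicit splat shufflevector followed by another splat shuffle should
; collapse to a single broadcast; note the ss256/sd256 cases below currently
; still emit the broadcast twice.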
define <16 x i8> @combine_vpbroadcast_pshufb_as_vpbroadcastb128(<16 x i8> %a) {
; X32-LABEL: combine_vpbroadcast_pshufb_as_vpbroadcastb128:
; X32:       # BB#0:
; X32-NEXT:    vpbroadcastb %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: combine_vpbroadcast_pshufb_as_vpbroadcastb128:
; X64:       # BB#0:
; X64-NEXT:    vpbroadcastb %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
  %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> zeroinitializer)
  ret <16 x i8> %2
}

define <32 x i8> @combine_vpbroadcast_pshufb_as_vpbroadcastb256(<32 x i8> %a) {
; X32-LABEL: combine_vpbroadcast_pshufb_as_vpbroadcastb256:
; X32:       # BB#0:
; X32-NEXT:    vpbroadcastb %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: combine_vpbroadcast_pshufb_as_vpbroadcastb256:
; X64:       # BB#0:
; X64-NEXT:    vpbroadcastb %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
  %2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> zeroinitializer)
  ret <32 x i8> %2
}

define <4 x float> @combine_vpbroadcast_pshufb_as_vpbroadcastss128(<4 x float> %a) {
; X32-LABEL: combine_vpbroadcast_pshufb_as_vpbroadcastss128:
; X32:       # BB#0:
; X32-NEXT:    vbroadcastss %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: combine_vpbroadcast_pshufb_as_vpbroadcastss128:
; X64:       # BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
  %2 = bitcast <4 x float> %1 to <16 x i8>
  %3 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %4 = bitcast <16 x i8> %3 to <4 x float>
  ret <4 x float> %4
}

define <8 x float> @combine_vpbroadcast_permd_as_vpbroadcastss256(<4 x float> %a) {
; X32-LABEL: combine_vpbroadcast_permd_as_vpbroadcastss256:
; X32:       # BB#0:
; X32-NEXT:    vbroadcastss %xmm0, %ymm0
; X32-NEXT:    vbroadcastss %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: combine_vpbroadcast_permd_as_vpbroadcastss256:
; X64:       # BB#0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> zeroinitializer
  %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %1, <8 x i32> zeroinitializer)
  ret <8 x float> %2
}

define <4 x double> @combine_vpbroadcast_permd_as_vpbroadcastsd256(<2 x double> %a) {
; X32-LABEL: combine_vpbroadcast_permd_as_vpbroadcastsd256:
; X32:       # BB#0:
; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: combine_vpbroadcast_permd_as_vpbroadcastsd256:
; X64:       # BB#0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0
; X64-NEXT:    retq
  %1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> zeroinitializer
  %2 = bitcast <4 x double> %1 to <8 x float>
  %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %4 = bitcast <8 x float> %3 to <4 x double>
  ret <4 x double> %4
}

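; Permutes that move whole 64-bit blocks are narrowed to the immediate forms
; vpermq/vpermpd.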
define <8 x i32> @combine_permd_as_permq(<8 x i32> %a) {
; X32-LABEL: combine_permd_as_permq:
; X32:       # BB#0:
; X32-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,1]
; X32-NEXT:    retl
;
; X64-LABEL: combine_permd_as_permq:
; X64:       # BB#0:
; X64-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,1]
; X64-NEXT:    retq
  %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 2, i32 3>)
  ret <8 x i32> %1
}

define <8 x float> @combine_permps_as_permpd(<8 x float> %a) {
; X32-LABEL: combine_permps_as_permpd:
; X32:       # BB#0:
; X32-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,0,1]
; X32-NEXT:    retl
;
; X64-LABEL: combine_permps_as_permpd:
; X64:       # BB#0:
; X64-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,0,1]
; X64-NEXT:    retq
  %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 0, i32 1, i32 2, i32 3>)
  ret <8 x float> %1
}

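; Masks that keep only the low element and zero everything else become a
; blend with a zero vector (vzmovl-style).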
define <4 x double> @combine_pshufb_as_vzmovl_64(<4 x double> %a0) {
; X32-LABEL: combine_pshufb_as_vzmovl_64:
; X32:       # BB#0:
; X32-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; X32-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; X32-NEXT:    retl
;
; X64-LABEL: combine_pshufb_as_vzmovl_64:
; X64:       # BB#0:
; X64-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; X64-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; X64-NEXT:    retq
  %1 = bitcast <4 x double> %a0 to <32 x i8>
  %2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %3 = bitcast <32 x i8> %2 to <4 x double>
  ret <4 x double> %3
}

define <8 x float> @combine_pshufb_as_vzmovl_32(<8 x float> %a0) {
; X32-LABEL: combine_pshufb_as_vzmovl_32:
; X32:       # BB#0:
; X32-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; X32-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; X32-NEXT:    retl
;
; X64-LABEL: combine_pshufb_as_vzmovl_32:
; X64:       # BB#0:
; X64-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; X64-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; X64-NEXT:    retq
  %1 = bitcast <8 x float> %a0 to <32 x i8>
  %2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %3 = bitcast <32 x i8> %2 to <8 x float>
  ret <8 x float> %3
}

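; Byte-shuffle masks that match an in-lane byte shift are emitted as
; vpslldq/vpsrldq.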
define <32 x i8> @combine_pshufb_as_pslldq(<32 x i8> %a0) {
; X32-LABEL: combine_pshufb_as_pslldq:
; X32:       # BB#0:
; X32-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21]
; X32-NEXT:    retl
;
; X64-LABEL: combine_pshufb_as_pslldq:
; X64:       # BB#0:
; X64-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21]
; X64-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>)
  ret <32 x i8> %res0
}

define <32 x i8> @combine_pshufb_as_psrldq(<32 x i8> %a0) {
; X32-LABEL: combine_pshufb_as_psrldq:
; X32:       # BB#0:
; X32-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X32-NEXT:    retl
;
; X64-LABEL: combine_pshufb_as_psrldq:
; X64:       # BB#0:
; X64-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X64-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
  ret <32 x i8> %res0
}

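; Word shuffles confined to the low (or high) half of each 128-bit lane map
; to vpshuflw/vpshufhw; the mixed case below has to stay a vpshufb.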
define <32 x i8> @combine_pshufb_as_pshuflw(<32 x i8> %a0) {
; X32-LABEL: combine_pshufb_as_pshuflw:
; X32:       # BB#0:
; X32-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
; X32-NEXT:    retl
;
; X64-LABEL: combine_pshufb_as_pshuflw:
; X64:       # BB#0:
; X64-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
; X64-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <32 x i8> %res0
}

define <32 x i8> @combine_pshufb_as_pshufhw(<32 x i8> %a0) {
; X32-LABEL: combine_pshufb_as_pshufhw:
; X32:       # BB#0:
; X32-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14]
; X32-NEXT:    retl
;
; X64-LABEL: combine_pshufb_as_pshufhw:
; X64:       # BB#0:
; X64-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14]
; X64-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>)
  ret <32 x i8> %res0
}

define <32 x i8> @combine_pshufb_not_as_pshufw(<32 x i8> %a0) {
; X32-LABEL: combine_pshufb_not_as_pshufw:
; X32:       # BB#0:
; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13,18,19,16,17,22,23,20,21,26,27,24,25,30,31,28,29]
; X32-NEXT:    retl
;
; X64-LABEL: combine_pshufb_not_as_pshufw:
; X64:       # BB#0:
; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13,18,19,16,17,22,23,20,21,26,27,24,25,30,31,28,29]
; X64-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  %res1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %res0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>)
  ret <32 x i8> %res1
}

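; Constant-input cases: the index vector is materialized, but the shuffle of
; the constant operand is not yet folded away here.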
define <8 x i32> @constant_fold_permd() {
; X32-LABEL: constant_fold_permd:
; X32:       # BB#0:
; X32-NEXT:    vmovdqa {{.*#+}} ymm0 = [4,6,2,1,7,1,5,0]
; X32-NEXT:    vpermd {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: constant_fold_permd:
; X64:       # BB#0:
; X64-NEXT:    vmovdqa {{.*#+}} ymm0 = [4,6,2,1,7,1,5,0]
; X64-NEXT:    vpermd {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <8 x i32> <i32 4, i32 6, i32 2, i32 1, i32 7, i32 1, i32 5, i32 0>)
  ret <8 x i32> %1
}

define <8 x float> @constant_fold_permps() {
; X32-LABEL: constant_fold_permps:
; X32:       # BB#0:
; X32-NEXT:    vmovaps {{.*#+}} ymm0 = [4,6,2,1,7,1,5,0]
; X32-NEXT:    vpermps {{\.LCPI.*}}, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: constant_fold_permps:
; X64:       # BB#0:
; X64-NEXT:    vmovaps {{.*#+}} ymm0 = [4,6,2,1,7,1,5,0]
; X64-NEXT:    vpermps {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
  %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, <8 x i32> <i32 4, i32 6, i32 2, i32 1, i32 7, i32 1, i32 5, i32 0>)
  ret <8 x float> %1
}

define <32 x i8> @constant_fold_pshufb_256() {
; X32-LABEL: constant_fold_pshufb_256:
; X32:       # BB#0:
; X32-NEXT:    vmovdqa {{.*#+}} ymm0 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241]
; X32-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1],zero,zero,zero,ymm0[u,u],zero,zero,ymm0[15],zero,zero,zero,zero,zero,ymm0[7,6,17],zero,zero,zero,ymm0[u,u],zero,zero,ymm0[31],zero,zero,zero,zero,zero,ymm0[23,22]
; X32-NEXT:    retl
;
; X64-LABEL: constant_fold_pshufb_256:
; X64:       # BB#0:
; X64-NEXT:    vmovdqa {{.*#+}} ymm0 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,0,255,254,253,252,251,250,249,248,247,246,245,244,243,242,241]
; X64-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1],zero,zero,zero,ymm0[u,u],zero,zero,ymm0[15],zero,zero,zero,zero,zero,ymm0[7,6,17],zero,zero,zero,ymm0[u,u],zero,zero,ymm0[31],zero,zero,zero,zero,zero,ymm0[23,22]
; X64-NEXT:    retq
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 -1, i8 -2, i8 -3, i8 -4, i8 -5, i8 -6, i8 -7, i8 -8, i8 -9, i8 -10, i8 -11, i8 -12, i8 -13, i8 -14, i8 -15>, <32 x i8> <i8 1, i8 -1, i8 -1, i8 -1, i8 undef, i8 undef, i8 -1, i8 -1, i8 15, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 7, i8 6, i8 1, i8 -1, i8 -1, i8 -1, i8 undef, i8 undef, i8 -1, i8 -1, i8 15, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 7, i8 6>)
  ret <32 x i8> %1
}