; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s

declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)

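; The pshufb acts as a per-lane pslldq by 8 bytes and the shufflevector
; shifts by another 8, so every input byte is shifted out of its lane and
; the whole sequence should fold to a zero vector.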
define <32 x i8> @combine_pshufb_pslldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_pslldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <32 x i8> %2
}

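; Same idea with right shifts: two per-lane psrldq-by-8 patterns compose to
; shift everything out, so this should also fold to a zero vector.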
define <32 x i8> @combine_pshufb_psrldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_psrldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
  %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
  ret <32 x i8> %2
}

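; The vpermd and the byte shuffle compose to a shuffle that stays within
; 128-bit lanes, so the pair should combine into a single vpshufb.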
define <32 x i8> @combine_pshufb_vpermd(<8 x i32> %a) {
; CHECK-LABEL: combine_pshufb_vpermd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; CHECK-NEXT:    retq
  %tmp0 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)
  %tmp1 = bitcast <8 x i32> %tmp0 to <32 x i8>
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
  ret <32 x i8> %tmp2
}

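; Same combine as above through vpermps: the float permute and the byte
; shuffle still merge into one vpshufb.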
define <32 x i8> @combine_pshufb_vpermps(<8 x float> %a) {
; CHECK-LABEL: combine_pshufb_vpermps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; CHECK-NEXT:    retq
  %tmp0 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)
  %tmp1 = bitcast <8 x float> %tmp0 to <32 x i8>
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
  ret <32 x i8> %tmp2
}

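; The permq reversal plus a pshufb that zeroes the upper lane reduces to a
; single vperm2i128 that moves the high 128 bits down and zeroes the rest.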
define <4 x i64> @combine_permq_pshufb_as_vperm2i128(<4 x i64> %a0) {
; CHECK-LABEL: combine_permq_pshufb_as_vperm2i128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255>)
  %4 = bitcast <32 x i8> %3 to <4 x i64>
  %5 = add <4 x i64> %4, <i64 1, i64 1, i64 3, i64 3>
  ret <4 x i64> %5
}

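; Here the two shuffles cancel in the low lane and zero the high lane, which
; is expected to lower as a blend against a zeroed register rather than as a
; shuffle.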
define <32 x i8> @combine_permq_pshufb_as_vpblendd(<4 x i64> %a0) {
; CHECK-LABEL: combine_permq_pshufb_as_vpblendd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255>)
  ret <32 x i8> %3
}

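; The following tests check that splat-style shuffle masks are recognized and
; lowered to the dedicated broadcast instructions (vpbroadcastb/w/d/q and
; vbroadcastss/sd) instead of generic shuffles.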
define <16 x i8> @combine_pshufb_as_vpbroadcastb128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> zeroinitializer)
  ret <16 x i8> %1
}

define <32 x i8> @combine_pshufb_as_vpbroadcastb256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> zeroinitializer)
  %4 = bitcast <32 x i8> %3 to <8 x i32>
  %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer)
  %6 = bitcast <8 x i32> %5 to <32 x i8>
  ret <32 x i8> %6
}

define <16 x i8> @combine_pshufb_as_vpbroadcastw128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
  ret <16 x i8> %1
}

define <32 x i8> @combine_pshufb_as_vpbroadcastw256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
  %4 = bitcast <32 x i8> %3 to <8 x i32>
  %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer)
  %6 = bitcast <8 x i32> %5 to <32 x i8>
  ret <32 x i8> %6
}

define <16 x i8> @combine_pshufb_as_vpbroadcastd128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastd128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm0
; CHECK-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %2 = add <16 x i8> %1, <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>
  ret <16 x i8> %2
}

define <8 x i32> @combine_permd_as_vpbroadcastd256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastd256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastd %xmm0, %ymm0
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> zeroinitializer)
  %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %3
}

define <16 x i8> @combine_pshufb_as_vpbroadcastq128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastq128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  ret <16 x i8> %1
}

define <8 x i32> @combine_permd_as_vpbroadcastq256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastq256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm0
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %3
}

define <4 x float> @combine_pshufb_as_vpbroadcastss128(<4 x float> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastss128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = bitcast <4 x float> %a to <16 x i8>
  %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %3 = bitcast <16 x i8> %2 to <4 x float>
  ret <4 x float> %3
}

define <8 x float> @combine_permd_as_vpbroadcastss256(<4 x float> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastss256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %1, <8 x i32> zeroinitializer)
  ret <8 x float> %2
}

define <4 x double> @combine_permd_as_vpbroadcastsd256(<2 x double> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastsd256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x double> %1 to <8 x float>
  %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %4 = bitcast <8 x float> %3 to <4 x double>
  ret <4 x double> %4
}

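; These tests feed an explicit splat into another splat-style shuffle; the
; redundant shuffle should fold away, leaving a single broadcast. The CHECK
; lines pin down the current codegen, which still emits the broadcast twice
; in the ss256/sd256 cases.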
define <16 x i8> @combine_vpbroadcast_pshufb_as_vpbroadcastb128(<16 x i8> %a) {
; CHECK-LABEL: combine_vpbroadcast_pshufb_as_vpbroadcastb128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
  %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> zeroinitializer)
  ret <16 x i8> %2
}

define <32 x i8> @combine_vpbroadcast_pshufb_as_vpbroadcastb256(<32 x i8> %a) {
; CHECK-LABEL: combine_vpbroadcast_pshufb_as_vpbroadcastb256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
  %2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> zeroinitializer)
  ret <32 x i8> %2
}

define <4 x float> @combine_vpbroadcast_pshufb_as_vpbroadcastss128(<4 x float> %a) {
; CHECK-LABEL: combine_vpbroadcast_pshufb_as_vpbroadcastss128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
  %2 = bitcast <4 x float> %1 to <16 x i8>
  %3 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %2, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %4 = bitcast <16 x i8> %3 to <4 x float>
  ret <4 x float> %4
}

define <8 x float> @combine_vpbroadcast_permd_as_vpbroadcastss256(<4 x float> %a) {
; CHECK-LABEL: combine_vpbroadcast_permd_as_vpbroadcastss256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> zeroinitializer
  %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %1, <8 x i32> zeroinitializer)
  ret <8 x float> %2
}

define <4 x double> @combine_vpbroadcast_permd_as_vpbroadcastsd256(<2 x double> %a) {
; CHECK-LABEL: combine_vpbroadcast_permd_as_vpbroadcastsd256:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> zeroinitializer
  %2 = bitcast <4 x double> %1 to <8 x float>
  %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %4 = bitcast <8 x float> %3 to <4 x double>
  ret <4 x double> %4
}

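; A dword permute that moves aligned dword pairs can be encoded as an
; immediate vpermq instead of a variable vpermd.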
define <8 x i32> @combine_permd_as_permq(<8 x i32> %a) {
; CHECK-LABEL: combine_permd_as_permq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,1]
; CHECK-NEXT:    retq
  %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 2, i32 3>)
  ret <8 x i32> %1
}

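; Likewise for the float variant: a vpermps with a pairwise mask becomes an
; immediate vpermpd.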
define <8 x float> @combine_permps_as_permpd(<8 x float> %a) {
; CHECK-LABEL: combine_permps_as_permpd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,0,1]
; CHECK-NEXT:    retq
  %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 0, i32 1, i32 2, i32 3>)
  ret <8 x float> %1
}

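; A pshufb that keeps only the low element and zeroes everything else is a
; "move low, zero rest" pattern, expected to lower as a blend with a zeroed
; register.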
define <4 x double> @combine_pshufb_as_vzmovl_64(<4 x double> %a0) {
; CHECK-LABEL: combine_pshufb_as_vzmovl_64:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; CHECK-NEXT:    retq
  %1 = bitcast <4 x double> %a0 to <32 x i8>
  %2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %3 = bitcast <32 x i8> %2 to <4 x double>
  ret <4 x double> %3
}

define <8 x float> @combine_pshufb_as_vzmovl_32(<8 x float> %a0) {
; CHECK-LABEL: combine_pshufb_as_vzmovl_32:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
; CHECK-NEXT:    retq
  %1 = bitcast <8 x float> %a0 to <32 x i8>
  %2 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
  %3 = bitcast <32 x i8> %2 to <8 x float>
  ret <8 x float> %3
}

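; pshufb masks that match a per-lane byte-shift pattern (zero bytes shifted
; in from one end) should lower to immediate vpslldq/vpsrldq shifts.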
define <32 x i8> @combine_pshufb_as_pslldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pslldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>)
  ret <32 x i8> %res0
}

define <32 x i8> @combine_pshufb_as_psrldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_psrldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
  ret <32 x i8> %res0
}

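; Word shuffles confined to the low (or high) 8 bytes of each lane map to
; the immediate forms vpshuflw/vpshufhw.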
define <32 x i8> @combine_pshufb_as_pshuflw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pshuflw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <32 x i8> %res0
}

define <32 x i8> @combine_pshufb_as_pshufhw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pshufhw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>)
  ret <32 x i8> %res0
}

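; Negative test: the two pshufb masks constant-fold into one, but the
; composed shuffle moves words in both the low and high halves of each lane,
; so it cannot become a single vpshuflw or vpshufhw and stays a vpshufb.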
define <32 x i8> @combine_pshufb_not_as_pshufw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_not_as_pshufw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13,18,19,16,17,22,23,20,21,26,27,24,25,30,31,28,29]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  %res1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %res0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>)
  ret <32 x i8> %res1
}