blob: 5137a12b5a3f5e89b4dd937119ceef6e0838ea6c [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s

declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)

; The pshufb shifts each 128-bit lane left (zeroing the low 8 bytes), then the
; shufflevector keeps only bytes the pshufb zeroed - the chain folds to zero.
define <32 x i8> @combine_pshufb_pslldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_pslldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <32 x i8> %2
}

; The pshufb shifts each 128-bit lane right (zeroing the high 8 bytes), then
; the shufflevector keeps only the zeroed bytes - the chain folds to zero.
define <32 x i8> @combine_pshufb_psrldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_psrldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
  %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
  ret <32 x i8> %2
}

; vpermd followed by a byte shuffle combines into a single vpshufb.
define <32 x i8> @combine_pshufb_vpermd(<8 x i32> %a) {
; CHECK-LABEL: combine_pshufb_vpermd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; CHECK-NEXT:    retq
  %tmp0 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)
  %tmp1 = bitcast <8 x i32> %tmp0 to <32 x i8>
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
  ret <32 x i8> %tmp2
}

; vpermps followed by a byte shuffle combines into a single vpshufb.
define <32 x i8> @combine_pshufb_vpermps(<8 x float> %a) {
; CHECK-LABEL: combine_pshufb_vpermps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; CHECK-NEXT:    retq
  %tmp0 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)
  %tmp1 = bitcast <8 x float> %tmp0 to <32 x i8>
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
  ret <32 x i8> %tmp2
}

; A 64-bit lane reversal followed by a pshufb that zeroes the whole upper
; 128-bit lane combines into a single vperm2i128 with a zeroed half.
define <4 x i64> @combine_permq_pshufb_as_vperm2i128(<4 x i64> %a0) {
; CHECK-LABEL: combine_permq_pshufb_as_vperm2i128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255>)
  %4 = bitcast <32 x i8> %3 to <4 x i64>
  %5 = add <4 x i64> %4, <i64 1, i64 1, i64 3, i64 3>
  ret <4 x i64> %5
}

; A 64-bit lane swap followed by a pshufb that zeroes the upper 128-bit lane
; combines into a blend with zero (vpxor + vpblendd).
define <32 x i8> @combine_permq_pshufb_as_vpblendd(<4 x i64> %a0) {
; CHECK-LABEL: combine_permq_pshufb_as_vpblendd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255>)
  ret <32 x i8> %3
}

; pshufb with an all-zero mask replicates byte 0 - combines to vpbroadcastb.
define <16 x i8> @combine_pshufb_as_vpbroadcastb128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> zeroinitializer)
  ret <16 x i8> %1
}

; Per-lane pshufb plus a cross-lane permd, both with zero masks, combine to a
; single 256-bit vpbroadcastb of byte 0.
define <32 x i8> @combine_pshufb_as_vpbroadcastb256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> zeroinitializer)
  %4 = bitcast <32 x i8> %3 to <8 x i32>
  %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer)
  %6 = bitcast <8 x i32> %5 to <32 x i8>
  ret <32 x i8> %6
}

; pshufb replicating bytes {0,1} replicates word 0 - combines to vpbroadcastw.
define <16 x i8> @combine_pshufb_as_vpbroadcastw128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
  ret <16 x i8> %1
}

; Per-lane word replication via pshufb plus a zero-mask permd combine to a
; single 256-bit vpbroadcastw of word 0.
define <32 x i8> @combine_pshufb_as_vpbroadcastw256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
  %4 = bitcast <32 x i8> %3 to <8 x i32>
  %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer)
  %6 = bitcast <8 x i32> %5 to <32 x i8>
  ret <32 x i8> %6
}

; pshufb replicating bytes {0,1,2,3} replicates dword 0 - combines to
; vpbroadcastd (the add is kept to stop the broadcast being folded away).
define <16 x i8> @combine_pshufb_as_vpbroadcastd128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastd128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm0
; CHECK-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %2 = add <16 x i8> %1, <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>
  ret <16 x i8> %2
}

; permd with a zero mask replicates element 0 - combines to a 256-bit
; vpbroadcastd.
define <8 x i32> @combine_permd_as_vpbroadcastd256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastd256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastd %xmm0, %ymm0
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> zeroinitializer)
  %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %3
}

; pshufb replicating bytes {0..7} replicates qword 0 - combines to
; vpbroadcastq.
define <16 x i8> @combine_pshufb_as_vpbroadcastq128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastq128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  ret <16 x i8> %1
}

; permd replicating dword pair {0,1} replicates qword 0 - combines to a
; 256-bit vpbroadcastq.
define <8 x i32> @combine_permd_as_vpbroadcastq256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastq256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm0
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %3
}

; A dword-replicating pshufb on float data combines to vbroadcastss.
define <4 x float> @combine_pshufb_as_vpbroadcastss128(<4 x float> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastss128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = bitcast <4 x float> %a to <16 x i8>
  %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %3 = bitcast <16 x i8> %2 to <4 x float>
  ret <4 x float> %3
}

; permps with a zero mask replicates element 0 - combines to a 256-bit
; vbroadcastss.
define <8 x float> @combine_permd_as_vpbroadcastss256(<4 x float> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastss256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %1, <8 x i32> zeroinitializer)
  ret <8 x float> %2
}

; permps replicating float pair {0,1} replicates double 0 - combines to a
; 256-bit vbroadcastsd.
define <4 x double> @combine_permd_as_vpbroadcastsd256(<2 x double> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastsd256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x double> %1 to <8 x float>
  %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %4 = bitcast <8 x float> %3 to <4 x double>
  ret <4 x double> %4
}

; A permd whose dword mask moves aligned dword pairs is really a qword
; shuffle - combines to vpermq.
define <8 x i32> @combine_permd_as_permq(<8 x i32> %a) {
; CHECK-LABEL: combine_permd_as_permq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,1]
; CHECK-NEXT:    retq
  %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 2, i32 3>)
  ret <8 x i32> %1
}

; A permps whose mask moves aligned float pairs is really a double shuffle -
; combines to vpermpd.
define <8 x float> @combine_permps_as_permpd(<8 x float> %a) {
; CHECK-LABEL: combine_permps_as_permpd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,0,1]
; CHECK-NEXT:    retq
  %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 0, i32 1, i32 2, i32 3>)
  ret <8 x float> %1
}

; A pshufb mask that shifts each 128-bit lane left by 10 bytes (zero-filling)
; is lowered as a single vpshufb.
define <32 x i8> @combine_pshufb_as_pslldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pslldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>)
  ret <32 x i8> %res0
}

; A pshufb mask that shifts each 128-bit lane right by 15 bytes (zero-filling)
; is lowered as a single vpshufb.
define <32 x i8> @combine_pshufb_as_psrldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_psrldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
  ret <32 x i8> %res0
}

; A byte mask that only swaps words within the low 64 bits of each lane
; combines to vpshuflw.
define <32 x i8> @combine_pshufb_as_pshuflw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pshuflw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <32 x i8> %res0
}

; A byte mask that only swaps words within the high 64 bits of each lane
; combines to vpshufhw.
define <32 x i8> @combine_pshufb_as_pshufhw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pshufhw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>)
  ret <32 x i8> %res0
}

; The two pshufbs merge into one mask that shuffles words in BOTH halves of
; each lane, which a single vpshuflw or vpshufhw cannot express - so the
; combine must emit one vpshufb instead.
define <32 x i8> @combine_pshufb_not_as_pshufw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_not_as_pshufw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13,18,19,16,17,22,23,20,21,26,27,24,25,30,31,28,29]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  %res1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %res0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>)
  ret <32 x i8> %res1
}