; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s

declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)

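; The pshufb shifts 8 zero bytes into each 128-bit lane and the shufflevector
; shifts each lane a further 8 bytes, so every result byte is zero and the
; whole sequence should fold to a single vxorps.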
define <32 x i8> @combine_pshufb_pslldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_pslldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <32 x i8> %2
}

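; The mirror image with per-lane right shifts: two psrldq-by-8 patterns
; compose to shift each lane out entirely, again folding to vxorps.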
define <32 x i8> @combine_pshufb_psrldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_psrldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
  %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
  ret <32 x i8> %2
}

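; The vpermd only moves element 4 within the upper 128-bit lane, so the
; permute and the byte shuffle can merge into one in-lane vpshufb.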
define <32 x i8> @combine_pshufb_vpermd(<8 x i32> %a) {
; CHECK-LABEL: combine_pshufb_vpermd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; CHECK-NEXT:    retq
  %tmp0 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)
  %tmp1 = bitcast <8 x i32> %tmp0 to <32 x i8>
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
  ret <32 x i8> %tmp2
}

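; The same combine through the float domain: vpermps plus a byte shuffle
; still reduces to a single integer vpshufb.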
define <32 x i8> @combine_pshufb_vpermps(<8 x float> %a) {
; CHECK-LABEL: combine_pshufb_vpermps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; CHECK-NEXT:    retq
  %tmp0 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)
  %tmp1 = bitcast <8 x float> %tmp0 to <32 x i8>
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
  ret <32 x i8> %tmp2
}

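; Reversing the qwords, re-swapping the low lane and zeroing the upper lane
; (0xFF mask bytes) amounts to "upper 128 bits of %a0, then zero", i.e. a
; single vperm2i128; the add merely consumes the shuffled value.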
define <4 x i64> @combine_permq_pshufb_as_vperm2i128(<4 x i64> %a0) {
; CHECK-LABEL: combine_permq_pshufb_as_vperm2i128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero
; CHECK-NEXT:    vpaddq {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255, i8 255>)
  %4 = bitcast <32 x i8> %3 to <4 x i64>
  %5 = add <4 x i64> %4, <i64 1, i64 1, i64 3, i64 3>
  ret <4 x i64> %5
}

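; An all-zero pshufb mask reads byte 0 for every element, i.e. a byte splat,
; which should lower to vpbroadcastb.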
define <16 x i8> @combine_pshufb_as_vpbroadcastb128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> zeroinitializer)
  ret <16 x i8> %1
}

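; Splatting byte 0 within each lane (pshufb) and then splatting the low dword
; across lanes (vpermd) broadcasts byte 0 to all 32 elements: vpbroadcastb ymm.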
define <32 x i8> @combine_pshufb_as_vpbroadcastb256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> zeroinitializer)
  %4 = bitcast <32 x i8> %3 to <8 x i32>
  %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer)
  %6 = bitcast <8 x i32> %5 to <32 x i8>
  ret <32 x i8> %6
}

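; A repeating <0,1> byte mask splats word 0: vpbroadcastw.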
define <16 x i8> @combine_pshufb_as_vpbroadcastw128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
  ret <16 x i8> %1
}

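; As above but 256-bit: the in-lane word splat plus the cross-lane dword
; splat should become a single ymm vpbroadcastw.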
define <32 x i8> @combine_pshufb_as_vpbroadcastw256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
  %4 = bitcast <32 x i8> %3 to <8 x i32>
  %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer)
  %6 = bitcast <8 x i32> %5 to <32 x i8>
  ret <32 x i8> %6
}

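; A repeating <0,1,2,3> byte mask splats dword 0: vpbroadcastd. The vpaddb
; just uses the broadcast result.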
define <16 x i8> @combine_pshufb_as_vpbroadcastd128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastd128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm0
; CHECK-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %2 = add <16 x i8> %1, <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>
  ret <16 x i8> %2
}

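; An all-zero vpermd mask splats element 0 across all eight dwords: ymm
; vpbroadcastd.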
define <8 x i32> @combine_permd_as_vpbroadcastd256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastd256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastd %xmm0, %ymm0
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> zeroinitializer)
  %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %3
}

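; Repeating byte indices 0-7 splat the low qword: vpbroadcastq.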
define <16 x i8> @combine_pshufb_as_vpbroadcastq128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastq128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  ret <16 x i8> %1
}

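; A repeating <0,1> dword mask splats qword 0 across the vector: ymm
; vpbroadcastq.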
define <8 x i32> @combine_permd_as_vpbroadcastq256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastq256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm0
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %3
}

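; The same dword splat in the float domain should select the FP broadcast,
; vbroadcastss.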
define <4 x float> @combine_pshufb_as_vpbroadcastss128(<4 x float> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastss128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = bitcast <4 x float> %a to <16 x i8>
  %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %3 = bitcast <16 x i8> %2 to <4 x float>
  ret <4 x float> %3
}

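; An all-zero vpermps mask splats float 0 to all eight elements: ymm
; vbroadcastss.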
define <8 x float> @combine_permd_as_vpbroadcastss256(<4 x float> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastss256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %1, <8 x i32> zeroinitializer)
  ret <8 x float> %2
}

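; The repeating <0,1> vpermps mask splats the low double: vbroadcastsd.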
define <4 x double> @combine_permd_as_vpbroadcastsd256(<2 x double> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastsd256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x double> %1 to <8 x float>
  %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %4 = bitcast <8 x float> %3 to <4 x double>
  ret <4 x double> %4
}

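; The dword permute moves even/odd element pairs together, so it is really a
; qword permute and can use the immediate-operand vpermq instead of a vpermd
; with a constant-pool mask.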
define <8 x i32> @combine_permd_as_permq(<8 x i32> %a) {
; CHECK-LABEL: combine_permd_as_permq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,1]
; CHECK-NEXT:    retq
  %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 2, i32 3>)
  ret <8 x i32> %1
}

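; Likewise in the float domain: adjacent pairs move together, so the
; immediate form vpermpd suffices.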
define <8 x float> @combine_permps_as_permpd(<8 x float> %a) {
; CHECK-LABEL: combine_permps_as_permpd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,0,1]
; CHECK-NEXT:    retq
  %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 0, i32 1, i32 2, i32 3>)
  ret <8 x float> %1
}

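; A per-lane left shift by 10 bytes (the 0x80 mask bytes shift in zeros);
; the expected lowering here is a single zero-filling vpshufb.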
define <32 x i8> @combine_pshufb_as_pslldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pslldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>)
  ret <32 x i8> %res0
}

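; A per-lane right shift by 15 bytes: only the top byte of each lane
; survives, again as a single zero-filling vpshufb.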
define <32 x i8> @combine_pshufb_as_psrldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_psrldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
  ret <32 x i8> %res0
}

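; Word swaps confined to the low 8 bytes of each lane match the immediate
; form vpshuflw.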
define <32 x i8> @combine_pshufb_as_pshuflw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pshuflw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <32 x i8> %res0
}

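; Word swaps confined to the high 8 bytes of each lane match vpshufhw.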
define <32 x i8> @combine_pshufb_as_pshufhw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pshufhw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>)
  ret <32 x i8> %res0
}

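; Negative test: the composed mask swaps words in both halves of each lane,
; which neither pshuflw nor pshufhw can express alone, so the two pshufbs
; should merge into a single vpshufb instead.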
define <32 x i8> @combine_pshufb_not_as_pshufw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_not_as_pshufw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13,18,19,16,17,22,23,20,21,26,27,24,25,30,31,28,29]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  %res1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %res0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>)
  ret <32 x i8> %res1
}