; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s

declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)

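; The pshufb zeroes exactly the bytes that the following shuffle selects from
; %1, so the whole sequence folds to an all-zeros vector.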
define <32 x i8> @combine_pshufb_pslldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_pslldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  ret <32 x i8> %2
}

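; Same as above with a right-shift pattern: every byte the shuffle keeps is
; already zero, so this also folds to vxorps.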
define <32 x i8> @combine_pshufb_psrldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_psrldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
  %2 = shufflevector <32 x i8> %1, <32 x i8> zeroinitializer, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
  ret <32 x i8> %2
}

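; The combined dword permute + byte shuffle stays within each 128-bit lane,
; so the pair folds to a single vpshufb.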
define <32 x i8> @combine_pshufb_vpermd(<8 x i32> %a) {
; CHECK-LABEL: combine_pshufb_vpermd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; CHECK-NEXT:    retq
  %tmp0 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)
  %tmp1 = bitcast <8 x i32> %tmp0 to <32 x i8>
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
  ret <32 x i8> %tmp2
}

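; Same fold through a <8 x float> bitcast: vpermps + byte shuffle becomes one
; vpshufb.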
define <32 x i8> @combine_pshufb_vpermps(<8 x float> %a) {
; CHECK-LABEL: combine_pshufb_vpermps:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,16,17,18,18]
; CHECK-NEXT:    retq
  %tmp0 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>)
  %tmp1 = bitcast <8 x float> %tmp0 to <32 x i8>
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 30>
  ret <32 x i8> %tmp2
}

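; The byte mask only swaps the two qwords within each lane, so the pshufb can
; be re-expressed as a vpshufd after the vpermq.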
define <4 x i64> @combine_permq_pshufb(<4 x i64> %a0) {
; CHECK-LABEL: combine_permq_pshufb:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; CHECK-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  %4 = bitcast <32 x i8> %3 to <4 x i64>
  ret <4 x i64> %4
}

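; An all-zero pshufb mask splats byte 0 and lowers to vpbroadcastb.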
define <16 x i8> @combine_pshufb_as_vpbroadcastb128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> zeroinitializer)
  ret <16 x i8> %1
}

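; The in-lane byte splat (pshufb) plus the lane-crossing dword splat (vpermd)
; combine into a single 256-bit vpbroadcastb.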
define <32 x i8> @combine_pshufb_as_vpbroadcastb256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastb256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> zeroinitializer)
  %4 = bitcast <32 x i8> %3 to <8 x i32>
  %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer)
  %6 = bitcast <8 x i32> %5 to <32 x i8>
  ret <32 x i8> %6
}

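; A repeating <0,1> byte mask splats word 0: vpbroadcastw.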
define <16 x i8> @combine_pshufb_as_vpbroadcastw128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
  ret <16 x i8> %1
}

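; Word splat within lanes plus an all-zero vpermd mask: 256-bit vpbroadcastw.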
define <32 x i8> @combine_pshufb_as_vpbroadcastw256(<2 x i64> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastw256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x i64> %1 to <32 x i8>
  %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %2, <32 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
  %4 = bitcast <32 x i8> %3 to <8 x i32>
  %5 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %4, <8 x i32> zeroinitializer)
  %6 = bitcast <8 x i32> %5 to <32 x i8>
  ret <32 x i8> %6
}

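; A repeating <0,1,2,3> byte mask splats dword 0: vpbroadcastd.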
define <16 x i8> @combine_pshufb_as_vpbroadcastd128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastd128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm0
; CHECK-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %2 = add <16 x i8> %1, <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>
  ret <16 x i8> %2
}

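; vpermd with an all-zero mask splats dword 0 across the vector: vpbroadcastd.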
define <8 x i32> @combine_permd_as_vpbroadcastd256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastd256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastd %xmm0, %ymm0
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> zeroinitializer)
  %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %3
}

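; A repeating <0..7> byte mask splats qword 0: vpbroadcastq.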
define <16 x i8> @combine_pshufb_as_vpbroadcastq128(<16 x i8> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastq128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
  ret <16 x i8> %1
}

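; A vpermd mask of <0,1,0,1,...> splats qword 0: vpbroadcastq.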
define <8 x i32> @combine_permd_as_vpbroadcastq256(<4 x i32> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastq256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm0
; CHECK-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %1, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %3 = add <8 x i32> %2, <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %3
}

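; The same dword-0 splat on float data uses the FP broadcast, vbroadcastss.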
define <4 x float> @combine_pshufb_as_vpbroadcastss128(<4 x float> %a) {
; CHECK-LABEL: combine_pshufb_as_vpbroadcastss128:
; CHECK:       # BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
; CHECK-NEXT:    retq
  %1 = bitcast <4 x float> %a to <16 x i8>
  %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>)
  %3 = bitcast <16 x i8> %2 to <4 x float>
  ret <4 x float> %3
}

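; vpermps with an all-zero mask is a 256-bit vbroadcastss.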
define <8 x float> @combine_permd_as_vpbroadcastss256(<4 x float> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastss256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %1, <8 x i32> zeroinitializer)
  ret <8 x float> %2
}

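; A vpermps mask of <0,1,0,1,...> splats the low double: vbroadcastsd.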
define <4 x double> @combine_permd_as_vpbroadcastsd256(<2 x double> %a) {
; CHECK-LABEL: combine_permd_as_vpbroadcastsd256:
; CHECK:       # BB#0:
; CHECK-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %1 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  %2 = bitcast <4 x double> %1 to <8 x float>
  %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>)
  %4 = bitcast <8 x float> %3 to <4 x double>
  ret <4 x double> %4
}

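; A vpermd whose mask moves whole qwords can use the immediate-encoded vpermq
; instead.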
define <8 x i32> @combine_permd_as_permq(<8 x i32> %a) {
; CHECK-LABEL: combine_permd_as_permq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,1]
; CHECK-NEXT:    retq
  %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 2, i32 3>)
  ret <8 x i32> %1
}

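; Likewise for floats: a qword-granular vpermps becomes vpermpd.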
define <8 x float> @combine_permps_as_permpd(<8 x float> %a) {
; CHECK-LABEL: combine_permps_as_permpd:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,0,1]
; CHECK-NEXT:    retq
  %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 0, i32 1, i32 2, i32 3>)
  ret <8 x float> %1
}

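; A pslldq-shaped mask (shift each lane left by 10 bytes, zero-filling);
; currently still emitted as a masked vpshufb.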
define <32 x i8> @combine_pshufb_as_pslldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pslldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5>)
  ret <32 x i8> %res0
}

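; A psrldq-shaped mask (byte 15 of each lane, then zeros); currently still
; emitted as a masked vpshufb.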
define <32 x i8> @combine_pshufb_as_psrldq(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_psrldq:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 15, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128, i8 128>)
  ret <32 x i8> %res0
}

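; Word swaps confined to the low 4 words of each lane match vpshuflw.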
define <32 x i8> @combine_pshufb_as_pshuflw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pshuflw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  ret <32 x i8> %res0
}

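; Word swaps confined to the high 4 words of each lane match vpshufhw.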
define <32 x i8> @combine_pshufb_as_pshufhw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_as_pshufhw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>)
  ret <32 x i8> %res0
}

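; Composing the two masks swaps every word pair, which would need both a
; pshuflw and a pshufhw, so a single vpshufb is kept instead.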
define <32 x i8> @combine_pshufb_not_as_pshufw(<32 x i8> %a0) {
; CHECK-LABEL: combine_pshufb_not_as_pshufw:
; CHECK:       # BB#0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13,18,19,16,17,22,23,20,21,26,27,24,25,30,31,28,29]
; CHECK-NEXT:    retq
  %res0 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> <i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 2, i8 3, i8 0, i8 1, i8 6, i8 7, i8 4, i8 5, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
  %res1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %res0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 10, i8 11, i8 8, i8 9, i8 14, i8 15, i8 12, i8 13>)
  ret <32 x i8> %res1
}