; NOTE: header recovered from a git-blame web capture (blob afe04589f15efa9dd23fb23d7c0cb447c4226721).
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2

target triple = "x86_64-unknown-unknown"

; Mask: all 32 lanes select element 0 of %a (pure byte-0 splat).
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 30, which selects element 1.
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 29, which selects element 2.
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 28, which selects element 3.
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 27, which selects element 4.
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 26, which selects element 5.
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 25, which selects element 6.
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 24, which selects element 7.
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 23, which selects element 8.
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 22, which selects element 9.
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 21, which selects element 10.
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 20, which selects element 11.
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 19, which selects element 12.
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 18, which selects element 13.
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 17, which selects element 14.
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 16, which selects element 15
; (checks the movl/vmovd path used to materialize the single-index shuffle mask).
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX1: # BB#0:
; AVX1-NEXT: movl $15, %eax
; AVX1-NEXT: vmovd %eax, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX2: # BB#0:
; AVX2-NEXT: movl $15, %eax
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 15, which selects element 16
; (first byte of %a's high 128-bit half, so the lowering must cross lanes).
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],zero
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vperm2i128 {{.*}} # ymm0 = ymm0[2,3,0,1]
; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*}} # ymm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 14, which selects element 17
; (from %a's high 128-bit half — cross-lane case).
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero
; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0],zero,xmm0[0]
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vperm2i128 {{.*}} # ymm0 = ymm0[2,3,0,1]
; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,1,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,0,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 17, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 13, which selects element 18
; (from %a's high 128-bit half — cross-lane case).
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[2],zero,zero
; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0],zero,xmm0[0,0]
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vperm2i128 {{.*}} # ymm0 = ymm0[2,3,0,1]
; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,u,2,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,0,128,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 18, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 12, which selects element 19
; (from %a's high 128-bit half — cross-lane case).
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[3],zero,zero,zero
; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0],zero,xmm0[0,0,0]
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vperm2i128 {{.*}} # ymm0 = ymm0[2,3,0,1]
; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,3,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,0,128,128,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 19, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 11, which selects element 20
; (from %a's high 128-bit half — cross-lane case).
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[4],zero,zero,zero,zero
; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0],zero,xmm0[0,0,0,0]
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vperm2i128 {{.*}} # ymm0 = ymm0[2,3,0,1]
; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,4,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = [128,128,128,128,128,128,128,128,128,128,128,0,128,128,128,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 10, which selects element 21
; (from %a's high 128-bit half — cross-lane case).
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[5],zero,zero,zero,zero,zero
; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0],zero,xmm0[0,0,0,0,0]
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vperm2i128 {{.*}} # ymm0 = ymm0[2,3,0,1]
; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,5,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = [128,128,128,128,128,128,128,128,128,128,0,128,128,128,128,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 21, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all lanes select element 0 of %a except lane 9, which selects element 22
; (from %a's high 128-bit half — cross-lane case).
define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[6],zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0],zero,xmm0[0,0,0,0,0,0]
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vperm2i128 {{.*}} # ymm0 = ymm0[2,3,0,1]
; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[u,u,u,u,u,u,u,u,u,6,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = [128,128,128,128,128,128,128,128,128,0,128,128,128,128,128,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 22, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <32 x i8> %shuffle
}

; Mask: all zeros except index 8 = 23 — splat a[0] with cross-lane byte a[23]
; (byte 7 of the high half) inserted at position 8.
503define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
504; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
505; AVX1: # BB#0:
506; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
507; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
508; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
509; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[7],zero,zero,zero,zero,zero,zero,zero
510; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0],zero,xmm0[0,0,0,0,0,0,0]
511; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
512; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
513; AVX1-NEXT: retq
514;
515; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
516; AVX2: # BB#0:
Chandler Carruthe91d68c2014-09-25 10:21:15 +0000517; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
518; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
519; AVX2-NEXT: vperm2i128 {{.*}} # ymm0 = ymm0[2,3,0,1]
520; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[u,u,u,u,u,u,u,u,7,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
521; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = [128,128,128,128,128,128,128,128,0,128,128,128,128,128,128,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
522; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
Chandler Carruth44deb802014-09-22 20:25:08 +0000523; AVX2-NEXT: retq
524 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 23, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
525 ret <32 x i8> %shuffle
526}
527
; Mask: all zeros except index 7 = 24 — splat a[0] with cross-lane byte a[24]
; (byte 8 of the high half) inserted at position 7.
528define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
529; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
530; AVX1: # BB#0:
531; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
532; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
533; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
534; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,zero,xmm2[8],zero,zero,zero,zero,zero,zero,zero,zero
535; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0],zero,xmm0[0,0,0,0,0,0,0,0]
536; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
537; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
538; AVX1-NEXT: retq
539;
540; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
541; AVX2: # BB#0:
Chandler Carruthe91d68c2014-09-25 10:21:15 +0000542; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
543; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
544; AVX2-NEXT: vperm2i128 {{.*}} # ymm0 = ymm0[2,3,0,1]
545; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[u,u,u,u,u,u,u,8,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
546; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = [128,128,128,128,128,128,128,0,128,128,128,128,128,128,128,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
547; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
Chandler Carruth44deb802014-09-22 20:25:08 +0000548; AVX2-NEXT: retq
549 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
550 ret <32 x i8> %shuffle
551}
552
; Mask: all zeros except index 6 = 25 — splat a[0] with cross-lane byte a[25]
; (byte 9 of the high half) inserted at position 6.
553define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
554; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
555; AVX1: # BB#0:
556; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
557; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
558; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
559; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,zero,xmm2[9],zero,zero,zero,zero,zero,zero,zero,zero,zero
560; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0],zero,xmm0[0,0,0,0,0,0,0,0,0]
561; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
562; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
563; AVX1-NEXT: retq
564;
565; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
566; AVX2: # BB#0:
Chandler Carruthe91d68c2014-09-25 10:21:15 +0000567; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
568; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
569; AVX2-NEXT: vperm2i128 {{.*}} # ymm0 = ymm0[2,3,0,1]
570; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[u,u,u,u,u,u,9,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
571; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = [128,128,128,128,128,128,0,128,128,128,128,128,128,128,128,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
572; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
Chandler Carruth44deb802014-09-22 20:25:08 +0000573; AVX2-NEXT: retq
574 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 25, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
575 ret <32 x i8> %shuffle
576}
577
; Mask: all zeros except index 5 = 26 — splat a[0] with cross-lane byte a[26]
; (byte 10 of the high half) inserted at position 5.
578define <32 x i8> @shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
579; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
580; AVX1: # BB#0:
581; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
582; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
583; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
584; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,zero,xmm2[10],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
585; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0],zero,xmm0[0,0,0,0,0,0,0,0,0,0]
586; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
587; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
588; AVX1-NEXT: retq
589;
590; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
591; AVX2: # BB#0:
Chandler Carruthe91d68c2014-09-25 10:21:15 +0000592; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
593; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
594; AVX2-NEXT: vperm2i128 {{.*}} # ymm0 = ymm0[2,3,0,1]
595; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[u,u,u,u,u,10,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
596; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = [128,128,128,128,128,0,128,128,128,128,128,128,128,128,128,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
597; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
Chandler Carruth44deb802014-09-22 20:25:08 +0000598; AVX2-NEXT: retq
599 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 26, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
600 ret <32 x i8> %shuffle
601}
602
; Mask: all zeros except index 4 = 27 — splat a[0] with cross-lane byte a[27]
; (byte 11 of the high half) inserted at position 4.
603define <32 x i8> @shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
604; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
605; AVX1: # BB#0:
606; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
607; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
608; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
609; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,zero,xmm2[11],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
610; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0],zero,xmm0[0,0,0,0,0,0,0,0,0,0,0]
611; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
612; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
613; AVX1-NEXT: retq
614;
615; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
616; AVX2: # BB#0:
Chandler Carruthe91d68c2014-09-25 10:21:15 +0000617; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
618; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
619; AVX2-NEXT: vperm2i128 {{.*}} # ymm0 = ymm0[2,3,0,1]
620; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[u,u,u,u,11,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
621; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = [128,128,128,128,0,128,128,128,128,128,128,128,128,128,128,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
622; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
Chandler Carruth44deb802014-09-22 20:25:08 +0000623; AVX2-NEXT: retq
624 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 27, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
625 ret <32 x i8> %shuffle
626}
627
; Mask: all zeros except index 3 = 28 — splat a[0] with cross-lane byte a[28]
; (byte 12 of the high half) inserted at position 3.
628define <32 x i8> @shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
629; AVX1-LABEL: @shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
630; AVX1: # BB#0:
631; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
632; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
633; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
634; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,zero,xmm2[12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
635; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0],zero,xmm0[0,0,0,0,0,0,0,0,0,0,0,0]
636; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
637; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
638; AVX1-NEXT: retq
639;
640; AVX2-LABEL: @shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
641; AVX2: # BB#0:
Chandler Carruthe91d68c2014-09-25 10:21:15 +0000642; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
643; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
644; AVX2-NEXT: vperm2i128 {{.*}} # ymm0 = ymm0[2,3,0,1]
645; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[u,u,u,12,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
646; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = [128,128,128,0,128,128,128,128,128,128,128,128,128,128,128,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
647; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
Chandler Carruth44deb802014-09-22 20:25:08 +0000648; AVX2-NEXT: retq
649 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 28, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
650 ret <32 x i8> %shuffle
651}
652
; Mask: all zeros except index 2 = 29 — splat a[0] with cross-lane byte a[29]
; (byte 13 of the high half) inserted at position 2.
653define <32 x i8> @shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
654; AVX1-LABEL: @shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
655; AVX1: # BB#0:
656; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
657; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
658; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
659; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,xmm2[13],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
660; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0],zero,xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0]
661; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
662; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
663; AVX1-NEXT: retq
664;
665; AVX2-LABEL: @shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
666; AVX2: # BB#0:
Chandler Carruthe91d68c2014-09-25 10:21:15 +0000667; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
668; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
669; AVX2-NEXT: vperm2i128 {{.*}} # ymm0 = ymm0[2,3,0,1]
670; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[u,u,13,u,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
671; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = [128,128,0,128,128,128,128,128,128,128,128,128,128,128,128,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
672; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
Chandler Carruth44deb802014-09-22 20:25:08 +0000673; AVX2-NEXT: retq
674 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 29, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
675 ret <32 x i8> %shuffle
676}
677
; Mask: all zeros except index 1 = 30 — splat a[0] with cross-lane byte a[30]
; (byte 14 of the high half) inserted at position 1.
678define <32 x i8> @shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
679; AVX1-LABEL: @shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
680; AVX1: # BB#0:
681; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
682; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
683; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
684; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,xmm2[14],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
685; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0],zero,xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0]
686; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
687; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
688; AVX1-NEXT: retq
689;
690; AVX2-LABEL: @shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
691; AVX2: # BB#0:
Chandler Carruthe91d68c2014-09-25 10:21:15 +0000692; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
693; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm1
694; AVX2-NEXT: vperm2i128 {{.*}} # ymm0 = ymm0[2,3,0,1]
695; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[u,14,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
696; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = [128,0,128,128,128,128,128,128,128,128,128,128,128,128,128,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
697; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
Chandler Carruth44deb802014-09-22 20:25:08 +0000698; AVX2-NEXT: retq
699 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 30, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
700 ret <32 x i8> %shuffle
701}
702
; Mask: all zeros except index 0 = 31 — splat a[0] with the last byte a[31]
; (byte 15 of the high half) at position 0. Unlike the previous cases, both
; lowerings build the single nonzero shuffle-control byte in a GPR (movl/vmovd).
703define <32 x i8> @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
704; AVX1-LABEL: @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
705; AVX1: # BB#0:
706; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
707; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
708; AVX1-NEXT: movl $128, %eax
709; AVX1-NEXT: vmovd %eax, %xmm2
710; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm2
711; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
712; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
713; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0
714; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
715; AVX1-NEXT: retq
716;
717; AVX2-LABEL: @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
718; AVX2: # BB#0:
Chandler Carruthe91d68c2014-09-25 10:21:15 +0000719; AVX2-NEXT: vperm2i128 {{.*}} # ymm1 = ymm0[2,3,0,1]
720; AVX2-NEXT: movl $15, %eax
Chandler Carruth44deb802014-09-22 20:25:08 +0000721; AVX2-NEXT: vmovd %eax, %xmm2
Chandler Carruthe91d68c2014-09-25 10:21:15 +0000722; AVX2-NEXT: vpxor %ymm3, %ymm3, %ymm3
723; AVX2-NEXT: vinserti128 $0, %xmm2, %ymm3, %ymm2
724; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
725; AVX2-NEXT: vpshufb %ymm3, %ymm0, %ymm0
726; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = [0,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
727; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
Chandler Carruth44deb802014-09-22 20:25:08 +0000728; AVX2-NEXT: retq
729 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
730 ret <32 x i8> %shuffle
731}
732
; Per-lane splat: low half = a[0] x16, high half = a[16] x16. No cross-lane
; movement, so AVX2 lowers to a single zero-control vpshufb.
733define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
734; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16
735; AVX1: # BB#0:
736; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
737; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
738; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
739; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
740; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
741; AVX1-NEXT: retq
742;
743; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16
744; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +0000745; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
746; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0
Chandler Carruth44deb802014-09-22 20:25:08 +0000747; AVX2-NEXT: retq
748 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
749 ret <32 x i8> %shuffle
750}
751
; Per-lane splat of the last byte of each 128-bit half: a[15] x16 / a[31] x16.
; Lane-local, so a single (per-lane) vpshufb mask suffices.
752define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31(<32 x i8> %a, <32 x i8> %b) {
753; AVX1-LABEL: @shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31
754; AVX1: # BB#0:
755; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Chandler Carruthe7e9c042014-09-24 09:39:41 +0000756; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
Chandler Carruth44deb802014-09-22 20:25:08 +0000757; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
758; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
759; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
760; AVX1-NEXT: retq
761;
762; AVX2-LABEL: @shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31
763; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +0000764; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
Chandler Carruth44deb802014-09-22 20:25:08 +0000765; AVX2-NEXT: retq
766 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
767 ret <32 x i8> %shuffle
768}
769
; Lane-local splat of bytes 0 and 8 within each 128-bit half (x8 each):
; same [0 x8, 8 x8] pattern replays in both lanes, so one vpshufb mask is shared.
770define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24(<32 x i8> %a, <32 x i8> %b) {
771; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24
772; AVX1: # BB#0:
773; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Chandler Carruthe7e9c042014-09-24 09:39:41 +0000774; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
Chandler Carruth44deb802014-09-22 20:25:08 +0000775; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
776; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
777; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
778; AVX1-NEXT: retq
779;
780; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24
781; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +0000782; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
Chandler Carruth44deb802014-09-22 20:25:08 +0000783; AVX2-NEXT: retq
784 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
785 ret <32 x i8> %shuffle
786}
787
; Lane-local splat of bytes 7 and 15 within each 128-bit half (x8 each);
; the shared [7 x8, 15 x8] vpshufb mask is reused for both halves.
788define <32 x i8> @shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31(<32 x i8> %a, <32 x i8> %b) {
789; AVX1-LABEL: @shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31
790; AVX1: # BB#0:
791; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Chandler Carruthe7e9c042014-09-24 09:39:41 +0000792; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [7,7,7,7,7,7,7,7,15,15,15,15,15,15,15,15]
Chandler Carruth44deb802014-09-22 20:25:08 +0000793; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
794; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
795; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
796; AVX1-NEXT: retq
797;
798; AVX2-LABEL: @shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31
799; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +0000800; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[7,7,7,7,7,7,7,7,15,15,15,15,15,15,15,15,23,23,23,23,23,23,23,23,31,31,31,31,31,31,31,31]
Chandler Carruth44deb802014-09-22 20:25:08 +0000801; AVX2-NEXT: retq
802 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
803 ret <32 x i8> %shuffle
804}
805
; Lane-local 4x splat of bytes 0, 4, 8, 12 within each half; one shared
; vpshufb mask covers both lanes.
806define <32 x i8> @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28(<32 x i8> %a, <32 x i8> %b) {
807; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28
808; AVX1: # BB#0:
809; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Chandler Carruthe7e9c042014-09-24 09:39:41 +0000810; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
Chandler Carruth44deb802014-09-22 20:25:08 +0000811; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
812; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
813; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
814; AVX1-NEXT: retq
815;
816; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28
817; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +0000818; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12,16,16,16,16,20,20,20,20,24,24,24,24,28,28,28,28]
Chandler Carruth44deb802014-09-22 20:25:08 +0000819; AVX2-NEXT: retq
820 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
821 ret <32 x i8> %shuffle
822}
823
; Lane-local 4x splat of bytes 3, 7, 11, 15 within each half; one shared
; vpshufb mask covers both lanes.
824define <32 x i8> @shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31(<32 x i8> %a, <32 x i8> %b) {
825; AVX1-LABEL: @shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31
826; AVX1: # BB#0:
827; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Chandler Carruthe7e9c042014-09-24 09:39:41 +0000828; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15]
Chandler Carruth44deb802014-09-22 20:25:08 +0000829; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
830; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
831; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
832; AVX1-NEXT: retq
833;
834; AVX2-LABEL: @shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31
835; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +0000836; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15,19,19,19,19,23,23,23,23,27,27,27,27,31,31,31,31]
Chandler Carruth44deb802014-09-22 20:25:08 +0000837; AVX2-NEXT: retq
838 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7, i32 11, i32 11, i32 11, i32 11, i32 15, i32 15, i32 15, i32 15, i32 19, i32 19, i32 19, i32 19, i32 23, i32 23, i32 23, i32 23, i32 27, i32 27, i32 27, i32 27, i32 31, i32 31, i32 31, i32 31>
839 ret <32 x i8> %shuffle
840}
841
; Duplicate each even byte (0,0,2,2,...,30,30) — lane-local, so the same
; [0,0,2,2,...,14,14] vpshufb mask serves both halves.
842define <32 x i8> @shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30(<32 x i8> %a, <32 x i8> %b) {
843; AVX1-LABEL: @shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30
844; AVX1: # BB#0:
845; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Chandler Carruthe7e9c042014-09-24 09:39:41 +0000846; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
Chandler Carruth44deb802014-09-22 20:25:08 +0000847; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
848; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
849; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
850; AVX1-NEXT: retq
851;
852; AVX2-LABEL: @shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30
853; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +0000854; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14,16,16,18,18,20,20,22,22,24,24,26,26,28,28,30,30]
Chandler Carruth44deb802014-09-22 20:25:08 +0000855; AVX2-NEXT: retq
856 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14, i32 16, i32 16, i32 18, i32 18, i32 20, i32 20, i32 22, i32 22, i32 24, i32 24, i32 26, i32 26, i32 28, i32 28, i32 30, i32 30>
857 ret <32 x i8> %shuffle
858}
859
; Duplicate each odd byte (1,1,3,3,...,31,31) — lane-local, mirrored version
; of the even-byte test above.
860define <32 x i8> @shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31(<32 x i8> %a, <32 x i8> %b) {
861; AVX1-LABEL: @shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31
862; AVX1: # BB#0:
863; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Chandler Carruthe7e9c042014-09-24 09:39:41 +0000864; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
Chandler Carruth44deb802014-09-22 20:25:08 +0000865; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
866; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
867; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
868; AVX1-NEXT: retq
869;
870; AVX2-LABEL: @shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31
871; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +0000872; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15,17,17,19,19,21,21,23,23,25,25,27,27,29,29,31,31]
Chandler Carruth44deb802014-09-22 20:25:08 +0000873; AVX2-NEXT: retq
874 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15, i32 17, i32 17, i32 19, i32 19, i32 21, i32 21, i32 23, i32 23, i32 25, i32 25, i32 27, i32 27, i32 29, i32 29, i32 31, i32 31>
875 ret <32 x i8> %shuffle
876}
877
; Both 128-bit lanes use the identical lane-local pattern (zeros with byte 1
; at offset 14), so the lowering shuffles one xmm and broadcasts it with
; vinsertf128/vinserti128.
878define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<32 x i8> %a, <32 x i8> %b) {
879; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00
880; AVX1: # BB#0:
881; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
882; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
883; AVX1-NEXT: retq
884;
885; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00
886; AVX2: # BB#0:
887; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
888; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
889; AVX2-NEXT: retq
890 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
891 ret <32 x i8> %shuffle
892}
893
; Identical lane-local pattern in both halves (byte 2 at offset 13, zeros
; elsewhere): one xmm vpshufb then lane duplication via vinsert*128.
894define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<32 x i8> %a, <32 x i8> %b) {
895; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00
896; AVX1: # BB#0:
897; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
898; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
899; AVX1-NEXT: retq
900;
901; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00
902; AVX2: # BB#0:
903; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
904; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
905; AVX2-NEXT: retq
906 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
907 ret <32 x i8> %shuffle
908}
909
; Identical lane-local pattern in both halves (byte 7 at offset 8, zeros
; elsewhere): one xmm vpshufb then lane duplication via vinsert*128.
910define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
911; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00
912; AVX1: # BB#0:
913; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
914; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
915; AVX1-NEXT: retq
916;
917; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00
918; AVX2: # BB#0:
919; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
920; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
921; AVX2-NEXT: retq
922 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
923 ret <32 x i8> %shuffle
924}
925
; In-lane pattern repeated in both lanes (byte 8 at position 7); expected to
; lower to one xmm pshufb plus a high-half 128-bit insert on both targets.
926define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
927; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00
928; AVX1: # BB#0:
929; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
930; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
931; AVX1-NEXT: retq
932;
933; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00
934; AVX2: # BB#0:
935; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
936; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
937; AVX2-NEXT: retq
938 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
939 ret <32 x i8> %shuffle
940}
941
; In-lane pattern repeated in both lanes (byte 14 at position 1); expected to
; lower to one xmm pshufb plus a high-half 128-bit insert on both targets.
942define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
943; AVX1-LABEL: @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00
944; AVX1: # BB#0:
945; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
946; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
947; AVX1-NEXT: retq
948;
949; AVX2-LABEL: @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00
950; AVX2: # BB#0:
951; AVX2-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
952; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
953; AVX2-NEXT: retq
954 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
955 ret <32 x i8> %shuffle
956}
957
; Byte 15 at position 0, byte 0 elsewhere, in both lanes. Here the pshufb mask
; is materialized from a GPR immediate (movl $15 then vmovd) rather than as a
; full vector constant, followed by the usual pshufb + 128-bit insert.
958define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
959; AVX1-LABEL: @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
960; AVX1: # BB#0:
961; AVX1-NEXT: movl $15, %eax
962; AVX1-NEXT: vmovd %eax, %xmm1
963; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
964; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
965; AVX1-NEXT: retq
966;
967; AVX2-LABEL: @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
968; AVX2: # BB#0:
969; AVX2-NEXT: movl $15, %eax
970; AVX2-NEXT: vmovd %eax, %xmm1
971; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0
972; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
973; AVX2-NEXT: retq
974 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
975 ret <32 x i8> %shuffle
976}
977
; Alternating selection: even bytes from %a, odd bytes from %b. The first
; expected path extracts both high halves and rebuilds each half with two
; pshufb compactions plus a punpcklbw; the second path matches a single
; vpblendvb with an alternating 0/128 byte mask.
Chandler Carruth397d12c2014-09-25 02:44:39 +0000978define <32 x i8> @shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63(<32 x i8> %a, <32 x i8> %b) {
979; AVX1-LABEL: @shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63
Chandler Carruth44deb802014-09-22 20:25:08 +0000980; AVX1: # BB#0:
Chandler Carruth397d12c2014-09-25 02:44:39 +0000981; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
982; AVX1-NEXT: vmovdqa {{.*}} # xmm3 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u>
Chandler Carruth44deb802014-09-22 20:25:08 +0000983; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
Chandler Carruth397d12c2014-09-25 02:44:39 +0000984; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
985; AVX1-NEXT: vmovdqa {{.*}} # xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
Chandler Carruth44deb802014-09-22 20:25:08 +0000986; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm4
Chandler Carruth397d12c2014-09-25 02:44:39 +0000987; AVX1-NEXT: vpunpcklbw {{.*}} # xmm2 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
988; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
989; AVX1-NEXT: vpshufb %xmm5, %xmm0, %xmm0
990; AVX1-NEXT: vpunpcklbw {{.*}} # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
Chandler Carruth44deb802014-09-22 20:25:08 +0000991; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
992; AVX1-NEXT: retq
993;
Chandler Carruth397d12c2014-09-25 02:44:39 +0000994; AVX2-LABEL: @shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63
Chandler Carruth44deb802014-09-22 20:25:08 +0000995; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +0000996; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = [0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128]
997; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
Chandler Carruth44deb802014-09-22 20:25:08 +0000998; AVX2-NEXT: retq
Chandler Carruth397d12c2014-09-25 02:44:39 +0000999 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 33, i32 2, i32 35, i32 4, i32 37, i32 6, i32 39, i32 8, i32 41, i32 10, i32 43, i32 12, i32 45, i32 14, i32 47, i32 16, i32 49, i32 18, i32 51, i32 20, i32 53, i32 22, i32 55, i32 24, i32 57, i32 26, i32 59, i32 28, i32 61, i32 30, i32 63>
1000 ret <32 x i8> %shuffle
1001}
1002
; Mirror of the previous interleave: even bytes from %b, odd bytes from %a.
; Same per-half pshufb + punpcklbw structure on the first path, and a single
; vpblendvb (operands swapped relative to the sibling test) on the second.
1003define <32 x i8> @shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31(<32 x i8> %a, <32 x i8> %b) {
1004; AVX1-LABEL: @shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31
1005; AVX1: # BB#0:
1006; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1007; AVX1-NEXT: vmovdqa {{.*}} # xmm3 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u>
1008; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
1009; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
1010; AVX1-NEXT: vmovdqa {{.*}} # xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
1011; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm4
1012; AVX1-NEXT: vpunpcklbw {{.*}} # xmm2 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
1013; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
1014; AVX1-NEXT: vpshufb %xmm5, %xmm1, %xmm1
1015; AVX1-NEXT: vpunpcklbw {{.*}} # xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1016; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1017; AVX1-NEXT: retq
1018;
1019; AVX2-LABEL: @shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31
1020; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001021; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = [0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128]
1022; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
Chandler Carruth397d12c2014-09-25 02:44:39 +00001023; AVX2-NEXT: retq
1024 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
Chandler Carruth44deb802014-09-22 20:25:08 +00001025 ret <32 x i8> %shuffle
1026}
1027
; Interleaved splat of byte 0 of %a with byte 0 of %b across the whole vector:
; a zero pshufb mask splats each source's low byte, punpcklbw interleaves the
; two splats, and the 128-bit result is duplicated into both ymm halves.
Chandler Carrutha03011f2014-09-25 02:20:02 +00001028define <32 x i8> @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32(<32 x i8> %a, <32 x i8> %b) {
1029; AVX1-LABEL: @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32
Chandler Carruth44deb802014-09-22 20:25:08 +00001030; AVX1: # BB#0:
Chandler Carruthf572f3b2014-09-26 20:41:45 +00001031; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1032; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1033; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1034; AVX1-NEXT: vpunpcklbw {{.*}} # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
Chandler Carruth44deb802014-09-22 20:25:08 +00001035; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1036; AVX1-NEXT: retq
1037;
Chandler Carrutha03011f2014-09-25 02:20:02 +00001038; AVX2-LABEL: @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32
Chandler Carruth44deb802014-09-22 20:25:08 +00001039; AVX2: # BB#0:
Chandler Carruthf572f3b2014-09-26 20:41:45 +00001040; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2
1041; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1042; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1043; AVX2-NEXT: vpunpcklbw {{.*}} # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
Chandler Carruth44deb802014-09-22 20:25:08 +00001044; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
1045; AVX2-NEXT: retq
Chandler Carrutha03011f2014-09-25 02:20:02 +00001046 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32>
Chandler Carruth44deb802014-09-22 20:25:08 +00001047 ret <32 x i8> %shuffle
1048}
1049
; Per-lane variant of the interleaved low-byte splat: each 128-bit lane
; interleaves its own lane byte 0 of %a with that of %b. The first path works
; half-by-half with extracts, zero-mask pshufb, and punpcklbw; the second uses
; 256-bit shuffles and finishes with a vpblendvb merge.
Chandler Carrutha03011f2014-09-25 02:20:02 +00001050define <32 x i8> @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48(<32 x i8> %a, <32 x i8> %b) {
1051; AVX1-LABEL: @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48
Chandler Carruth44deb802014-09-22 20:25:08 +00001052; AVX1: # BB#0:
Chandler Carruthf572f3b2014-09-26 20:41:45 +00001053; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1054; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1055; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
1056; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
1057; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm4
1058; AVX1-NEXT: vpunpcklbw {{.*}} # xmm2 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
1059; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
1060; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
1061; AVX1-NEXT: vpunpcklbw {{.*}} # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1062; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
Chandler Carruth44deb802014-09-22 20:25:08 +00001063; AVX1-NEXT: retq
1064;
Chandler Carrutha03011f2014-09-25 02:20:02 +00001065; AVX2-LABEL: @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48
Chandler Carruth44deb802014-09-22 20:25:08 +00001066; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001067; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
1068; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
1069; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
1070; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = [0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128]
1071; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
Chandler Carruth44deb802014-09-22 20:25:08 +00001072; AVX2-NEXT: retq
Chandler Carrutha03011f2014-09-25 02:20:02 +00001073 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48>
Chandler Carruth44deb802014-09-22 20:25:08 +00001074 ret <32 x i8> %shuffle
1075}
1076
; Per lane: low 8 bytes are a splat of %b's lane byte 0, high 8 bytes keep
; %a's identity bytes. The first path splats with punpcklbw + pshuflw and
; merges via vpblendw per half; the second splats %b with a zero-mask ymm
; pshufb and merges with a single vpblendd.
Chandler Carrutha03011f2014-09-25 02:20:02 +00001077define <32 x i8> @shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31(<32 x i8> %a, <32 x i8> %b) {
1078; AVX1-LABEL: @shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31
Chandler Carruth44deb802014-09-22 20:25:08 +00001079; AVX1: # BB#0:
1080; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
Chandler Carrutha03011f2014-09-25 02:20:02 +00001081; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1082; AVX1-NEXT: vpunpcklbw {{.*}} # xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1083; AVX1-NEXT: vpshuflw {{.*}} # xmm3 = xmm3[0,0,0,0,4,5,6,7]
1084; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
Chandler Carruth44deb802014-09-22 20:25:08 +00001085; AVX1-NEXT: vpunpcklbw {{.*}} # xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1086; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
1087; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1088; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1089; AVX1-NEXT: retq
1090;
Chandler Carrutha03011f2014-09-25 02:20:02 +00001091; AVX2-LABEL: @shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31
Chandler Carruth44deb802014-09-22 20:25:08 +00001092; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001093; AVX2-NEXT: vpxor %ymm2, %ymm2, %ymm2
1094; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
1095; AVX2-NEXT: vpblendd {{.*}} # ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
Chandler Carruth44deb802014-09-22 20:25:08 +00001096; AVX2-NEXT: retq
Chandler Carrutha03011f2014-09-25 02:20:02 +00001097 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
Chandler Carruth44deb802014-09-22 20:25:08 +00001098 ret <32 x i8> %shuffle
1099}
1100
; Per lane: low 8 bytes are %b's lane bytes 7..0 reversed, high 8 bytes are
; %a's lane bytes 15..8 reversed. The first path reverses each input with a
; partially-undef pshufb mask and merges halves via vpblendw; the second does
; both reversals with 256-bit pshufb and merges with one vpblendd.
Chandler Carrutha03011f2014-09-25 02:20:02 +00001101define <32 x i8> @shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24(<32 x i8> %a, <32 x i8> %b) {
1102; AVX1-LABEL: @shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24
Chandler Carruth44deb802014-09-22 20:25:08 +00001103; AVX1: # BB#0:
1104; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
Chandler Carrutha03011f2014-09-25 02:20:02 +00001105; AVX1-NEXT: vmovdqa {{.*}} # xmm3 = <u,u,u,u,u,u,u,u,15,14,13,12,11,10,9,8>
Chandler Carruth44deb802014-09-22 20:25:08 +00001106; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
1107; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
Chandler Carrutha03011f2014-09-25 02:20:02 +00001108; AVX1-NEXT: vmovdqa {{.*}} # xmm5 = <7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u>
Chandler Carruth44deb802014-09-22 20:25:08 +00001109; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm4
1110; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
1111; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
1112; AVX1-NEXT: vpshufb %xmm5, %xmm1, %xmm1
1113; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1114; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1115; AVX1-NEXT: retq
1116;
Chandler Carrutha03011f2014-09-25 02:20:02 +00001117; AVX2-LABEL: @shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24
Chandler Carruth44deb802014-09-22 20:25:08 +00001118; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001119; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[u,u,u,u,u,u,u,u,15,14,13,12,11,10,9,8,u,u,u,u,u,u,u,u,31,30,29,28,27,26,25,24]
1120; AVX2-NEXT: vpshufb {{.*}} # ymm1 = ymm1[7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u,23,22,21,20,19,18,17,16,u,u,u,u,u,u,u,u]
1121; AVX2-NEXT: vpblendd {{.*}} # ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
Chandler Carruth44deb802014-09-22 20:25:08 +00001122; AVX2-NEXT: retq
Chandler Carrutha03011f2014-09-25 02:20:02 +00001123 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24>
Chandler Carruth44deb802014-09-22 20:25:08 +00001124 ret <32 x i8> %shuffle
1125}
1126
; Per lane: low 8 bytes are %b's lane bytes 7..0 reversed, high 8 bytes are
; %a's lane bytes 7..0 reversed. Structure of the expected code matches the
; sibling reversal test: per-half pshufb + vpblendw on the first path, two
; 256-bit pshufb ops plus one vpblendd on the second.
Chandler Carrutha03011f2014-09-25 02:20:02 +00001127define <32 x i8> @shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16(<32 x i8> %a, <32 x i8> %b) {
1128; AVX1-LABEL: @shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16
Chandler Carruth44deb802014-09-22 20:25:08 +00001129; AVX1: # BB#0:
1130; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
Chandler Carrutha03011f2014-09-25 02:20:02 +00001131; AVX1-NEXT: vmovdqa {{.*}} # xmm3 = <u,u,u,u,u,u,u,u,7,6,5,4,3,2,1,0>
Chandler Carruth44deb802014-09-22 20:25:08 +00001132; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
1133; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
Chandler Carrutha03011f2014-09-25 02:20:02 +00001134; AVX1-NEXT: vmovdqa {{.*}} # xmm5 = <7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u>
Chandler Carruth44deb802014-09-22 20:25:08 +00001135; AVX1-NEXT: vpshufb %xmm5, %xmm4, %xmm4
1136; AVX1-NEXT: vpblendw {{.*}} # xmm2 = xmm4[0,1,2,3],xmm2[4,5,6,7]
1137; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
1138; AVX1-NEXT: vpshufb %xmm5, %xmm1, %xmm1
1139; AVX1-NEXT: vpblendw {{.*}} # xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
1140; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1141; AVX1-NEXT: retq
1142;
Chandler Carrutha03011f2014-09-25 02:20:02 +00001143; AVX2-LABEL: @shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16
Chandler Carruth44deb802014-09-22 20:25:08 +00001144; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001145; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[u,u,u,u,u,u,u,u,7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u,23,22,21,20,19,18,17,16]
1146; AVX2-NEXT: vpshufb {{.*}} # ymm1 = ymm1[7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u,23,22,21,20,19,18,17,16,u,u,u,u,u,u,u,u]
1147; AVX2-NEXT: vpblendd {{.*}} # ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
Chandler Carruth44deb802014-09-22 20:25:08 +00001148; AVX2-NEXT: retq
Chandler Carrutha03011f2014-09-25 02:20:02 +00001149 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
Chandler Carruth44deb802014-09-22 20:25:08 +00001150 ret <32 x i8> %shuffle
1151}
1152
; Single-input shuffle whose mask repeats identically in both lanes (lane
; byte 1 at position 14). The first path extracts the high half and reuses one
; mask constant for both halves; the second collapses to one 256-bit pshufb.
1153define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16(<32 x i8> %a, <32 x i8> %b) {
1154; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16
1155; AVX1: # BB#0:
1156; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Chandler Carruthe7e9c042014-09-24 09:39:41 +00001157; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
Chandler Carruth44deb802014-09-22 20:25:08 +00001158; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1159; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1160; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1161; AVX1-NEXT: retq
1162;
1163; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16
1164; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001165; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,16]
Chandler Carruth44deb802014-09-22 20:25:08 +00001166; AVX2-NEXT: retq
1167 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 16>
1168 ret <32 x i8> %shuffle
1169}
1170
; Same per-lane-repeating mask shape (lane byte 2 at position 13): shared xmm
; mask constant for both halves on the first path, one ymm pshufb on the
; second.
1171define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16(<32 x i8> %a, <32 x i8> %b) {
1172; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16
1173; AVX1: # BB#0:
1174; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Chandler Carruthe7e9c042014-09-24 09:39:41 +00001175; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
Chandler Carruth44deb802014-09-22 20:25:08 +00001176; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1177; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1178; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1179; AVX1-NEXT: retq
1180;
1181; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16
1182; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001183; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,18,16,16]
Chandler Carruth44deb802014-09-22 20:25:08 +00001184; AVX2-NEXT: retq
1185 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 18, i32 16, i32 16>
1186 ret <32 x i8> %shuffle
1187}
1188
; Per-lane-repeating mask with lane byte 7 at position 8: shared xmm mask
; constant across halves on the first path, single ymm pshufb on the second.
1189define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1190; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16
1191; AVX1: # BB#0:
1192; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Chandler Carruthe7e9c042014-09-24 09:39:41 +00001193; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
Chandler Carruth44deb802014-09-22 20:25:08 +00001194; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1195; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1196; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1197; AVX1-NEXT: retq
1198;
1199; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16
1200; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001201; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,23,16,16,16,16,16,16,16]
Chandler Carruth44deb802014-09-22 20:25:08 +00001202; AVX2-NEXT: retq
1203 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 23, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1204 ret <32 x i8> %shuffle
1205}
1206
; Per-lane-repeating mask with lane byte 8 at position 7: shared xmm mask
; constant across halves on the first path, single ymm pshufb on the second.
1207define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1208; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16
1209; AVX1: # BB#0:
1210; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Chandler Carruthe7e9c042014-09-24 09:39:41 +00001211; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
Chandler Carruth44deb802014-09-22 20:25:08 +00001212; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1213; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1214; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1215; AVX1-NEXT: retq
1216;
1217; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16
1218; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001219; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,24,16,16,16,16,16,16,16,16]
Chandler Carruth44deb802014-09-22 20:25:08 +00001220; AVX2-NEXT: retq
1221 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1222 ret <32 x i8> %shuffle
1223}
1224
; Per-lane-repeating mask with lane byte 14 at position 1: shared xmm mask
; constant across halves on the first path, single ymm pshufb on the second.
1225define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1226; AVX1-LABEL: @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16
1227; AVX1: # BB#0:
1228; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Chandler Carruthe7e9c042014-09-24 09:39:41 +00001229; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
Chandler Carruth44deb802014-09-22 20:25:08 +00001230; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
1231; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
1232; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1233; AVX1-NEXT: retq
1234;
1235; AVX2-LABEL: @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16
1236; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001237; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,30,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
Chandler Carruth44deb802014-09-22 20:25:08 +00001238; AVX2-NEXT: retq
1239 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 30, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1240 ret <32 x i8> %shuffle
1241}
1242
; Per-lane-repeating mask with lane byte 15 at position 0. The first path
; builds the mask from a GPR immediate (movl $15 / vmovd) and applies it to
; both halves; the second collapses to a single 256-bit pshufb.
1243define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1244; AVX1-LABEL: @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16
1245; AVX1: # BB#0:
1246; AVX1-NEXT: movl $15, %eax
1247; AVX1-NEXT: vmovd %eax, %xmm1
1248; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1249; AVX1-NEXT: vpshufb %xmm1, %xmm2, %xmm2
1250; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
1251; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1252; AVX1-NEXT: retq
1253;
1254; AVX2-LABEL: @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16
1255; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001256; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
Chandler Carruth44deb802014-09-22 20:25:08 +00001257; AVX2-NEXT: retq
1258 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 31, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1259 ret <32 x i8> %shuffle
1260}
1261
; Interleave of the low 8 bytes of each lane of %a with those of %b — the
; canonical unpack-low pattern. The first path rebuilds each half with
; vpmovzxbw/pshufb/punpcklbw; the second matches a single 256-bit vpunpcklbw.
Chandler Carrutha03011f2014-09-25 02:20:02 +00001262define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55(<32 x i8> %a, <32 x i8> %b) {
1263; AVX1-LABEL: @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55
Chandler Carruth44deb802014-09-22 20:25:08 +00001264; AVX1: # BB#0:
Chandler Carruth4d03be12014-09-27 04:42:44 +00001265; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1266; AVX1-NEXT: vpmovzxbw %xmm2, %xmm2
1267; AVX1-NEXT: vmovdqa {{.*}} # xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
Chandler Carruthf572f3b2014-09-26 20:41:45 +00001268; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
Chandler Carruth4d03be12014-09-27 04:42:44 +00001269; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
1270; AVX1-NEXT: vpunpcklbw {{.*}} # xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
Chandler Carrutha03011f2014-09-25 02:20:02 +00001271; AVX1-NEXT: vpmovzxbw %xmm0, %xmm0
Chandler Carruth4d03be12014-09-27 04:42:44 +00001272; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
Chandler Carruthf572f3b2014-09-26 20:41:45 +00001273; AVX1-NEXT: vpunpcklbw {{.*}} # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1274; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
Chandler Carruth44deb802014-09-22 20:25:08 +00001275; AVX1-NEXT: retq
1276;
Chandler Carrutha03011f2014-09-25 02:20:02 +00001277; AVX2-LABEL: @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55
Chandler Carruth44deb802014-09-22 20:25:08 +00001278; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001279; AVX2-NEXT: vpunpcklbw {{.*}} # ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
Chandler Carruth44deb802014-09-22 20:25:08 +00001280; AVX2-NEXT: retq
Chandler Carrutha03011f2014-09-25 02:20:02 +00001281 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
Chandler Carruth44deb802014-09-22 20:25:08 +00001282 ret <32 x i8> %shuffle
1283}
1284
; Interleave of the high 8 bytes of each lane of %a with those of %b — the
; canonical unpack-high pattern. The first path compacts each half's high
; bytes with a shared pshufb mask and interleaves via punpcklbw; the second
; matches a single 256-bit vpunpckhbw.
Chandler Carrutha03011f2014-09-25 02:20:02 +00001285define <32 x i8> @shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63(<32 x i8> %a, <32 x i8> %b) {
1286; AVX1-LABEL: @shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63
Chandler Carruth44deb802014-09-22 20:25:08 +00001287; AVX1: # BB#0:
Chandler Carruthf572f3b2014-09-26 20:41:45 +00001288; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
Chandler Carruth4d03be12014-09-27 04:42:44 +00001289; AVX1-NEXT: vmovdqa {{.*}} # xmm3 = <8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u>
Chandler Carruthf572f3b2014-09-26 20:41:45 +00001290; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
1291; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
1292; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm4
1293; AVX1-NEXT: vpunpcklbw {{.*}} # xmm2 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
1294; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
1295; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
1296; AVX1-NEXT: vpunpcklbw {{.*}} # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1297; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
Chandler Carruth44deb802014-09-22 20:25:08 +00001298; AVX1-NEXT: retq
1299;
Chandler Carrutha03011f2014-09-25 02:20:02 +00001300; AVX2-LABEL: @shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63
Chandler Carruth44deb802014-09-22 20:25:08 +00001301; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001302; AVX2-NEXT: vpunpckhbw {{.*}} # ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
Chandler Carruth44deb802014-09-22 20:25:08 +00001303; AVX2-NEXT: retq
Chandler Carrutha03011f2014-09-25 02:20:02 +00001304 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
Chandler Carruth44deb802014-09-22 20:25:08 +00001305 ret <32 x i8> %shuffle
1306}
1307
; Mixed interleave: low bytes (0..7 / 32..39) in the low lane, but HIGH bytes
; (24..31 / 56..63) in the high lane, so no single unpack matches. AVX2 expects
; two masked vpshufb results merged with vpblendvb; AVX1 expects per-half
; pshufb/punpcklbw plus a vpmovzxbw-based compression of the low half of %a.
Chandler Carrutha03011f2014-09-25 02:20:02 +00001308define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63(<32 x i8> %a, <32 x i8> %b) {
1309; AVX1-LABEL: @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63
Chandler Carruth44deb802014-09-22 20:25:08 +00001310; AVX1: # BB#0:
Chandler Carruthf572f3b2014-09-26 20:41:45 +00001311; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
Chandler Carruth4d03be12014-09-27 04:42:44 +00001312; AVX1-NEXT: vmovdqa {{.*}} # xmm3 = <8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u>
Chandler Carruthf572f3b2014-09-26 20:41:45 +00001313; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
1314; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
1315; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm3
1316; AVX1-NEXT: vpunpcklbw {{.*}} # xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
Chandler Carrutha03011f2014-09-25 02:20:02 +00001317; AVX1-NEXT: vpmovzxbw %xmm0, %xmm0
1318; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
Chandler Carruthf572f3b2014-09-26 20:41:45 +00001319; AVX1-NEXT: vpunpcklbw {{.*}} # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1320; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
Chandler Carruth44deb802014-09-22 20:25:08 +00001321; AVX1-NEXT: retq
1322;
Chandler Carrutha03011f2014-09-25 02:20:02 +00001323; AVX2-LABEL: @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63
Chandler Carruth44deb802014-09-22 20:25:08 +00001324; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001325; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31,u]
1326; AVX2-NEXT: vpshufb {{.*}} # ymm1 = ymm1[u,0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31]
1327; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = [0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128]
1328; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
Chandler Carruth44deb802014-09-22 20:25:08 +00001329; AVX2-NEXT: retq
Chandler Carrutha03011f2014-09-25 02:20:02 +00001330 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
1331 ret <32 x i8> %shuffle
1332}
1333
; The mirror of the previous test: HIGH bytes (8..15 / 40..47) in the low lane
; and LOW bytes (16..23 / 48..55) in the high lane. Same lowering shape is
; expected: two masked vpshufb + vpblendvb on AVX2, per-half shuffles on AVX1.
1334define <32 x i8> @shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55(<32 x i8> %a, <32 x i8> %b) {
1335; AVX1-LABEL: @shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55
1336; AVX1: # BB#0:
Chandler Carruth4d03be12014-09-27 04:42:44 +00001337; AVX1-NEXT: vmovdqa {{.*}} # xmm2 = <8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u>
Chandler Carruthf572f3b2014-09-26 20:41:45 +00001338; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm3
1339; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm2
1340; AVX1-NEXT: vpunpcklbw {{.*}} # xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
1341; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
Chandler Carruthf572f3b2014-09-26 20:41:45 +00001342; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1343; AVX1-NEXT: vpmovzxbw %xmm0, %xmm0
1344; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1345; AVX1-NEXT: vpunpcklbw {{.*}} # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
1346; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
Chandler Carrutha03011f2014-09-25 02:20:02 +00001347; AVX1-NEXT: retq
1348;
1349; AVX2-LABEL: @shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55
1350; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001351; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23,u]
1352; AVX2-NEXT: vpshufb {{.*}} # ymm1 = ymm1[u,8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23]
1353; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = [0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128,0,128]
1354; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
Chandler Carrutha03011f2014-09-25 02:20:02 +00001355; AVX2-NEXT: retq
1356 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
Chandler Carruth44deb802014-09-22 20:25:08 +00001357 ret <32 x i8> %shuffle
1358}
1359
; Single-input, in-lane shuffle: near-splat of byte 0 per 128-bit lane with one
; stray element (index 1 at position 14, index 17 at position 17). AVX2 expects
; one 256-bit vpshufb; AVX1 one pshufb per half joined by vinsertf128.
1360define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1361; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16
1362; AVX1: # BB#0:
1363; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
1364; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1365; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
1366; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1367; AVX1-NEXT: retq
1368;
1369; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16
1370; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001371; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,17,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
Chandler Carruth44deb802014-09-22 20:25:08 +00001372; AVX2-NEXT: retq
1373 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 16, i32 17, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1374 ret <32 x i8> %shuffle
1375}
1376
; Same near-splat pattern as above but with the stray element being index 2
; (resp. 18) at a different position; exercises the per-lane vpshufb path.
1377define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1378; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16
1379; AVX1: # BB#0:
1380; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
1381; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1382; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0]
1383; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1384; AVX1-NEXT: retq
1385;
1386; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16
1387; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001388; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,18,16,16,16,16,16,16,16,16,16,16,16,16,16]
Chandler Carruth44deb802014-09-22 20:25:08 +00001389; AVX2-NEXT: retq
1390 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 16, i32 16, i32 18, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1391 ret <32 x i8> %shuffle
1392}
1393
; Near-splat with stray index 7 (resp. 23); still a pure in-lane single-input
; shuffle, so the same vpshufb-based lowerings are expected.
1394define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1395; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16
1396; AVX1: # BB#0:
1397; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
1398; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1399; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0]
1400; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1401; AVX1-NEXT: retq
1402;
1403; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16
1404; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001405; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,23,16,16,16,16,16,16,16,16]
Chandler Carruth44deb802014-09-22 20:25:08 +00001406; AVX2-NEXT: retq
1407 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 23, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1408 ret <32 x i8> %shuffle
1409}
1410
; Near-splat with stray index 8 (resp. 24) — the stray crosses the 8-byte
; midpoint of the lane but not the 128-bit lane boundary, keeping vpshufb legal.
1411define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1412; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16
1413; AVX1: # BB#0:
1414; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
1415; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1416; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0]
1417; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1418; AVX1-NEXT: retq
1419;
1420; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16
1421; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001422; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,24,16,16,16,16,16,16,16]
Chandler Carruth44deb802014-09-22 20:25:08 +00001423; AVX2-NEXT: retq
1424 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
1425 ret <32 x i8> %shuffle
1426}
1427
; Near-splat with stray index 14 (resp. 30); note the stray lands at different
; positions in the two lanes, so the AVX1 halves use different pshufb masks.
1428define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16(<32 x i8> %a, <32 x i8> %b) {
1429; AVX1-LABEL: @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16
1430; AVX1: # BB#0:
1431; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
1432; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1433; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0]
1434; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1435; AVX1-NEXT: retq
1436;
1437; AVX2-LABEL: @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16
1438; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001439; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,30,16]
Chandler Carruth44deb802014-09-22 20:25:08 +00001440; AVX2-NEXT: retq
1441 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 30, i32 16>
1442 ret <32 x i8> %shuffle
1443}
1444
; Stray index 15 (resp. 31) at a lane edge. The AVX1 expectation is notable:
; the low half's pshufb control <15,0,...,0> is materialized via a GPR
; (movl $15 / vmovd) rather than a constant-pool load.
1445define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31(<32 x i8> %a, <32 x i8> %b) {
1446; AVX1-LABEL: @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31
1447; AVX1: # BB#0:
1448; AVX1-NEXT: movl $15, %eax
1449; AVX1-NEXT: vmovd %eax, %xmm1
1450; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm1
1451; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1452; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15]
1453; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1454; AVX1-NEXT: retq
1455;
1456; AVX2-LABEL: @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31
1457; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001458; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,31]
Chandler Carruth44deb802014-09-22 20:25:08 +00001459; AVX2-NEXT: retq
1460 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 31>
1461 ret <32 x i8> %shuffle
1462}
1463
; Per-lane dword-splat pattern: low lane repeats bytes 0/4/8/12 ascending, high
; lane repeats 28/24/20/16 descending; single-input and in-lane throughout.
1464define <32 x i8> @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1465; AVX1-LABEL: @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16
1466; AVX1: # BB#0:
1467; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
1468; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1469; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[12,12,12,12,8,8,8,8,4,4,4,4,0,0,0,0]
1470; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1471; AVX1-NEXT: retq
1472;
1473; AVX2-LABEL: @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16
1474; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001475; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12,28,28,28,28,24,24,24,24,20,20,20,20,16,16,16,16]
Chandler Carruth44deb802014-09-22 20:25:08 +00001476; AVX2-NEXT: retq
1477 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 28, i32 28, i32 28, i32 28, i32 24, i32 24, i32 24, i32 24, i32 20, i32 20, i32 20, i32 20, i32 16, i32 16, i32 16, i32 16>
1478 ret <32 x i8> %shuffle
1479}
1480
; Eight-wide splats of bytes 8 then 0 in the low lane, 16 then 24 in the high
; lane (qword-swapped between lanes); again pure in-lane vpshufb material.
1481define <32 x i8> @shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24(<32 x i8> %a, <32 x i8> %b) {
1482; AVX1-LABEL: @shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24
1483; AVX1: # BB#0:
1484; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[8,8,8,8,8,8,8,8,0,0,0,0,0,0,0,0]
1485; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1486; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
1487; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1488; AVX1-NEXT: retq
1489;
1490; AVX2-LABEL: @shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24
1491; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001492; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[8,8,8,8,8,8,8,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
Chandler Carruth44deb802014-09-22 20:25:08 +00001493; AVX2-NEXT: retq
1494 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
1495 ret <32 x i8> %shuffle
1496}
1497
; Undef-heavy mask: the low lane only pins element 0, so AVX1 can reuse %ymm0's
; low half untouched and only shuffle the extracted high half; undef slots show
; up as 'u' in the expected shuffle controls.
Chandler Carrutha03011f2014-09-25 02:20:02 +00001498define <32 x i8> @shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_16_16_30_16(<32 x i8> %a, <32 x i8> %b) {
1499; AVX1-LABEL: @shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_16_16_30_16
Chandler Carruth44deb802014-09-22 20:25:08 +00001500; AVX1: # BB#0:
1501; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
Chandler Carrutha03011f2014-09-25 02:20:02 +00001502; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm1[0,0,0,0,u,u,u,u,u,0,0,0,0,0,14,0]
Chandler Carruth44deb802014-09-22 20:25:08 +00001503; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1504; AVX1-NEXT: retq
1505;
Chandler Carrutha03011f2014-09-25 02:20:02 +00001506; AVX2-LABEL: @shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_16_16_30_16
Chandler Carruth44deb802014-09-22 20:25:08 +00001507; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001508; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[0,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,u,u,u,u,u,16,16,16,16,16,30,16]
Chandler Carruth44deb802014-09-22 20:25:08 +00001509; AVX2-NEXT: retq
Chandler Carrutha03011f2014-09-25 02:20:02 +00001510 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16, i32 16, i32 16, i32 16, i32 30, i32 16>
Chandler Carruth44deb802014-09-22 20:25:08 +00001511 ret <32 x i8> %shuffle
1512}
1513
; Undef elements interleaved with a 0/14 (resp. 16/30) pattern. The AVX1 low
; half is free to fill the undef slots with concrete indices (14,14,1,1,...);
; the high half keeps explicit 'u' entries in its control.
Chandler Carruth397d12c2014-09-25 02:44:39 +00001514define <32 x i8> @shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_16(<32 x i8> %a, <32 x i8> %b) {
1515; AVX1-LABEL: @shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_16
Chandler Carruth44deb802014-09-22 20:25:08 +00001516; AVX1: # BB#0:
Chandler Carruthf572f3b2014-09-26 20:41:45 +00001517; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[14,14,1,1,0,0,0,0,0,0,0,0,0,0,0,0]
Chandler Carruth44deb802014-09-22 20:25:08 +00001518; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
Chandler Carruth397d12c2014-09-25 02:44:39 +00001519; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,u,0,u,u,u,u,0,0,0,0,0,0,14,0]
Chandler Carruth44deb802014-09-22 20:25:08 +00001520; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1521; AVX1-NEXT: retq
1522;
Chandler Carruth397d12c2014-09-25 02:44:39 +00001523; AVX2-LABEL: @shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_16
Chandler Carruth44deb802014-09-22 20:25:08 +00001524; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001525; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[u,14,u,u,0,0,0,0,0,0,0,0,0,0,0,0,16,16,u,16,u,u,u,u,16,16,16,16,16,16,30,16]
Chandler Carruth44deb802014-09-22 20:25:08 +00001526; AVX2-NEXT: retq
Chandler Carruth397d12c2014-09-25 02:44:39 +00001527 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 undef, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 30, i32 16>
Chandler Carruth44deb802014-09-22 20:25:08 +00001528 ret <32 x i8> %shuffle
1529}
1530
; The dword-splat pattern of the earlier test with some elements replaced by
; undef. AVX1 expects the same fully-concrete controls (undefs folded to the
; splat values); AVX2 keeps 'u' entries in the 256-bit vpshufb control.
Chandler Carrutha03011f2014-09-25 02:20:02 +00001531define <32 x i8> @shuffle_v32i8_00_00_00_uu_uu_uu_04_uu_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_uu_24_20_20_20_20_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
1532; AVX1-LABEL: @shuffle_v32i8_00_00_00_uu_uu_uu_04_uu_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_uu_24_20_20_20_20_16_16_16_16
Chandler Carruth44deb802014-09-22 20:25:08 +00001533; AVX1: # BB#0:
1534; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
1535; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
Chandler Carruthf572f3b2014-09-26 20:41:45 +00001536; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[12,12,12,12,8,8,8,8,4,4,4,4,0,0,0,0]
Chandler Carruth44deb802014-09-22 20:25:08 +00001537; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1538; AVX1-NEXT: retq
1539;
Chandler Carrutha03011f2014-09-25 02:20:02 +00001540; AVX2-LABEL: @shuffle_v32i8_00_00_00_uu_uu_uu_04_uu_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_uu_24_20_20_20_20_16_16_16_16
Chandler Carruth44deb802014-09-22 20:25:08 +00001541; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001542; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[0,0,0,u,u,u,4,u,8,8,8,8,u,u,12,u,28,28,28,28,u,u,u,24,20,20,20,20,16,16,16,16]
Chandler Carruth44deb802014-09-22 20:25:08 +00001543; AVX2-NEXT: retq
Chandler Carrutha03011f2014-09-25 02:20:02 +00001544 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 12, i32 undef, i32 28, i32 28, i32 28, i32 28, i32 undef, i32 undef, i32 undef, i32 24, i32 20, i32 20, i32 20, i32 20, i32 16, i32 16, i32 16, i32 16>
Chandler Carruth44deb802014-09-22 20:25:08 +00001545 ret <32 x i8> %shuffle
1546}
1547
; Splat of byte 8 with the whole upper half of the low lane undef. That frees
; AVX1 to lower the low half with vpunpckhbw + vpshuflw instead of a pshufb and
; its constant-pool control vector; the high half still uses pshufb.
1548define <32 x i8> @shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24(<32 x i8> %a, <32 x i8> %b) {
1549; AVX1-LABEL: @shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24
1550; AVX1: # BB#0:
1551; AVX1-NEXT: vpunpckhbw {{.*}} # xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1552; AVX1-NEXT: vpshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
1553; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1554; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[0,0,0,0,8,8,9,9,8,8,8,8,8,8,8,8]
1555; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1556; AVX1-NEXT: retq
1557;
1558; AVX2-LABEL: @shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24
1559; AVX2: # BB#0:
Chandler Carruthd8f528a2014-09-25 02:52:12 +00001560; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[8,8,8,8,8,8,8,8,u,u,u,u,u,u,u,u,16,16,16,u,u,u,u,u,u,u,24,24,24,24,24,24]
Chandler Carruth44deb802014-09-22 20:25:08 +00001561; AVX2-NEXT: retq
1562 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
1563 ret <32 x i8> %shuffle
1564}
Chandler Carruth4d03be12014-09-27 04:42:44 +00001565
; Stress test: a fully irregular two-input mask that crosses 128-bit lane
; boundaries and mixes both sources everywhere. AVX1 is expected to fall back
; to chains of zero-masked vpshufb's merged with vpor per half; AVX2 to
; lane-swaps via vperm2i128 plus masked vpshufb's and vpblendd/vpblendvb merges.
1566define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39(<32 x i8> %a, <32 x i8> %b) {
1567; AVX1-LABEL: @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39
1568; AVX1: # BB#0:
1569; AVX1-NEXT: vpshufb {{.*}} # xmm2 = zero,zero,xmm0[u],zero,xmm0[u,u,u,u,u,u,u,7,u,u,u,u]
1570; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1571; AVX1-NEXT: vpshufb {{.*}} # xmm4 = xmm3[4,3,u,3,u,u,u,u,u,u,u],zero,xmm3[u,u,u,u]
1572; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2
1573; AVX1-NEXT: vpshufb {{.*}} # xmm2 = xmm2[0,1],zero,xmm2[3],zero,zero,zero,zero,zero,zero,zero,xmm2[11],zero,zero,zero,zero
1574; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
1575; AVX1-NEXT: vpshufb {{.*}} # xmm5 = xmm4[u,u,4,u,1,6],zero,zero,xmm4[0],zero,xmm4[11,u],zero,zero,zero,zero
1576; AVX1-NEXT: vpshufb {{.*}} # xmm6 = xmm1[u,u],zero,xmm1[u],zero,zero,xmm1[5,0],zero,xmm1[10],zero,xmm1[u,4,2,4,7]
1577; AVX1-NEXT: vpor %xmm5, %xmm6, %xmm5
1578; AVX1-NEXT: vpshufb {{.*}} # xmm5 = zero,zero,xmm5[2],zero,xmm5[4,5,6,7,8,9,10],zero,xmm5[12,13,14,15]
1579; AVX1-NEXT: vpor %xmm2, %xmm5, %xmm2
1580; AVX1-NEXT: vpshufb {{.*}} # xmm3 = xmm3[u,u],zero,zero,xmm3[u,u,u,u,1,6,13,u,u],zero,xmm3[u,u]
1581; AVX1-NEXT: vpshufb {{.*}} # xmm0 = xmm0[u,u,12,13,u,u,u,u],zero,zero,zero,xmm0[u,u,12,u,u]
1582; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm0
1583; AVX1-NEXT: vpshufb {{.*}} # xmm0 = zero,zero,xmm0[2,3],zero,zero,zero,zero,xmm0[8,9,10],zero,zero,xmm0[13],zero,zero
1584; AVX1-NEXT: vpshufb {{.*}} # xmm3 = zero,zero,xmm4[u,u],zero,zero,xmm4[12],zero,xmm4[u,u,u],zero,zero,xmm4[u,0,3]
1585; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm1[10,13,u,u,3,3],zero,xmm1[8,u,u,u,12,1,u],zero,zero
1586; AVX1-NEXT: vpor %xmm3, %xmm1, %xmm1
1587; AVX1-NEXT: vpshufb {{.*}} # xmm1 = xmm1[0,1],zero,zero,xmm1[4,5,6,7],zero,zero,zero,xmm1[11,12],zero,xmm1[14,15]
1588; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
1589; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1590; AVX1-NEXT: retq
1591;
1592; AVX2-LABEL: @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39
1593; AVX2: # BB#0:
1594; AVX2-NEXT: vperm2i128 {{.*}} # ymm2 = ymm1[2,3,0,1]
1595; AVX2-NEXT: vpshufb {{.*}} # ymm2 = ymm2[u,u,u,u,u,u,12,u,u,u,u,u,u,u,0,3,u,u,u,u,u,u,21,16,u,26,u,u,20,18,20,23]
1596; AVX2-NEXT: vpshufb {{.*}} # ymm1 = ymm1[10,13,u,u,3,3,u,8,u,u,u,12,1,u,u,u,u,u,20,u,17,22,u,u,16,u,27,u,u,u,u,u]
1597; AVX2-NEXT: vmovdqa {{.*}} # ymm3 = <0,0,u,u,0,0,128,0,u,u,u,0,0,u,128,128,u,u,0,u,0,0,128,128,0,128,0,u,128,128,128,128>
1598; AVX2-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
1599; AVX2-NEXT: vperm2i128 {{.*}} # ymm2 = ymm0[2,3,0,1]
1600; AVX2-NEXT: vpshufb {{.*}} # ymm2 = ymm2[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u]
1601; AVX2-NEXT: vpshufb {{.*}} # ymm0 = ymm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u,20,19,u,19,u,u,u,u,u,u,u,u,u,u,u,u]
1602; AVX2-NEXT: vpblendd {{.*}} # ymm0 = ymm0[0,1],ymm2[2],ymm0[3,4,5],ymm2[6],ymm0[7]
1603; AVX2-NEXT: vmovdqa {{.*}} # ymm2 = [0,0,128,128,0,0,0,0,128,128,128,0,0,128,0,0,128,128,0,128,0,0,0,0,0,0,0,128,0,0,0,0]
1604; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
1605; AVX2-NEXT: retq
1606 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 42, i32 45, i32 12, i32 13, i32 35, i32 35, i32 60, i32 40, i32 17, i32 22, i32 29, i32 44, i32 33, i32 12, i32 48, i32 51, i32 20, i32 19, i32 52, i32 19, i32 49, i32 54, i32 37, i32 32, i32 48, i32 42, i32 59, i32 7, i32 36, i32 34, i32 36, i32 39>
1607 ret <32 x i8> %shuffle
1608}