; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512

;
; 128-bit vectors
;

12define <2 x i64> @ext_i2_2i64(i2 %a0) {
13; SSE2-SSSE3-LABEL: ext_i2_2i64:
14; SSE2-SSSE3: # BB#0:
15; SSE2-SSSE3-NEXT: andb $3, %dil
16; SSE2-SSSE3-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
17; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
18; SSE2-SSSE3-NEXT: movl %eax, %ecx
19; SSE2-SSSE3-NEXT: andl $1, %ecx
20; SSE2-SSSE3-NEXT: movq %rcx, %xmm0
21; SSE2-SSSE3-NEXT: shrl %eax
22; SSE2-SSSE3-NEXT: andl $1, %eax
23; SSE2-SSSE3-NEXT: movq %rax, %xmm1
24; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
25; SSE2-SSSE3-NEXT: retq
26;
27; AVX12-LABEL: ext_i2_2i64:
28; AVX12: # BB#0:
29; AVX12-NEXT: andb $3, %dil
30; AVX12-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
31; AVX12-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
32; AVX12-NEXT: movl %eax, %ecx
33; AVX12-NEXT: andl $1, %ecx
34; AVX12-NEXT: vmovq %rcx, %xmm0
35; AVX12-NEXT: shrl %eax
36; AVX12-NEXT: andl $1, %eax
37; AVX12-NEXT: vmovq %rax, %xmm1
38; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
39; AVX12-NEXT: retq
40;
41; AVX512-LABEL: ext_i2_2i64:
42; AVX512: # BB#0:
43; AVX512-NEXT: andb $3, %dil
44; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
45; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
46; AVX512-NEXT: kmovd %eax, %k1
47; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
48; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
49; AVX512-NEXT: vzeroupper
50; AVX512-NEXT: retq
51 %1 = bitcast i2 %a0 to <2 x i1>
52 %2 = zext <2 x i1> %1 to <2 x i64>
53 ret <2 x i64> %2
54}
55
56define <4 x i32> @ext_i4_4i32(i4 %a0) {
57; SSE2-SSSE3-LABEL: ext_i4_4i32:
58; SSE2-SSSE3: # BB#0:
59; SSE2-SSSE3-NEXT: andb $15, %dil
60; SSE2-SSSE3-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
61; SSE2-SSSE3-NEXT: movl -{{[0-9]+}}(%rsp), %eax
62; SSE2-SSSE3-NEXT: movl %eax, %ecx
63; SSE2-SSSE3-NEXT: shrl $3, %ecx
64; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
65; SSE2-SSSE3-NEXT: movl %eax, %ecx
66; SSE2-SSSE3-NEXT: shrl $2, %ecx
67; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
68; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
69; SSE2-SSSE3-NEXT: movd %eax, %xmm0
70; SSE2-SSSE3-NEXT: shrl %eax
71; SSE2-SSSE3-NEXT: movd %eax, %xmm2
72; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
73; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
74; SSE2-SSSE3-NEXT: pand {{.*}}(%rip), %xmm0
75; SSE2-SSSE3-NEXT: retq
76;
77; AVX1-LABEL: ext_i4_4i32:
78; AVX1: # BB#0:
79; AVX1-NEXT: andb $15, %dil
80; AVX1-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
81; AVX1-NEXT: movl -{{[0-9]+}}(%rsp), %eax
82; AVX1-NEXT: movl %eax, %ecx
83; AVX1-NEXT: shrl %ecx
84; AVX1-NEXT: vmovd %eax, %xmm0
85; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
86; AVX1-NEXT: movl %eax, %ecx
87; AVX1-NEXT: shrl $2, %ecx
88; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
89; AVX1-NEXT: shrl $3, %eax
90; AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
91; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
92; AVX1-NEXT: retq
93;
94; AVX2-LABEL: ext_i4_4i32:
95; AVX2: # BB#0:
96; AVX2-NEXT: andb $15, %dil
97; AVX2-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
98; AVX2-NEXT: movl -{{[0-9]+}}(%rsp), %eax
99; AVX2-NEXT: movl %eax, %ecx
100; AVX2-NEXT: shrl %ecx
101; AVX2-NEXT: vmovd %eax, %xmm0
102; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
103; AVX2-NEXT: movl %eax, %ecx
104; AVX2-NEXT: shrl $2, %ecx
105; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
106; AVX2-NEXT: shrl $3, %eax
107; AVX2-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
108; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
109; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
110; AVX2-NEXT: retq
111;
112; AVX512-LABEL: ext_i4_4i32:
113; AVX512: # BB#0:
114; AVX512-NEXT: andb $15, %dil
115; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
116; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
117; AVX512-NEXT: kmovd %eax, %k1
118; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
119; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
120; AVX512-NEXT: vzeroupper
121; AVX512-NEXT: retq
122 %1 = bitcast i4 %a0 to <4 x i1>
123 %2 = zext <4 x i1> %1 to <4 x i32>
124 ret <4 x i32> %2
125}
126
127define <8 x i16> @ext_i8_8i16(i8 %a0) {
128; SSE2-SSSE3-LABEL: ext_i8_8i16:
129; SSE2-SSSE3: # BB#0:
130; SSE2-SSSE3-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
131; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
132; SSE2-SSSE3-NEXT: movl %eax, %ecx
133; SSE2-SSSE3-NEXT: shrl $3, %ecx
134; SSE2-SSSE3-NEXT: andl $1, %ecx
135; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
136; SSE2-SSSE3-NEXT: movl %eax, %ecx
137; SSE2-SSSE3-NEXT: shrl $2, %ecx
138; SSE2-SSSE3-NEXT: andl $1, %ecx
139; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
140; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
141; SSE2-SSSE3-NEXT: movl %eax, %ecx
142; SSE2-SSSE3-NEXT: andl $1, %ecx
143; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
144; SSE2-SSSE3-NEXT: movl %eax, %ecx
145; SSE2-SSSE3-NEXT: shrl %ecx
146; SSE2-SSSE3-NEXT: andl $1, %ecx
147; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
148; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
149; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
150; SSE2-SSSE3-NEXT: movl %eax, %ecx
151; SSE2-SSSE3-NEXT: shrl $5, %ecx
152; SSE2-SSSE3-NEXT: andl $1, %ecx
153; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
154; SSE2-SSSE3-NEXT: movl %eax, %ecx
155; SSE2-SSSE3-NEXT: shrl $4, %ecx
156; SSE2-SSSE3-NEXT: andl $1, %ecx
157; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
158; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
159; SSE2-SSSE3-NEXT: movl %eax, %ecx
160; SSE2-SSSE3-NEXT: shrl $6, %ecx
161; SSE2-SSSE3-NEXT: andl $1, %ecx
162; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
163; SSE2-SSSE3-NEXT: shrl $7, %eax
164; SSE2-SSSE3-NEXT: movzwl %ax, %eax
165; SSE2-SSSE3-NEXT: movd %eax, %xmm3
166; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
167; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
168; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
169; SSE2-SSSE3-NEXT: retq
170;
171; AVX12-LABEL: ext_i8_8i16:
172; AVX12: # BB#0:
173; AVX12-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
174; AVX12-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
175; AVX12-NEXT: movl %eax, %ecx
176; AVX12-NEXT: shrl %ecx
177; AVX12-NEXT: andl $1, %ecx
178; AVX12-NEXT: movl %eax, %edx
179; AVX12-NEXT: andl $1, %edx
180; AVX12-NEXT: vmovd %edx, %xmm0
181; AVX12-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0
182; AVX12-NEXT: movl %eax, %ecx
183; AVX12-NEXT: shrl $2, %ecx
184; AVX12-NEXT: andl $1, %ecx
185; AVX12-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0
186; AVX12-NEXT: movl %eax, %ecx
187; AVX12-NEXT: shrl $3, %ecx
188; AVX12-NEXT: andl $1, %ecx
189; AVX12-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
190; AVX12-NEXT: movl %eax, %ecx
191; AVX12-NEXT: shrl $4, %ecx
192; AVX12-NEXT: andl $1, %ecx
193; AVX12-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
194; AVX12-NEXT: movl %eax, %ecx
195; AVX12-NEXT: shrl $5, %ecx
196; AVX12-NEXT: andl $1, %ecx
197; AVX12-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
198; AVX12-NEXT: movl %eax, %ecx
199; AVX12-NEXT: shrl $6, %ecx
200; AVX12-NEXT: andl $1, %ecx
201; AVX12-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
202; AVX12-NEXT: shrl $7, %eax
203; AVX12-NEXT: movzwl %ax, %eax
204; AVX12-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
205; AVX12-NEXT: retq
206;
207; AVX512-LABEL: ext_i8_8i16:
208; AVX512: # BB#0:
209; AVX512-NEXT: kmovd %edi, %k5
210; AVX512-NEXT: kshiftlw $8, %k5, %k0
211; AVX512-NEXT: kshiftrw $15, %k0, %k0
212; AVX512-NEXT: kshiftlw $9, %k5, %k1
213; AVX512-NEXT: kshiftrw $15, %k1, %k1
214; AVX512-NEXT: kshiftlw $10, %k5, %k2
215; AVX512-NEXT: kshiftrw $15, %k2, %k2
216; AVX512-NEXT: kshiftlw $11, %k5, %k3
217; AVX512-NEXT: kshiftrw $15, %k3, %k3
218; AVX512-NEXT: kshiftlw $12, %k5, %k4
219; AVX512-NEXT: kshiftrw $15, %k4, %k4
220; AVX512-NEXT: kshiftlw $13, %k5, %k6
221; AVX512-NEXT: kshiftrw $15, %k6, %k6
222; AVX512-NEXT: kshiftlw $15, %k5, %k7
223; AVX512-NEXT: kshiftrw $15, %k7, %k7
224; AVX512-NEXT: kshiftlw $14, %k5, %k5
225; AVX512-NEXT: kshiftrw $15, %k5, %k5
226; AVX512-NEXT: kmovd %k5, %eax
227; AVX512-NEXT: andl $1, %eax
228; AVX512-NEXT: kmovd %k7, %ecx
229; AVX512-NEXT: andl $1, %ecx
230; AVX512-NEXT: vmovd %ecx, %xmm0
231; AVX512-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
232; AVX512-NEXT: kmovd %k6, %eax
233; AVX512-NEXT: andl $1, %eax
234; AVX512-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
235; AVX512-NEXT: kmovd %k4, %eax
236; AVX512-NEXT: andl $1, %eax
237; AVX512-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
238; AVX512-NEXT: kmovd %k3, %eax
239; AVX512-NEXT: andl $1, %eax
240; AVX512-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
241; AVX512-NEXT: kmovd %k2, %eax
242; AVX512-NEXT: andl $1, %eax
243; AVX512-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
244; AVX512-NEXT: kmovd %k1, %eax
245; AVX512-NEXT: andl $1, %eax
246; AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
247; AVX512-NEXT: kmovd %k0, %eax
248; AVX512-NEXT: andl $1, %eax
249; AVX512-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
250; AVX512-NEXT: retq
251 %1 = bitcast i8 %a0 to <8 x i1>
252 %2 = zext <8 x i1> %1 to <8 x i16>
253 ret <8 x i16> %2
254}
255
256define <16 x i8> @ext_i16_16i8(i16 %a0) {
257; SSE2-SSSE3-LABEL: ext_i16_16i8:
258; SSE2-SSSE3: # BB#0:
259; SSE2-SSSE3-NEXT: movw %di, -{{[0-9]+}}(%rsp)
260; SSE2-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
261; SSE2-SSSE3-NEXT: movl %eax, %ecx
262; SSE2-SSSE3-NEXT: shrl $7, %ecx
263; SSE2-SSSE3-NEXT: andl $1, %ecx
264; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
265; SSE2-SSSE3-NEXT: movl %eax, %ecx
266; SSE2-SSSE3-NEXT: shrl $6, %ecx
267; SSE2-SSSE3-NEXT: andl $1, %ecx
268; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
269; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
270; SSE2-SSSE3-NEXT: movl %eax, %ecx
271; SSE2-SSSE3-NEXT: shrl $5, %ecx
272; SSE2-SSSE3-NEXT: andl $1, %ecx
273; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
274; SSE2-SSSE3-NEXT: movl %eax, %ecx
275; SSE2-SSSE3-NEXT: shrl $4, %ecx
276; SSE2-SSSE3-NEXT: andl $1, %ecx
277; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
278; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
279; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
280; SSE2-SSSE3-NEXT: movl %eax, %ecx
281; SSE2-SSSE3-NEXT: shrl $3, %ecx
282; SSE2-SSSE3-NEXT: andl $1, %ecx
283; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
284; SSE2-SSSE3-NEXT: movl %eax, %ecx
285; SSE2-SSSE3-NEXT: shrl $2, %ecx
286; SSE2-SSSE3-NEXT: andl $1, %ecx
287; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
288; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
289; SSE2-SSSE3-NEXT: movl %eax, %ecx
290; SSE2-SSSE3-NEXT: andl $1, %ecx
291; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
292; SSE2-SSSE3-NEXT: movl %eax, %ecx
293; SSE2-SSSE3-NEXT: shrl %ecx
294; SSE2-SSSE3-NEXT: andl $1, %ecx
295; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
296; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
297; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
298; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
299; SSE2-SSSE3-NEXT: movl %eax, %ecx
300; SSE2-SSSE3-NEXT: shrl $11, %ecx
301; SSE2-SSSE3-NEXT: andl $1, %ecx
302; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
303; SSE2-SSSE3-NEXT: movl %eax, %ecx
304; SSE2-SSSE3-NEXT: shrl $10, %ecx
305; SSE2-SSSE3-NEXT: andl $1, %ecx
306; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
307; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
308; SSE2-SSSE3-NEXT: movl %eax, %ecx
309; SSE2-SSSE3-NEXT: shrl $9, %ecx
310; SSE2-SSSE3-NEXT: andl $1, %ecx
311; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
312; SSE2-SSSE3-NEXT: movl %eax, %ecx
313; SSE2-SSSE3-NEXT: shrl $8, %ecx
314; SSE2-SSSE3-NEXT: andl $1, %ecx
315; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
316; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
317; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
318; SSE2-SSSE3-NEXT: movl %eax, %ecx
319; SSE2-SSSE3-NEXT: shrl $13, %ecx
320; SSE2-SSSE3-NEXT: andl $1, %ecx
321; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
322; SSE2-SSSE3-NEXT: movl %eax, %ecx
323; SSE2-SSSE3-NEXT: shrl $12, %ecx
324; SSE2-SSSE3-NEXT: andl $1, %ecx
325; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
326; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
327; SSE2-SSSE3-NEXT: movl %eax, %ecx
328; SSE2-SSSE3-NEXT: shrl $14, %ecx
329; SSE2-SSSE3-NEXT: andl $1, %ecx
330; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
331; SSE2-SSSE3-NEXT: shrl $15, %eax
332; SSE2-SSSE3-NEXT: movzwl %ax, %eax
333; SSE2-SSSE3-NEXT: movd %eax, %xmm4
334; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
335; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
336; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
337; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
338; SSE2-SSSE3-NEXT: retq
339;
340; AVX12-LABEL: ext_i16_16i8:
341; AVX12: # BB#0:
342; AVX12-NEXT: movw %di, -{{[0-9]+}}(%rsp)
343; AVX12-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
344; AVX12-NEXT: movl %eax, %ecx
345; AVX12-NEXT: shrl %ecx
346; AVX12-NEXT: andl $1, %ecx
347; AVX12-NEXT: movl %eax, %edx
348; AVX12-NEXT: andl $1, %edx
349; AVX12-NEXT: vmovd %edx, %xmm0
350; AVX12-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
351; AVX12-NEXT: movl %eax, %ecx
352; AVX12-NEXT: shrl $2, %ecx
353; AVX12-NEXT: andl $1, %ecx
354; AVX12-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
355; AVX12-NEXT: movl %eax, %ecx
356; AVX12-NEXT: shrl $3, %ecx
357; AVX12-NEXT: andl $1, %ecx
358; AVX12-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
359; AVX12-NEXT: movl %eax, %ecx
360; AVX12-NEXT: shrl $4, %ecx
361; AVX12-NEXT: andl $1, %ecx
362; AVX12-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
363; AVX12-NEXT: movl %eax, %ecx
364; AVX12-NEXT: shrl $5, %ecx
365; AVX12-NEXT: andl $1, %ecx
366; AVX12-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
367; AVX12-NEXT: movl %eax, %ecx
368; AVX12-NEXT: shrl $6, %ecx
369; AVX12-NEXT: andl $1, %ecx
370; AVX12-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
371; AVX12-NEXT: movl %eax, %ecx
372; AVX12-NEXT: shrl $7, %ecx
373; AVX12-NEXT: andl $1, %ecx
374; AVX12-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
375; AVX12-NEXT: movl %eax, %ecx
376; AVX12-NEXT: shrl $8, %ecx
377; AVX12-NEXT: andl $1, %ecx
378; AVX12-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
379; AVX12-NEXT: movl %eax, %ecx
380; AVX12-NEXT: shrl $9, %ecx
381; AVX12-NEXT: andl $1, %ecx
382; AVX12-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
383; AVX12-NEXT: movl %eax, %ecx
384; AVX12-NEXT: shrl $10, %ecx
385; AVX12-NEXT: andl $1, %ecx
386; AVX12-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
387; AVX12-NEXT: movl %eax, %ecx
388; AVX12-NEXT: shrl $11, %ecx
389; AVX12-NEXT: andl $1, %ecx
390; AVX12-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
391; AVX12-NEXT: movl %eax, %ecx
392; AVX12-NEXT: shrl $12, %ecx
393; AVX12-NEXT: andl $1, %ecx
394; AVX12-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
395; AVX12-NEXT: movl %eax, %ecx
396; AVX12-NEXT: shrl $13, %ecx
397; AVX12-NEXT: andl $1, %ecx
398; AVX12-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
399; AVX12-NEXT: movl %eax, %ecx
400; AVX12-NEXT: shrl $14, %ecx
401; AVX12-NEXT: andl $1, %ecx
402; AVX12-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
403; AVX12-NEXT: shrl $15, %eax
404; AVX12-NEXT: movzwl %ax, %eax
405; AVX12-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
406; AVX12-NEXT: retq
407;
408; AVX512-LABEL: ext_i16_16i8:
409; AVX512: # BB#0:
410; AVX512-NEXT: pushq %rbp
411; AVX512-NEXT: .Lcfi0:
412; AVX512-NEXT: .cfi_def_cfa_offset 16
413; AVX512-NEXT: pushq %r15
414; AVX512-NEXT: .Lcfi1:
415; AVX512-NEXT: .cfi_def_cfa_offset 24
416; AVX512-NEXT: pushq %r14
417; AVX512-NEXT: .Lcfi2:
418; AVX512-NEXT: .cfi_def_cfa_offset 32
419; AVX512-NEXT: pushq %r13
420; AVX512-NEXT: .Lcfi3:
421; AVX512-NEXT: .cfi_def_cfa_offset 40
422; AVX512-NEXT: pushq %r12
423; AVX512-NEXT: .Lcfi4:
424; AVX512-NEXT: .cfi_def_cfa_offset 48
425; AVX512-NEXT: pushq %rbx
426; AVX512-NEXT: .Lcfi5:
427; AVX512-NEXT: .cfi_def_cfa_offset 56
428; AVX512-NEXT: .Lcfi6:
429; AVX512-NEXT: .cfi_offset %rbx, -56
430; AVX512-NEXT: .Lcfi7:
431; AVX512-NEXT: .cfi_offset %r12, -48
432; AVX512-NEXT: .Lcfi8:
433; AVX512-NEXT: .cfi_offset %r13, -40
434; AVX512-NEXT: .Lcfi9:
435; AVX512-NEXT: .cfi_offset %r14, -32
436; AVX512-NEXT: .Lcfi10:
437; AVX512-NEXT: .cfi_offset %r15, -24
438; AVX512-NEXT: .Lcfi11:
439; AVX512-NEXT: .cfi_offset %rbp, -16
440; AVX512-NEXT: kmovd %edi, %k0
441; AVX512-NEXT: kshiftlw $14, %k0, %k1
442; AVX512-NEXT: kshiftrw $15, %k1, %k1
443; AVX512-NEXT: kmovd %k1, %r8d
444; AVX512-NEXT: kshiftlw $15, %k0, %k1
445; AVX512-NEXT: kshiftrw $15, %k1, %k1
446; AVX512-NEXT: kmovd %k1, %r9d
447; AVX512-NEXT: kshiftlw $13, %k0, %k1
448; AVX512-NEXT: kshiftrw $15, %k1, %k1
449; AVX512-NEXT: kmovd %k1, %r10d
450; AVX512-NEXT: kshiftlw $12, %k0, %k1
451; AVX512-NEXT: kshiftrw $15, %k1, %k1
452; AVX512-NEXT: kmovd %k1, %r11d
453; AVX512-NEXT: kshiftlw $11, %k0, %k1
454; AVX512-NEXT: kshiftrw $15, %k1, %k1
455; AVX512-NEXT: kmovd %k1, %r14d
456; AVX512-NEXT: kshiftlw $10, %k0, %k1
457; AVX512-NEXT: kshiftrw $15, %k1, %k1
458; AVX512-NEXT: kmovd %k1, %r15d
459; AVX512-NEXT: kshiftlw $9, %k0, %k1
460; AVX512-NEXT: kshiftrw $15, %k1, %k1
461; AVX512-NEXT: kmovd %k1, %r12d
462; AVX512-NEXT: kshiftlw $8, %k0, %k1
463; AVX512-NEXT: kshiftrw $15, %k1, %k1
464; AVX512-NEXT: kmovd %k1, %r13d
465; AVX512-NEXT: kshiftlw $7, %k0, %k1
466; AVX512-NEXT: kshiftrw $15, %k1, %k1
467; AVX512-NEXT: kmovd %k1, %esi
468; AVX512-NEXT: kshiftlw $6, %k0, %k1
469; AVX512-NEXT: kshiftrw $15, %k1, %k1
470; AVX512-NEXT: kmovd %k1, %ebx
471; AVX512-NEXT: kshiftlw $5, %k0, %k1
472; AVX512-NEXT: kshiftrw $15, %k1, %k1
473; AVX512-NEXT: kmovd %k1, %ebp
474; AVX512-NEXT: kshiftlw $4, %k0, %k1
475; AVX512-NEXT: kshiftrw $15, %k1, %k1
476; AVX512-NEXT: kmovd %k1, %edi
477; AVX512-NEXT: kshiftlw $3, %k0, %k1
478; AVX512-NEXT: kshiftrw $15, %k1, %k1
479; AVX512-NEXT: kmovd %k1, %eax
480; AVX512-NEXT: kshiftlw $2, %k0, %k1
481; AVX512-NEXT: kshiftrw $15, %k1, %k1
482; AVX512-NEXT: kmovd %k1, %ecx
483; AVX512-NEXT: kshiftlw $1, %k0, %k1
484; AVX512-NEXT: kshiftrw $15, %k1, %k1
485; AVX512-NEXT: kmovd %k1, %edx
486; AVX512-NEXT: kshiftrw $15, %k0, %k0
487; AVX512-NEXT: vmovd %r9d, %xmm0
488; AVX512-NEXT: kmovd %k0, %r9d
489; AVX512-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
490; AVX512-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0
491; AVX512-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
492; AVX512-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
493; AVX512-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
494; AVX512-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
495; AVX512-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
496; AVX512-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
497; AVX512-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
498; AVX512-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0
499; AVX512-NEXT: vpinsrb $11, %edi, %xmm0, %xmm0
500; AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
501; AVX512-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
502; AVX512-NEXT: vpinsrb $14, %edx, %xmm0, %xmm0
503; AVX512-NEXT: vpinsrb $15, %r9d, %xmm0, %xmm0
504; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
505; AVX512-NEXT: popq %rbx
506; AVX512-NEXT: popq %r12
507; AVX512-NEXT: popq %r13
508; AVX512-NEXT: popq %r14
509; AVX512-NEXT: popq %r15
510; AVX512-NEXT: popq %rbp
511; AVX512-NEXT: retq
512 %1 = bitcast i16 %a0 to <16 x i1>
513 %2 = zext <16 x i1> %1 to <16 x i8>
514 ret <16 x i8> %2
515}

;
; 256-bit vectors
;

521define <4 x i64> @ext_i4_4i64(i4 %a0) {
522; SSE2-SSSE3-LABEL: ext_i4_4i64:
523; SSE2-SSSE3: # BB#0:
524; SSE2-SSSE3-NEXT: andb $15, %dil
525; SSE2-SSSE3-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
526; SSE2-SSSE3-NEXT: movl -{{[0-9]+}}(%rsp), %eax
527; SSE2-SSSE3-NEXT: movl %eax, %ecx
528; SSE2-SSSE3-NEXT: shrl $3, %ecx
529; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
530; SSE2-SSSE3-NEXT: movl %eax, %ecx
531; SSE2-SSSE3-NEXT: shrl $2, %ecx
532; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
533; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
534; SSE2-SSSE3-NEXT: movd %eax, %xmm2
535; SSE2-SSSE3-NEXT: shrl %eax
536; SSE2-SSSE3-NEXT: movd %eax, %xmm0
537; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
538; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
539; SSE2-SSSE3-NEXT: pand {{.*}}(%rip), %xmm2
540; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3]
541; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [1,1]
542; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
543; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,1,3,3]
544; SSE2-SSSE3-NEXT: pand %xmm3, %xmm1
545; SSE2-SSSE3-NEXT: retq
546;
547; AVX1-LABEL: ext_i4_4i64:
548; AVX1: # BB#0:
549; AVX1-NEXT: andb $15, %dil
550; AVX1-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
551; AVX1-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
552; AVX1-NEXT: movl %eax, %ecx
553; AVX1-NEXT: shrl $3, %ecx
554; AVX1-NEXT: andl $1, %ecx
555; AVX1-NEXT: vmovq %rcx, %xmm0
556; AVX1-NEXT: movl %eax, %ecx
557; AVX1-NEXT: shrl $2, %ecx
558; AVX1-NEXT: andl $1, %ecx
559; AVX1-NEXT: vmovq %rcx, %xmm1
560; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
561; AVX1-NEXT: movl %eax, %ecx
562; AVX1-NEXT: andl $1, %ecx
563; AVX1-NEXT: vmovq %rcx, %xmm1
564; AVX1-NEXT: shrl %eax
565; AVX1-NEXT: andl $1, %eax
566; AVX1-NEXT: vmovq %rax, %xmm2
567; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
568; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
569; AVX1-NEXT: retq
570;
571; AVX2-LABEL: ext_i4_4i64:
572; AVX2: # BB#0:
573; AVX2-NEXT: andb $15, %dil
574; AVX2-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
575; AVX2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
576; AVX2-NEXT: movl %eax, %ecx
577; AVX2-NEXT: shrl $3, %ecx
578; AVX2-NEXT: andl $1, %ecx
579; AVX2-NEXT: vmovq %rcx, %xmm0
580; AVX2-NEXT: movl %eax, %ecx
581; AVX2-NEXT: shrl $2, %ecx
582; AVX2-NEXT: andl $1, %ecx
583; AVX2-NEXT: vmovq %rcx, %xmm1
584; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
585; AVX2-NEXT: movl %eax, %ecx
586; AVX2-NEXT: andl $1, %ecx
587; AVX2-NEXT: vmovq %rcx, %xmm1
588; AVX2-NEXT: shrl %eax
589; AVX2-NEXT: andl $1, %eax
590; AVX2-NEXT: vmovq %rax, %xmm2
591; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
592; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
593; AVX2-NEXT: retq
594;
595; AVX512-LABEL: ext_i4_4i64:
596; AVX512: # BB#0:
597; AVX512-NEXT: andb $15, %dil
598; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
599; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
600; AVX512-NEXT: kmovd %eax, %k1
601; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
602; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
603; AVX512-NEXT: retq
604 %1 = bitcast i4 %a0 to <4 x i1>
605 %2 = zext <4 x i1> %1 to <4 x i64>
606 ret <4 x i64> %2
607}
608
609define <8 x i32> @ext_i8_8i32(i8 %a0) {
610; SSE2-SSSE3-LABEL: ext_i8_8i32:
611; SSE2-SSSE3: # BB#0:
612; SSE2-SSSE3-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
613; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
614; SSE2-SSSE3-NEXT: movl %eax, %ecx
615; SSE2-SSSE3-NEXT: shrl $3, %ecx
616; SSE2-SSSE3-NEXT: andl $1, %ecx
617; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
618; SSE2-SSSE3-NEXT: movl %eax, %ecx
619; SSE2-SSSE3-NEXT: shrl $2, %ecx
620; SSE2-SSSE3-NEXT: andl $1, %ecx
621; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
622; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
623; SSE2-SSSE3-NEXT: movl %eax, %ecx
624; SSE2-SSSE3-NEXT: andl $1, %ecx
625; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
626; SSE2-SSSE3-NEXT: movl %eax, %ecx
627; SSE2-SSSE3-NEXT: shrl %ecx
628; SSE2-SSSE3-NEXT: andl $1, %ecx
629; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
630; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
631; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
632; SSE2-SSSE3-NEXT: movl %eax, %ecx
633; SSE2-SSSE3-NEXT: shrl $5, %ecx
634; SSE2-SSSE3-NEXT: andl $1, %ecx
635; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
636; SSE2-SSSE3-NEXT: movl %eax, %ecx
637; SSE2-SSSE3-NEXT: shrl $4, %ecx
638; SSE2-SSSE3-NEXT: andl $1, %ecx
639; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
640; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
641; SSE2-SSSE3-NEXT: movl %eax, %ecx
642; SSE2-SSSE3-NEXT: shrl $6, %ecx
643; SSE2-SSSE3-NEXT: andl $1, %ecx
644; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
645; SSE2-SSSE3-NEXT: shrl $7, %eax
646; SSE2-SSSE3-NEXT: movzwl %ax, %eax
647; SSE2-SSSE3-NEXT: movd %eax, %xmm3
648; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
649; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
650; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
651; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
652; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
653; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
654; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
655; SSE2-SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
656; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
657; SSE2-SSSE3-NEXT: retq
658;
659; AVX1-LABEL: ext_i8_8i32:
660; AVX1: # BB#0:
661; AVX1-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
662; AVX1-NEXT: movl -{{[0-9]+}}(%rsp), %eax
663; AVX1-NEXT: movl %eax, %ecx
664; AVX1-NEXT: shrl $5, %ecx
665; AVX1-NEXT: movl %eax, %edx
666; AVX1-NEXT: shrl $4, %edx
667; AVX1-NEXT: vmovd %edx, %xmm0
668; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
669; AVX1-NEXT: movl %eax, %ecx
670; AVX1-NEXT: shrl $6, %ecx
671; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
672; AVX1-NEXT: movl %eax, %ecx
673; AVX1-NEXT: shrl $7, %ecx
674; AVX1-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
675; AVX1-NEXT: movl %eax, %ecx
676; AVX1-NEXT: shrl %ecx
677; AVX1-NEXT: vmovd %eax, %xmm1
678; AVX1-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
679; AVX1-NEXT: movl %eax, %ecx
680; AVX1-NEXT: shrl $2, %ecx
681; AVX1-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
682; AVX1-NEXT: shrl $3, %eax
683; AVX1-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
684; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
685; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
686; AVX1-NEXT: retq
687;
688; AVX2-LABEL: ext_i8_8i32:
689; AVX2: # BB#0:
690; AVX2-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
691; AVX2-NEXT: movl -{{[0-9]+}}(%rsp), %eax
692; AVX2-NEXT: movl %eax, %ecx
693; AVX2-NEXT: shrl $5, %ecx
694; AVX2-NEXT: movl %eax, %edx
695; AVX2-NEXT: shrl $4, %edx
696; AVX2-NEXT: vmovd %edx, %xmm0
697; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
698; AVX2-NEXT: movl %eax, %ecx
699; AVX2-NEXT: shrl $6, %ecx
700; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
701; AVX2-NEXT: movl %eax, %ecx
702; AVX2-NEXT: shrl $7, %ecx
703; AVX2-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
704; AVX2-NEXT: movl %eax, %ecx
705; AVX2-NEXT: shrl %ecx
706; AVX2-NEXT: vmovd %eax, %xmm1
707; AVX2-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
708; AVX2-NEXT: movl %eax, %ecx
709; AVX2-NEXT: shrl $2, %ecx
710; AVX2-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
711; AVX2-NEXT: shrl $3, %eax
712; AVX2-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
713; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
714; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1]
715; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
716; AVX2-NEXT: retq
717;
718; AVX512-LABEL: ext_i8_8i32:
719; AVX512: # BB#0:
720; AVX512-NEXT: kmovd %edi, %k1
721; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
722; AVX512-NEXT: retq
723 %1 = bitcast i8 %a0 to <8 x i1>
724 %2 = zext <8 x i1> %1 to <8 x i32>
725 ret <8 x i32> %2
726}
727
728define <16 x i16> @ext_i16_16i16(i16 %a0) {
729; SSE2-SSSE3-LABEL: ext_i16_16i16:
730; SSE2-SSSE3: # BB#0:
731; SSE2-SSSE3-NEXT: movw %di, -{{[0-9]+}}(%rsp)
732; SSE2-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
733; SSE2-SSSE3-NEXT: movl %eax, %ecx
734; SSE2-SSSE3-NEXT: shrl $7, %ecx
735; SSE2-SSSE3-NEXT: andl $1, %ecx
736; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
737; SSE2-SSSE3-NEXT: movl %eax, %ecx
738; SSE2-SSSE3-NEXT: shrl $6, %ecx
739; SSE2-SSSE3-NEXT: andl $1, %ecx
740; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
741; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
742; SSE2-SSSE3-NEXT: movl %eax, %ecx
743; SSE2-SSSE3-NEXT: shrl $5, %ecx
744; SSE2-SSSE3-NEXT: andl $1, %ecx
745; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
746; SSE2-SSSE3-NEXT: movl %eax, %ecx
747; SSE2-SSSE3-NEXT: shrl $4, %ecx
748; SSE2-SSSE3-NEXT: andl $1, %ecx
749; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
750; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
751; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
752; SSE2-SSSE3-NEXT: movl %eax, %ecx
753; SSE2-SSSE3-NEXT: shrl $3, %ecx
754; SSE2-SSSE3-NEXT: andl $1, %ecx
755; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
756; SSE2-SSSE3-NEXT: movl %eax, %ecx
757; SSE2-SSSE3-NEXT: shrl $2, %ecx
758; SSE2-SSSE3-NEXT: andl $1, %ecx
759; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
760; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
761; SSE2-SSSE3-NEXT: movl %eax, %ecx
762; SSE2-SSSE3-NEXT: andl $1, %ecx
763; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
764; SSE2-SSSE3-NEXT: movl %eax, %ecx
765; SSE2-SSSE3-NEXT: shrl %ecx
766; SSE2-SSSE3-NEXT: andl $1, %ecx
767; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
768; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
769; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
770; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
771; SSE2-SSSE3-NEXT: movl %eax, %ecx
772; SSE2-SSSE3-NEXT: shrl $11, %ecx
773; SSE2-SSSE3-NEXT: andl $1, %ecx
774; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
775; SSE2-SSSE3-NEXT: movl %eax, %ecx
776; SSE2-SSSE3-NEXT: shrl $10, %ecx
777; SSE2-SSSE3-NEXT: andl $1, %ecx
778; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
779; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
780; SSE2-SSSE3-NEXT: movl %eax, %ecx
781; SSE2-SSSE3-NEXT: shrl $9, %ecx
782; SSE2-SSSE3-NEXT: andl $1, %ecx
783; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
784; SSE2-SSSE3-NEXT: movl %eax, %ecx
785; SSE2-SSSE3-NEXT: shrl $8, %ecx
786; SSE2-SSSE3-NEXT: andl $1, %ecx
787; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
788; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
789; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
790; SSE2-SSSE3-NEXT: movl %eax, %ecx
791; SSE2-SSSE3-NEXT: shrl $13, %ecx
792; SSE2-SSSE3-NEXT: andl $1, %ecx
793; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
794; SSE2-SSSE3-NEXT: movl %eax, %ecx
795; SSE2-SSSE3-NEXT: shrl $12, %ecx
796; SSE2-SSSE3-NEXT: andl $1, %ecx
797; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
798; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
799; SSE2-SSSE3-NEXT: movl %eax, %ecx
800; SSE2-SSSE3-NEXT: shrl $14, %ecx
801; SSE2-SSSE3-NEXT: andl $1, %ecx
802; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
803; SSE2-SSSE3-NEXT: shrl $15, %eax
804; SSE2-SSSE3-NEXT: movzwl %ax, %eax
805; SSE2-SSSE3-NEXT: movd %eax, %xmm4
806; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
807; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
808; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
809; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
810; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
811; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
812; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
813; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
814; SSE2-SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
815; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
816; SSE2-SSSE3-NEXT: retq
817;
818; AVX1-LABEL: ext_i16_16i16:
819; AVX1: # BB#0:
820; AVX1-NEXT: movw %di, -{{[0-9]+}}(%rsp)
821; AVX1-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
822; AVX1-NEXT: movl %eax, %ecx
823; AVX1-NEXT: shrl $9, %ecx
824; AVX1-NEXT: andl $1, %ecx
825; AVX1-NEXT: movl %eax, %edx
826; AVX1-NEXT: shrl $8, %edx
827; AVX1-NEXT: andl $1, %edx
828; AVX1-NEXT: vmovd %edx, %xmm0
829; AVX1-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0
830; AVX1-NEXT: movl %eax, %ecx
831; AVX1-NEXT: shrl $10, %ecx
832; AVX1-NEXT: andl $1, %ecx
833; AVX1-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0
834; AVX1-NEXT: movl %eax, %ecx
835; AVX1-NEXT: shrl $11, %ecx
836; AVX1-NEXT: andl $1, %ecx
837; AVX1-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
838; AVX1-NEXT: movl %eax, %ecx
839; AVX1-NEXT: shrl $12, %ecx
840; AVX1-NEXT: andl $1, %ecx
841; AVX1-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
842; AVX1-NEXT: movl %eax, %ecx
843; AVX1-NEXT: shrl $13, %ecx
844; AVX1-NEXT: andl $1, %ecx
845; AVX1-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
846; AVX1-NEXT: movl %eax, %ecx
847; AVX1-NEXT: shrl $14, %ecx
848; AVX1-NEXT: andl $1, %ecx
849; AVX1-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
850; AVX1-NEXT: movl %eax, %ecx
851; AVX1-NEXT: shrl $15, %ecx
852; AVX1-NEXT: movzwl %cx, %ecx
853; AVX1-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
854; AVX1-NEXT: movl %eax, %ecx
855; AVX1-NEXT: shrl %ecx
856; AVX1-NEXT: andl $1, %ecx
857; AVX1-NEXT: movl %eax, %edx
858; AVX1-NEXT: andl $1, %edx
859; AVX1-NEXT: vmovd %edx, %xmm1
860; AVX1-NEXT: vpinsrw $1, %ecx, %xmm1, %xmm1
861; AVX1-NEXT: movl %eax, %ecx
862; AVX1-NEXT: shrl $2, %ecx
863; AVX1-NEXT: andl $1, %ecx
864; AVX1-NEXT: vpinsrw $2, %ecx, %xmm1, %xmm1
865; AVX1-NEXT: movl %eax, %ecx
866; AVX1-NEXT: shrl $3, %ecx
867; AVX1-NEXT: andl $1, %ecx
868; AVX1-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
869; AVX1-NEXT: movl %eax, %ecx
870; AVX1-NEXT: shrl $4, %ecx
871; AVX1-NEXT: andl $1, %ecx
872; AVX1-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
873; AVX1-NEXT: movl %eax, %ecx
874; AVX1-NEXT: shrl $5, %ecx
875; AVX1-NEXT: andl $1, %ecx
876; AVX1-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
877; AVX1-NEXT: movl %eax, %ecx
878; AVX1-NEXT: shrl $6, %ecx
879; AVX1-NEXT: andl $1, %ecx
880; AVX1-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
881; AVX1-NEXT: shrl $7, %eax
882; AVX1-NEXT: andl $1, %eax
883; AVX1-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
884; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
885; AVX1-NEXT: retq
886;
887; AVX2-LABEL: ext_i16_16i16:
888; AVX2: # BB#0:
889; AVX2-NEXT: movw %di, -{{[0-9]+}}(%rsp)
890; AVX2-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
891; AVX2-NEXT: movl %eax, %ecx
892; AVX2-NEXT: shrl $9, %ecx
893; AVX2-NEXT: andl $1, %ecx
894; AVX2-NEXT: movl %eax, %edx
895; AVX2-NEXT: shrl $8, %edx
896; AVX2-NEXT: andl $1, %edx
897; AVX2-NEXT: vmovd %edx, %xmm0
898; AVX2-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0
899; AVX2-NEXT: movl %eax, %ecx
900; AVX2-NEXT: shrl $10, %ecx
901; AVX2-NEXT: andl $1, %ecx
902; AVX2-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0
903; AVX2-NEXT: movl %eax, %ecx
904; AVX2-NEXT: shrl $11, %ecx
905; AVX2-NEXT: andl $1, %ecx
906; AVX2-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
907; AVX2-NEXT: movl %eax, %ecx
908; AVX2-NEXT: shrl $12, %ecx
909; AVX2-NEXT: andl $1, %ecx
910; AVX2-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
911; AVX2-NEXT: movl %eax, %ecx
912; AVX2-NEXT: shrl $13, %ecx
913; AVX2-NEXT: andl $1, %ecx
914; AVX2-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
915; AVX2-NEXT: movl %eax, %ecx
916; AVX2-NEXT: shrl $14, %ecx
917; AVX2-NEXT: andl $1, %ecx
918; AVX2-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
919; AVX2-NEXT: movl %eax, %ecx
920; AVX2-NEXT: shrl $15, %ecx
921; AVX2-NEXT: movzwl %cx, %ecx
922; AVX2-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
923; AVX2-NEXT: movl %eax, %ecx
924; AVX2-NEXT: shrl %ecx
925; AVX2-NEXT: andl $1, %ecx
926; AVX2-NEXT: movl %eax, %edx
927; AVX2-NEXT: andl $1, %edx
928; AVX2-NEXT: vmovd %edx, %xmm1
929; AVX2-NEXT: vpinsrw $1, %ecx, %xmm1, %xmm1
930; AVX2-NEXT: movl %eax, %ecx
931; AVX2-NEXT: shrl $2, %ecx
932; AVX2-NEXT: andl $1, %ecx
933; AVX2-NEXT: vpinsrw $2, %ecx, %xmm1, %xmm1
934; AVX2-NEXT: movl %eax, %ecx
935; AVX2-NEXT: shrl $3, %ecx
936; AVX2-NEXT: andl $1, %ecx
937; AVX2-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1
938; AVX2-NEXT: movl %eax, %ecx
939; AVX2-NEXT: shrl $4, %ecx
940; AVX2-NEXT: andl $1, %ecx
941; AVX2-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
942; AVX2-NEXT: movl %eax, %ecx
943; AVX2-NEXT: shrl $5, %ecx
944; AVX2-NEXT: andl $1, %ecx
945; AVX2-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
946; AVX2-NEXT: movl %eax, %ecx
947; AVX2-NEXT: shrl $6, %ecx
948; AVX2-NEXT: andl $1, %ecx
949; AVX2-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
950; AVX2-NEXT: shrl $7, %eax
951; AVX2-NEXT: andl $1, %eax
952; AVX2-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
953; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
954; AVX2-NEXT: retq
955;
956; AVX512-LABEL: ext_i16_16i16:
957; AVX512: # BB#0:
958; AVX512-NEXT: kmovd %edi, %k1
959; AVX512-NEXT: vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z}
960; AVX512-NEXT: retq
961 %1 = bitcast i16 %a0 to <16 x i1>
962 %2 = zext <16 x i1> %1 to <16 x i16>
963 ret <16 x i16> %2
964}
965
966define <32 x i8> @ext_i32_32i8(i32 %a0) {
; Expands an i32 bit mask into a <32 x i8> vector of 0/1 lanes: the IR below
; bitcasts the scalar to <32 x i1> and zero-extends each bit to an i8 lane.
; The autogenerated CHECK lines pin the per-subtarget lowering: SSE2/SSSE3 and
; AVX1/AVX2 extract each bit with scalar shrl/andl and rebuild the vector with
; unpacks or vpinsrb; AVX512 does it in one masked move (kmovd + vmovdqu8 {z}).
; NOTE(review): these assertions were produced by update_llc_test_checks.py --
; regenerate rather than hand-edit if codegen changes.
967; SSE2-SSSE3-LABEL: ext_i32_32i8:
968; SSE2-SSSE3: # BB#0:
969; SSE2-SSSE3-NEXT: movw %di, -{{[0-9]+}}(%rsp)
970; SSE2-SSSE3-NEXT: shrl $16, %edi
971; SSE2-SSSE3-NEXT: movw %di, -{{[0-9]+}}(%rsp)
972; SSE2-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
973; SSE2-SSSE3-NEXT: movl %eax, %ecx
974; SSE2-SSSE3-NEXT: shrl $7, %ecx
975; SSE2-SSSE3-NEXT: andl $1, %ecx
976; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
977; SSE2-SSSE3-NEXT: movl %eax, %ecx
978; SSE2-SSSE3-NEXT: shrl $6, %ecx
979; SSE2-SSSE3-NEXT: andl $1, %ecx
980; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
981; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
982; SSE2-SSSE3-NEXT: movl %eax, %ecx
983; SSE2-SSSE3-NEXT: shrl $5, %ecx
984; SSE2-SSSE3-NEXT: andl $1, %ecx
985; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
986; SSE2-SSSE3-NEXT: movl %eax, %ecx
987; SSE2-SSSE3-NEXT: shrl $4, %ecx
988; SSE2-SSSE3-NEXT: andl $1, %ecx
989; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
990; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
991; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
992; SSE2-SSSE3-NEXT: movl %eax, %ecx
993; SSE2-SSSE3-NEXT: shrl $3, %ecx
994; SSE2-SSSE3-NEXT: andl $1, %ecx
995; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
996; SSE2-SSSE3-NEXT: movl %eax, %ecx
997; SSE2-SSSE3-NEXT: shrl $2, %ecx
998; SSE2-SSSE3-NEXT: andl $1, %ecx
999; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
1000; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1001; SSE2-SSSE3-NEXT: movl %eax, %ecx
1002; SSE2-SSSE3-NEXT: andl $1, %ecx
1003; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
1004; SSE2-SSSE3-NEXT: movl %eax, %ecx
1005; SSE2-SSSE3-NEXT: shrl %ecx
1006; SSE2-SSSE3-NEXT: andl $1, %ecx
1007; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
1008; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
1009; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1010; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1011; SSE2-SSSE3-NEXT: movl %eax, %ecx
1012; SSE2-SSSE3-NEXT: shrl $11, %ecx
1013; SSE2-SSSE3-NEXT: andl $1, %ecx
1014; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
1015; SSE2-SSSE3-NEXT: movl %eax, %ecx
1016; SSE2-SSSE3-NEXT: shrl $10, %ecx
1017; SSE2-SSSE3-NEXT: andl $1, %ecx
1018; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
1019; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
1020; SSE2-SSSE3-NEXT: movl %eax, %ecx
1021; SSE2-SSSE3-NEXT: shrl $9, %ecx
1022; SSE2-SSSE3-NEXT: andl $1, %ecx
1023; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
1024; SSE2-SSSE3-NEXT: movl %eax, %ecx
1025; SSE2-SSSE3-NEXT: shrl $8, %ecx
1026; SSE2-SSSE3-NEXT: andl $1, %ecx
1027; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
1028; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
1029; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1030; SSE2-SSSE3-NEXT: movl %eax, %ecx
1031; SSE2-SSSE3-NEXT: shrl $13, %ecx
1032; SSE2-SSSE3-NEXT: andl $1, %ecx
1033; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
1034; SSE2-SSSE3-NEXT: movl %eax, %ecx
1035; SSE2-SSSE3-NEXT: shrl $12, %ecx
1036; SSE2-SSSE3-NEXT: andl $1, %ecx
1037; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
1038; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
1039; SSE2-SSSE3-NEXT: movl %eax, %ecx
1040; SSE2-SSSE3-NEXT: shrl $14, %ecx
1041; SSE2-SSSE3-NEXT: andl $1, %ecx
1042; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
1043; SSE2-SSSE3-NEXT: shrl $15, %eax
1044; SSE2-SSSE3-NEXT: movzwl %ax, %eax
1045; SSE2-SSSE3-NEXT: movd %eax, %xmm4
1046; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
1047; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
1048; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
1049; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1050; SSE2-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
1051; SSE2-SSSE3-NEXT: movl %eax, %ecx
1052; SSE2-SSSE3-NEXT: shrl $7, %ecx
1053; SSE2-SSSE3-NEXT: andl $1, %ecx
1054; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
1055; SSE2-SSSE3-NEXT: movl %eax, %ecx
1056; SSE2-SSSE3-NEXT: shrl $6, %ecx
1057; SSE2-SSSE3-NEXT: andl $1, %ecx
1058; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
1059; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
1060; SSE2-SSSE3-NEXT: movl %eax, %ecx
1061; SSE2-SSSE3-NEXT: shrl $5, %ecx
1062; SSE2-SSSE3-NEXT: andl $1, %ecx
1063; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
1064; SSE2-SSSE3-NEXT: movl %eax, %ecx
1065; SSE2-SSSE3-NEXT: shrl $4, %ecx
1066; SSE2-SSSE3-NEXT: andl $1, %ecx
1067; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
1068; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
1069; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
1070; SSE2-SSSE3-NEXT: movl %eax, %ecx
1071; SSE2-SSSE3-NEXT: shrl $3, %ecx
1072; SSE2-SSSE3-NEXT: andl $1, %ecx
1073; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
1074; SSE2-SSSE3-NEXT: movl %eax, %ecx
1075; SSE2-SSSE3-NEXT: shrl $2, %ecx
1076; SSE2-SSSE3-NEXT: andl $1, %ecx
1077; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
1078; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
1079; SSE2-SSSE3-NEXT: movl %eax, %ecx
1080; SSE2-SSSE3-NEXT: andl $1, %ecx
1081; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
1082; SSE2-SSSE3-NEXT: movl %eax, %ecx
1083; SSE2-SSSE3-NEXT: shrl %ecx
1084; SSE2-SSSE3-NEXT: andl $1, %ecx
1085; SSE2-SSSE3-NEXT: movd %ecx, %xmm4
1086; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
1087; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
1088; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
1089; SSE2-SSSE3-NEXT: movl %eax, %ecx
1090; SSE2-SSSE3-NEXT: shrl $11, %ecx
1091; SSE2-SSSE3-NEXT: andl $1, %ecx
1092; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
1093; SSE2-SSSE3-NEXT: movl %eax, %ecx
1094; SSE2-SSSE3-NEXT: shrl $10, %ecx
1095; SSE2-SSSE3-NEXT: andl $1, %ecx
1096; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
1097; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
1098; SSE2-SSSE3-NEXT: movl %eax, %ecx
1099; SSE2-SSSE3-NEXT: shrl $9, %ecx
1100; SSE2-SSSE3-NEXT: andl $1, %ecx
1101; SSE2-SSSE3-NEXT: movd %ecx, %xmm4
1102; SSE2-SSSE3-NEXT: movl %eax, %ecx
1103; SSE2-SSSE3-NEXT: shrl $8, %ecx
1104; SSE2-SSSE3-NEXT: andl $1, %ecx
1105; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
1106; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
1107; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
1108; SSE2-SSSE3-NEXT: movl %eax, %ecx
1109; SSE2-SSSE3-NEXT: shrl $13, %ecx
1110; SSE2-SSSE3-NEXT: andl $1, %ecx
1111; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
1112; SSE2-SSSE3-NEXT: movl %eax, %ecx
1113; SSE2-SSSE3-NEXT: shrl $12, %ecx
1114; SSE2-SSSE3-NEXT: andl $1, %ecx
1115; SSE2-SSSE3-NEXT: movd %ecx, %xmm4
1116; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
1117; SSE2-SSSE3-NEXT: movl %eax, %ecx
1118; SSE2-SSSE3-NEXT: shrl $14, %ecx
1119; SSE2-SSSE3-NEXT: andl $1, %ecx
1120; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
1121; SSE2-SSSE3-NEXT: shrl $15, %eax
1122; SSE2-SSSE3-NEXT: movzwl %ax, %eax
1123; SSE2-SSSE3-NEXT: movd %eax, %xmm5
1124; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
1125; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
1126; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
1127; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
1128; SSE2-SSSE3-NEXT: retq
1129;
1130; AVX1-LABEL: ext_i32_32i8:
1131; AVX1: # BB#0:
1132; AVX1-NEXT: pushq %rbp
1133; AVX1-NEXT: .Lcfi0:
1134; AVX1-NEXT: .cfi_def_cfa_offset 16
1135; AVX1-NEXT: .Lcfi1:
1136; AVX1-NEXT: .cfi_offset %rbp, -16
1137; AVX1-NEXT: movq %rsp, %rbp
1138; AVX1-NEXT: .Lcfi2:
1139; AVX1-NEXT: .cfi_def_cfa_register %rbp
1140; AVX1-NEXT: andq $-32, %rsp
1141; AVX1-NEXT: subq $32, %rsp
1142; AVX1-NEXT: movl %edi, %eax
1143; AVX1-NEXT: shrl $17, %eax
1144; AVX1-NEXT: andl $1, %eax
1145; AVX1-NEXT: movl %edi, %ecx
1146; AVX1-NEXT: shrl $16, %ecx
1147; AVX1-NEXT: andl $1, %ecx
1148; AVX1-NEXT: vmovd %ecx, %xmm0
1149; AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
1150; AVX1-NEXT: movl %edi, %eax
1151; AVX1-NEXT: shrl $18, %eax
1152; AVX1-NEXT: andl $1, %eax
1153; AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
1154; AVX1-NEXT: movl %edi, %eax
1155; AVX1-NEXT: shrl $19, %eax
1156; AVX1-NEXT: andl $1, %eax
1157; AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
1158; AVX1-NEXT: movl %edi, %eax
1159; AVX1-NEXT: shrl $20, %eax
1160; AVX1-NEXT: andl $1, %eax
1161; AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
1162; AVX1-NEXT: movl %edi, %eax
1163; AVX1-NEXT: shrl $21, %eax
1164; AVX1-NEXT: andl $1, %eax
1165; AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
1166; AVX1-NEXT: movl %edi, %eax
1167; AVX1-NEXT: shrl $22, %eax
1168; AVX1-NEXT: andl $1, %eax
1169; AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
1170; AVX1-NEXT: movl %edi, %eax
1171; AVX1-NEXT: shrl $23, %eax
1172; AVX1-NEXT: andl $1, %eax
1173; AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
1174; AVX1-NEXT: movl %edi, %eax
1175; AVX1-NEXT: shrl $24, %eax
1176; AVX1-NEXT: andl $1, %eax
1177; AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
1178; AVX1-NEXT: movl %edi, %eax
1179; AVX1-NEXT: shrl $25, %eax
1180; AVX1-NEXT: andl $1, %eax
1181; AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
1182; AVX1-NEXT: movl %edi, %eax
1183; AVX1-NEXT: shrl $26, %eax
1184; AVX1-NEXT: andl $1, %eax
1185; AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1186; AVX1-NEXT: movl %edi, %eax
1187; AVX1-NEXT: shrl $27, %eax
1188; AVX1-NEXT: andl $1, %eax
1189; AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
1190; AVX1-NEXT: movl %edi, %eax
1191; AVX1-NEXT: shrl $28, %eax
1192; AVX1-NEXT: andl $1, %eax
1193; AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1194; AVX1-NEXT: movl %edi, %eax
1195; AVX1-NEXT: shrl $29, %eax
1196; AVX1-NEXT: andl $1, %eax
1197; AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
1198; AVX1-NEXT: movl %edi, %eax
1199; AVX1-NEXT: shrl $30, %eax
1200; AVX1-NEXT: andl $1, %eax
1201; AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
1202; AVX1-NEXT: movl %edi, %eax
1203; AVX1-NEXT: shrl $31, %eax
1204; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
1205; AVX1-NEXT: movl %edi, %eax
1206; AVX1-NEXT: shrl %eax
1207; AVX1-NEXT: andl $1, %eax
1208; AVX1-NEXT: movl %edi, %ecx
1209; AVX1-NEXT: andl $1, %ecx
1210; AVX1-NEXT: vmovd %ecx, %xmm1
1211; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
1212; AVX1-NEXT: movl %edi, %eax
1213; AVX1-NEXT: shrl $2, %eax
1214; AVX1-NEXT: andl $1, %eax
1215; AVX1-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
1216; AVX1-NEXT: movl %edi, %eax
1217; AVX1-NEXT: shrl $3, %eax
1218; AVX1-NEXT: andl $1, %eax
1219; AVX1-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
1220; AVX1-NEXT: movl %edi, %eax
1221; AVX1-NEXT: shrl $4, %eax
1222; AVX1-NEXT: andl $1, %eax
1223; AVX1-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
1224; AVX1-NEXT: movl %edi, %eax
1225; AVX1-NEXT: shrl $5, %eax
1226; AVX1-NEXT: andl $1, %eax
1227; AVX1-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
1228; AVX1-NEXT: movl %edi, %eax
1229; AVX1-NEXT: shrl $6, %eax
1230; AVX1-NEXT: andl $1, %eax
1231; AVX1-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
1232; AVX1-NEXT: movl %edi, %eax
1233; AVX1-NEXT: shrl $7, %eax
1234; AVX1-NEXT: andl $1, %eax
1235; AVX1-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
1236; AVX1-NEXT: movl %edi, %eax
1237; AVX1-NEXT: shrl $8, %eax
1238; AVX1-NEXT: andl $1, %eax
1239; AVX1-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
1240; AVX1-NEXT: movl %edi, %eax
1241; AVX1-NEXT: shrl $9, %eax
1242; AVX1-NEXT: andl $1, %eax
1243; AVX1-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
1244; AVX1-NEXT: movl %edi, %eax
1245; AVX1-NEXT: shrl $10, %eax
1246; AVX1-NEXT: andl $1, %eax
1247; AVX1-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
1248; AVX1-NEXT: movl %edi, %eax
1249; AVX1-NEXT: shrl $11, %eax
1250; AVX1-NEXT: andl $1, %eax
1251; AVX1-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
1252; AVX1-NEXT: movl %edi, %eax
1253; AVX1-NEXT: shrl $12, %eax
1254; AVX1-NEXT: andl $1, %eax
1255; AVX1-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
1256; AVX1-NEXT: movl %edi, %eax
1257; AVX1-NEXT: shrl $13, %eax
1258; AVX1-NEXT: andl $1, %eax
1259; AVX1-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
1260; AVX1-NEXT: movl %edi, %eax
1261; AVX1-NEXT: shrl $14, %eax
1262; AVX1-NEXT: andl $1, %eax
1263; AVX1-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
1264; AVX1-NEXT: shrl $15, %edi
1265; AVX1-NEXT: andl $1, %edi
1266; AVX1-NEXT: vpinsrb $15, %edi, %xmm1, %xmm1
1267; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
1268; AVX1-NEXT: movq %rbp, %rsp
1269; AVX1-NEXT: popq %rbp
1270; AVX1-NEXT: retq
1271;
1272; AVX2-LABEL: ext_i32_32i8:
1273; AVX2: # BB#0:
1274; AVX2-NEXT: pushq %rbp
1275; AVX2-NEXT: .Lcfi0:
1276; AVX2-NEXT: .cfi_def_cfa_offset 16
1277; AVX2-NEXT: .Lcfi1:
1278; AVX2-NEXT: .cfi_offset %rbp, -16
1279; AVX2-NEXT: movq %rsp, %rbp
1280; AVX2-NEXT: .Lcfi2:
1281; AVX2-NEXT: .cfi_def_cfa_register %rbp
1282; AVX2-NEXT: andq $-32, %rsp
1283; AVX2-NEXT: subq $32, %rsp
1284; AVX2-NEXT: movl %edi, %eax
1285; AVX2-NEXT: shrl $17, %eax
1286; AVX2-NEXT: andl $1, %eax
1287; AVX2-NEXT: movl %edi, %ecx
1288; AVX2-NEXT: shrl $16, %ecx
1289; AVX2-NEXT: andl $1, %ecx
1290; AVX2-NEXT: vmovd %ecx, %xmm0
1291; AVX2-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
1292; AVX2-NEXT: movl %edi, %eax
1293; AVX2-NEXT: shrl $18, %eax
1294; AVX2-NEXT: andl $1, %eax
1295; AVX2-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
1296; AVX2-NEXT: movl %edi, %eax
1297; AVX2-NEXT: shrl $19, %eax
1298; AVX2-NEXT: andl $1, %eax
1299; AVX2-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
1300; AVX2-NEXT: movl %edi, %eax
1301; AVX2-NEXT: shrl $20, %eax
1302; AVX2-NEXT: andl $1, %eax
1303; AVX2-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
1304; AVX2-NEXT: movl %edi, %eax
1305; AVX2-NEXT: shrl $21, %eax
1306; AVX2-NEXT: andl $1, %eax
1307; AVX2-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
1308; AVX2-NEXT: movl %edi, %eax
1309; AVX2-NEXT: shrl $22, %eax
1310; AVX2-NEXT: andl $1, %eax
1311; AVX2-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
1312; AVX2-NEXT: movl %edi, %eax
1313; AVX2-NEXT: shrl $23, %eax
1314; AVX2-NEXT: andl $1, %eax
1315; AVX2-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
1316; AVX2-NEXT: movl %edi, %eax
1317; AVX2-NEXT: shrl $24, %eax
1318; AVX2-NEXT: andl $1, %eax
1319; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
1320; AVX2-NEXT: movl %edi, %eax
1321; AVX2-NEXT: shrl $25, %eax
1322; AVX2-NEXT: andl $1, %eax
1323; AVX2-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
1324; AVX2-NEXT: movl %edi, %eax
1325; AVX2-NEXT: shrl $26, %eax
1326; AVX2-NEXT: andl $1, %eax
1327; AVX2-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1328; AVX2-NEXT: movl %edi, %eax
1329; AVX2-NEXT: shrl $27, %eax
1330; AVX2-NEXT: andl $1, %eax
1331; AVX2-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
1332; AVX2-NEXT: movl %edi, %eax
1333; AVX2-NEXT: shrl $28, %eax
1334; AVX2-NEXT: andl $1, %eax
1335; AVX2-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1336; AVX2-NEXT: movl %edi, %eax
1337; AVX2-NEXT: shrl $29, %eax
1338; AVX2-NEXT: andl $1, %eax
1339; AVX2-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
1340; AVX2-NEXT: movl %edi, %eax
1341; AVX2-NEXT: shrl $30, %eax
1342; AVX2-NEXT: andl $1, %eax
1343; AVX2-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
1344; AVX2-NEXT: movl %edi, %eax
1345; AVX2-NEXT: shrl $31, %eax
1346; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
1347; AVX2-NEXT: movl %edi, %eax
1348; AVX2-NEXT: shrl %eax
1349; AVX2-NEXT: andl $1, %eax
1350; AVX2-NEXT: movl %edi, %ecx
1351; AVX2-NEXT: andl $1, %ecx
1352; AVX2-NEXT: vmovd %ecx, %xmm1
1353; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
1354; AVX2-NEXT: movl %edi, %eax
1355; AVX2-NEXT: shrl $2, %eax
1356; AVX2-NEXT: andl $1, %eax
1357; AVX2-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
1358; AVX2-NEXT: movl %edi, %eax
1359; AVX2-NEXT: shrl $3, %eax
1360; AVX2-NEXT: andl $1, %eax
1361; AVX2-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
1362; AVX2-NEXT: movl %edi, %eax
1363; AVX2-NEXT: shrl $4, %eax
1364; AVX2-NEXT: andl $1, %eax
1365; AVX2-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
1366; AVX2-NEXT: movl %edi, %eax
1367; AVX2-NEXT: shrl $5, %eax
1368; AVX2-NEXT: andl $1, %eax
1369; AVX2-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
1370; AVX2-NEXT: movl %edi, %eax
1371; AVX2-NEXT: shrl $6, %eax
1372; AVX2-NEXT: andl $1, %eax
1373; AVX2-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
1374; AVX2-NEXT: movl %edi, %eax
1375; AVX2-NEXT: shrl $7, %eax
1376; AVX2-NEXT: andl $1, %eax
1377; AVX2-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
1378; AVX2-NEXT: movl %edi, %eax
1379; AVX2-NEXT: shrl $8, %eax
1380; AVX2-NEXT: andl $1, %eax
1381; AVX2-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
1382; AVX2-NEXT: movl %edi, %eax
1383; AVX2-NEXT: shrl $9, %eax
1384; AVX2-NEXT: andl $1, %eax
1385; AVX2-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
1386; AVX2-NEXT: movl %edi, %eax
1387; AVX2-NEXT: shrl $10, %eax
1388; AVX2-NEXT: andl $1, %eax
1389; AVX2-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
1390; AVX2-NEXT: movl %edi, %eax
1391; AVX2-NEXT: shrl $11, %eax
1392; AVX2-NEXT: andl $1, %eax
1393; AVX2-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
1394; AVX2-NEXT: movl %edi, %eax
1395; AVX2-NEXT: shrl $12, %eax
1396; AVX2-NEXT: andl $1, %eax
1397; AVX2-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
1398; AVX2-NEXT: movl %edi, %eax
1399; AVX2-NEXT: shrl $13, %eax
1400; AVX2-NEXT: andl $1, %eax
1401; AVX2-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
1402; AVX2-NEXT: movl %edi, %eax
1403; AVX2-NEXT: shrl $14, %eax
1404; AVX2-NEXT: andl $1, %eax
1405; AVX2-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
1406; AVX2-NEXT: shrl $15, %edi
1407; AVX2-NEXT: andl $1, %edi
1408; AVX2-NEXT: vpinsrb $15, %edi, %xmm1, %xmm1
1409; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1410; AVX2-NEXT: movq %rbp, %rsp
1411; AVX2-NEXT: popq %rbp
1412; AVX2-NEXT: retq
1413;
1414; AVX512-LABEL: ext_i32_32i8:
1415; AVX512: # BB#0:
1416; AVX512-NEXT: kmovd %edi, %k1
1417; AVX512-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
1418; AVX512-NEXT: retq
1419 %1 = bitcast i32 %a0 to <32 x i1>
1420 %2 = zext <32 x i1> %1 to <32 x i8>
1421 ret <32 x i8> %2
1422}
1423
1424;
1425; 512-bit vectors
1426;
1427
1428define <8 x i64> @ext_i8_8i64(i8 %a0) {
; Expands an i8 bit mask into a <8 x i64> vector of 0/1 lanes: the IR below
; bitcasts the scalar to <8 x i1> and zero-extends each bit to an i64 lane.
; The autogenerated CHECK lines pin the per-subtarget lowering: SSE2/SSSE3 do
; scalar bit extraction plus unpack/shuffle/pand widening into four xmm
; results; AVX1/AVX2 build a word vector with vpinsrw then widen via
; vpmovzxwd/vpmovzxdq into two ymm results; AVX512 needs only a kmovd and a
; masked vpbroadcastq {z} into zmm0.
; NOTE(review): assertions were produced by update_llc_test_checks.py --
; regenerate rather than hand-edit if codegen changes.
1429; SSE2-SSSE3-LABEL: ext_i8_8i64:
1430; SSE2-SSSE3: # BB#0:
1431; SSE2-SSSE3-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1432; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1433; SSE2-SSSE3-NEXT: movl %eax, %ecx
1434; SSE2-SSSE3-NEXT: shrl $3, %ecx
1435; SSE2-SSSE3-NEXT: andl $1, %ecx
1436; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
1437; SSE2-SSSE3-NEXT: movl %eax, %ecx
1438; SSE2-SSSE3-NEXT: shrl $2, %ecx
1439; SSE2-SSSE3-NEXT: andl $1, %ecx
1440; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
1441; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1442; SSE2-SSSE3-NEXT: movl %eax, %ecx
1443; SSE2-SSSE3-NEXT: andl $1, %ecx
1444; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
1445; SSE2-SSSE3-NEXT: movl %eax, %ecx
1446; SSE2-SSSE3-NEXT: shrl %ecx
1447; SSE2-SSSE3-NEXT: andl $1, %ecx
1448; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
1449; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
1450; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
1451; SSE2-SSSE3-NEXT: movl %eax, %ecx
1452; SSE2-SSSE3-NEXT: shrl $5, %ecx
1453; SSE2-SSSE3-NEXT: andl $1, %ecx
1454; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
1455; SSE2-SSSE3-NEXT: movl %eax, %ecx
1456; SSE2-SSSE3-NEXT: shrl $4, %ecx
1457; SSE2-SSSE3-NEXT: andl $1, %ecx
1458; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
1459; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
1460; SSE2-SSSE3-NEXT: movl %eax, %ecx
1461; SSE2-SSSE3-NEXT: shrl $6, %ecx
1462; SSE2-SSSE3-NEXT: andl $1, %ecx
1463; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
1464; SSE2-SSSE3-NEXT: shrl $7, %eax
1465; SSE2-SSSE3-NEXT: movzwl %ax, %eax
1466; SSE2-SSSE3-NEXT: movd %eax, %xmm2
1467; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1468; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
1469; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
1470; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,1,0,3]
1471; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
1472; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,1]
1473; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
1474; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,3]
1475; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,6,7]
1476; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
1477; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,1,2,3]
1478; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,6,7]
1479; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
1480; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,3,3]
1481; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,6,7]
1482; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
1483; SSE2-SSSE3-NEXT: retq
1484;
1485; AVX1-LABEL: ext_i8_8i64:
1486; AVX1: # BB#0:
1487; AVX1-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1488; AVX1-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1489; AVX1-NEXT: movl %eax, %ecx
1490; AVX1-NEXT: shrl %ecx
1491; AVX1-NEXT: andl $1, %ecx
1492; AVX1-NEXT: movl %eax, %edx
1493; AVX1-NEXT: andl $1, %edx
1494; AVX1-NEXT: vmovd %edx, %xmm0
1495; AVX1-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0
1496; AVX1-NEXT: movl %eax, %ecx
1497; AVX1-NEXT: shrl $2, %ecx
1498; AVX1-NEXT: andl $1, %ecx
1499; AVX1-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0
1500; AVX1-NEXT: movl %eax, %ecx
1501; AVX1-NEXT: shrl $3, %ecx
1502; AVX1-NEXT: andl $1, %ecx
1503; AVX1-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
1504; AVX1-NEXT: movl %eax, %ecx
1505; AVX1-NEXT: shrl $4, %ecx
1506; AVX1-NEXT: andl $1, %ecx
1507; AVX1-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
1508; AVX1-NEXT: movl %eax, %ecx
1509; AVX1-NEXT: shrl $5, %ecx
1510; AVX1-NEXT: andl $1, %ecx
1511; AVX1-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
1512; AVX1-NEXT: movl %eax, %ecx
1513; AVX1-NEXT: shrl $6, %ecx
1514; AVX1-NEXT: andl $1, %ecx
1515; AVX1-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
1516; AVX1-NEXT: shrl $7, %eax
1517; AVX1-NEXT: movzwl %ax, %eax
1518; AVX1-NEXT: vpinsrw $7, %eax, %xmm0, %xmm1
1519; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
1520; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
1521; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
1522; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
1523; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1]
1524; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
1525; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1526; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero
1527; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1528; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
1529; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
1530; AVX1-NEXT: retq
1531;
1532; AVX2-LABEL: ext_i8_8i64:
1533; AVX2: # BB#0:
1534; AVX2-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
1535; AVX2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
1536; AVX2-NEXT: movl %eax, %ecx
1537; AVX2-NEXT: shrl %ecx
1538; AVX2-NEXT: andl $1, %ecx
1539; AVX2-NEXT: movl %eax, %edx
1540; AVX2-NEXT: andl $1, %edx
1541; AVX2-NEXT: vmovd %edx, %xmm0
1542; AVX2-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0
1543; AVX2-NEXT: movl %eax, %ecx
1544; AVX2-NEXT: shrl $2, %ecx
1545; AVX2-NEXT: andl $1, %ecx
1546; AVX2-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0
1547; AVX2-NEXT: movl %eax, %ecx
1548; AVX2-NEXT: shrl $3, %ecx
1549; AVX2-NEXT: andl $1, %ecx
1550; AVX2-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
1551; AVX2-NEXT: movl %eax, %ecx
1552; AVX2-NEXT: shrl $4, %ecx
1553; AVX2-NEXT: andl $1, %ecx
1554; AVX2-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
1555; AVX2-NEXT: movl %eax, %ecx
1556; AVX2-NEXT: shrl $5, %ecx
1557; AVX2-NEXT: andl $1, %ecx
1558; AVX2-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
1559; AVX2-NEXT: movl %eax, %ecx
1560; AVX2-NEXT: shrl $6, %ecx
1561; AVX2-NEXT: andl $1, %ecx
1562; AVX2-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
1563; AVX2-NEXT: shrl $7, %eax
1564; AVX2-NEXT: movzwl %ax, %eax
1565; AVX2-NEXT: vpinsrw $7, %eax, %xmm0, %xmm1
1566; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
1567; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
1568; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
1569; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
1570; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1571; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
1572; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
1573; AVX2-NEXT: retq
1574;
1575; AVX512-LABEL: ext_i8_8i64:
1576; AVX512: # BB#0:
1577; AVX512-NEXT: kmovd %edi, %k1
1578; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
1579; AVX512-NEXT: retq
1580 %1 = bitcast i8 %a0 to <8 x i1>
1581 %2 = zext <8 x i1> %1 to <8 x i64>
1582 ret <8 x i64> %2
1583}
1584
define <16 x i32> @ext_i16_16i32(i16 %a0) {
; Purpose: broadcast each of the 16 bits of the scalar mask %a0 into its own
; 32-bit lane (0 or 1). Exercises codegen for i16 -> <16 x i1> bitcast + zext.
; NOTE(review): the prefix-specific CHECK lines below are autogenerated
; (see the update_llc_test_checks.py banner at the top of the file) --
; regenerate with that script after codegen changes rather than hand-editing.
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $7, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $6, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $5, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $4, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $3, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $2, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $11, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $10, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $9, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $8, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $13, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $12, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $14, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: shrl $15, %eax
; SSE2-SSSE3-NEXT: movzwl %ax, %eax
; SSE2-SSSE3-NEXT: movd %eax, %xmm4
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,1,1,1]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1: # BB#0:
; AVX1-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: movl %eax, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: vmovd %edx, %xmm0
; AVX1-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $2, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $3, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $4, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $5, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $6, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $7, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $8, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $9, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $10, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $11, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $12, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $13, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $14, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; AVX1-NEXT: shrl $15, %eax
; AVX1-NEXT: movzwl %ax, %eax
; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm1
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2: # BB#0:
; AVX2-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: movl %eax, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: vmovd %edx, %xmm0
; AVX2-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $2, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $3, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $4, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $5, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $6, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $7, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $8, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $9, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $10, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $11, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $12, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $13, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $14, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; AVX2-NEXT: shrl $15, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm1
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1]
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i16_16i32:
; AVX512: # BB#0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512-NEXT: retq
; Reinterpret the scalar mask as a <16 x i1> bit-vector, then zero-extend
; each lane to i32; the CHECK bodies above pin the expansion per subtarget
; (scalar bit-insert on SSE/AVX1/AVX2 vs. a single masked broadcast on AVX512).
 %1 = bitcast i16 %a0 to <16 x i1>
 %2 = zext <16 x i1> %1 to <16 x i32>
 ret <16 x i32> %2
}
1846
1847define <32 x i16> @ext_i32_32i16(i32 %a0) {
1848; SSE2-SSSE3-LABEL: ext_i32_32i16:
1849; SSE2-SSSE3: # BB#0:
1850; SSE2-SSSE3-NEXT: movl %edi, %eax
1851; SSE2-SSSE3-NEXT: shrl $16, %eax
1852; SSE2-SSSE3-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
1853; SSE2-SSSE3-NEXT: movw %di, -{{[0-9]+}}(%rsp)
1854; SSE2-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
1855; SSE2-SSSE3-NEXT: movl %eax, %ecx
1856; SSE2-SSSE3-NEXT: shrl $7, %ecx
1857; SSE2-SSSE3-NEXT: andl $1, %ecx
1858; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
1859; SSE2-SSSE3-NEXT: movl %eax, %ecx
1860; SSE2-SSSE3-NEXT: shrl $6, %ecx
1861; SSE2-SSSE3-NEXT: andl $1, %ecx
1862; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
1863; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1864; SSE2-SSSE3-NEXT: movl %eax, %ecx
1865; SSE2-SSSE3-NEXT: shrl $5, %ecx
1866; SSE2-SSSE3-NEXT: andl $1, %ecx
1867; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
1868; SSE2-SSSE3-NEXT: movl %eax, %ecx
1869; SSE2-SSSE3-NEXT: shrl $4, %ecx
1870; SSE2-SSSE3-NEXT: andl $1, %ecx
1871; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
1872; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
1873; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
1874; SSE2-SSSE3-NEXT: movl %eax, %ecx
1875; SSE2-SSSE3-NEXT: shrl $3, %ecx
1876; SSE2-SSSE3-NEXT: andl $1, %ecx
1877; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
1878; SSE2-SSSE3-NEXT: movl %eax, %ecx
1879; SSE2-SSSE3-NEXT: shrl $2, %ecx
1880; SSE2-SSSE3-NEXT: andl $1, %ecx
1881; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
1882; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1883; SSE2-SSSE3-NEXT: movl %eax, %ecx
1884; SSE2-SSSE3-NEXT: andl $1, %ecx
1885; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
1886; SSE2-SSSE3-NEXT: movl %eax, %ecx
1887; SSE2-SSSE3-NEXT: shrl %ecx
1888; SSE2-SSSE3-NEXT: andl $1, %ecx
1889; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
1890; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
1891; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
1892; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
1893; SSE2-SSSE3-NEXT: movl %eax, %ecx
1894; SSE2-SSSE3-NEXT: shrl $11, %ecx
1895; SSE2-SSSE3-NEXT: andl $1, %ecx
1896; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
1897; SSE2-SSSE3-NEXT: movl %eax, %ecx
1898; SSE2-SSSE3-NEXT: shrl $10, %ecx
1899; SSE2-SSSE3-NEXT: andl $1, %ecx
1900; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
1901; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1902; SSE2-SSSE3-NEXT: movl %eax, %ecx
1903; SSE2-SSSE3-NEXT: shrl $9, %ecx
1904; SSE2-SSSE3-NEXT: andl $1, %ecx
1905; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
1906; SSE2-SSSE3-NEXT: movl %eax, %ecx
1907; SSE2-SSSE3-NEXT: shrl $8, %ecx
1908; SSE2-SSSE3-NEXT: andl $1, %ecx
1909; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
1910; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
1911; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1912; SSE2-SSSE3-NEXT: movl %eax, %ecx
1913; SSE2-SSSE3-NEXT: shrl $13, %ecx
1914; SSE2-SSSE3-NEXT: andl $1, %ecx
1915; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
1916; SSE2-SSSE3-NEXT: movl %eax, %ecx
1917; SSE2-SSSE3-NEXT: shrl $12, %ecx
1918; SSE2-SSSE3-NEXT: andl $1, %ecx
1919; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
1920; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
1921; SSE2-SSSE3-NEXT: movl %eax, %ecx
1922; SSE2-SSSE3-NEXT: shrl $14, %ecx
1923; SSE2-SSSE3-NEXT: andl $1, %ecx
1924; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
1925; SSE2-SSSE3-NEXT: shrl $15, %eax
1926; SSE2-SSSE3-NEXT: movzwl %ax, %eax
1927; SSE2-SSSE3-NEXT: movd %eax, %xmm4
1928; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
1929; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
1930; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
1931; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
1932; SSE2-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
1933; SSE2-SSSE3-NEXT: movl %eax, %ecx
1934; SSE2-SSSE3-NEXT: shrl $7, %ecx
1935; SSE2-SSSE3-NEXT: andl $1, %ecx
1936; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
1937; SSE2-SSSE3-NEXT: movl %eax, %ecx
1938; SSE2-SSSE3-NEXT: shrl $6, %ecx
1939; SSE2-SSSE3-NEXT: andl $1, %ecx
1940; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
1941; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1942; SSE2-SSSE3-NEXT: movl %eax, %ecx
1943; SSE2-SSSE3-NEXT: shrl $5, %ecx
1944; SSE2-SSSE3-NEXT: andl $1, %ecx
1945; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
1946; SSE2-SSSE3-NEXT: movl %eax, %ecx
1947; SSE2-SSSE3-NEXT: shrl $4, %ecx
1948; SSE2-SSSE3-NEXT: andl $1, %ecx
1949; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
1950; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
1951; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
1952; SSE2-SSSE3-NEXT: movl %eax, %ecx
1953; SSE2-SSSE3-NEXT: shrl $3, %ecx
1954; SSE2-SSSE3-NEXT: andl $1, %ecx
1955; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
1956; SSE2-SSSE3-NEXT: movl %eax, %ecx
1957; SSE2-SSSE3-NEXT: shrl $2, %ecx
1958; SSE2-SSSE3-NEXT: andl $1, %ecx
1959; SSE2-SSSE3-NEXT: movd %ecx, %xmm4
1960; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
1961; SSE2-SSSE3-NEXT: movl %eax, %ecx
1962; SSE2-SSSE3-NEXT: andl $1, %ecx
1963; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
1964; SSE2-SSSE3-NEXT: movl %eax, %ecx
1965; SSE2-SSSE3-NEXT: shrl %ecx
1966; SSE2-SSSE3-NEXT: andl $1, %ecx
1967; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
1968; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
1969; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
1970; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
1971; SSE2-SSSE3-NEXT: movl %eax, %ecx
1972; SSE2-SSSE3-NEXT: shrl $11, %ecx
1973; SSE2-SSSE3-NEXT: andl $1, %ecx
1974; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
1975; SSE2-SSSE3-NEXT: movl %eax, %ecx
1976; SSE2-SSSE3-NEXT: shrl $10, %ecx
1977; SSE2-SSSE3-NEXT: andl $1, %ecx
1978; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
1979; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
1980; SSE2-SSSE3-NEXT: movl %eax, %ecx
1981; SSE2-SSSE3-NEXT: shrl $9, %ecx
1982; SSE2-SSSE3-NEXT: andl $1, %ecx
1983; SSE2-SSSE3-NEXT: movd %ecx, %xmm4
1984; SSE2-SSSE3-NEXT: movl %eax, %ecx
1985; SSE2-SSSE3-NEXT: shrl $8, %ecx
1986; SSE2-SSSE3-NEXT: andl $1, %ecx
1987; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
1988; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
1989; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
1990; SSE2-SSSE3-NEXT: movl %eax, %ecx
1991; SSE2-SSSE3-NEXT: shrl $13, %ecx
1992; SSE2-SSSE3-NEXT: andl $1, %ecx
1993; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
1994; SSE2-SSSE3-NEXT: movl %eax, %ecx
1995; SSE2-SSSE3-NEXT: shrl $12, %ecx
1996; SSE2-SSSE3-NEXT: andl $1, %ecx
1997; SSE2-SSSE3-NEXT: movd %ecx, %xmm4
1998; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
1999; SSE2-SSSE3-NEXT: movl %eax, %ecx
2000; SSE2-SSSE3-NEXT: shrl $14, %ecx
2001; SSE2-SSSE3-NEXT: andl $1, %ecx
2002; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
2003; SSE2-SSSE3-NEXT: shrl $15, %eax
2004; SSE2-SSSE3-NEXT: movzwl %ax, %eax
2005; SSE2-SSSE3-NEXT: movd %eax, %xmm5
2006; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3],xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7]
2007; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
2008; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
2009; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
2010; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
2011; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
2012; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1]
2013; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
2014; SSE2-SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
2015; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
2016; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
2017; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2018; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
2019; SSE2-SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
2020; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
2021; SSE2-SSSE3-NEXT: retq
2022;
2023; AVX1-LABEL: ext_i32_32i16:
2024; AVX1: # BB#0:
2025; AVX1-NEXT: pushq %rbp
2026; AVX1-NEXT: .Lcfi3:
2027; AVX1-NEXT: .cfi_def_cfa_offset 16
2028; AVX1-NEXT: .Lcfi4:
2029; AVX1-NEXT: .cfi_offset %rbp, -16
2030; AVX1-NEXT: movq %rsp, %rbp
2031; AVX1-NEXT: .Lcfi5:
2032; AVX1-NEXT: .cfi_def_cfa_register %rbp
2033; AVX1-NEXT: pushq %r15
2034; AVX1-NEXT: pushq %r14
2035; AVX1-NEXT: pushq %r13
2036; AVX1-NEXT: pushq %r12
2037; AVX1-NEXT: pushq %rbx
2038; AVX1-NEXT: andq $-32, %rsp
2039; AVX1-NEXT: subq $128, %rsp
2040; AVX1-NEXT: .Lcfi6:
2041; AVX1-NEXT: .cfi_offset %rbx, -56
2042; AVX1-NEXT: .Lcfi7:
2043; AVX1-NEXT: .cfi_offset %r12, -48
2044; AVX1-NEXT: .Lcfi8:
2045; AVX1-NEXT: .cfi_offset %r13, -40
2046; AVX1-NEXT: .Lcfi9:
2047; AVX1-NEXT: .cfi_offset %r14, -32
2048; AVX1-NEXT: .Lcfi10:
2049; AVX1-NEXT: .cfi_offset %r15, -24
2050; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2051; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2052; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2053; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2054; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2055; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2056; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2057; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2058; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2059; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2060; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2061; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2062; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2063; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2064; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2065; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2066; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2067; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2068; AVX1-NEXT: movl %edi, %r13d
2069; AVX1-NEXT: movl %edi, %r12d
2070; AVX1-NEXT: movl %edi, %r15d
2071; AVX1-NEXT: movl %edi, %r14d
2072; AVX1-NEXT: movl %edi, %ebx
2073; AVX1-NEXT: movl %edi, %r11d
2074; AVX1-NEXT: movl %edi, %r10d
2075; AVX1-NEXT: movl %edi, %r9d
2076; AVX1-NEXT: movl %edi, %r8d
2077; AVX1-NEXT: movl %edi, %esi
2078; AVX1-NEXT: movl %edi, %edx
2079; AVX1-NEXT: movl %edi, %ecx
2080; AVX1-NEXT: movl %edi, %eax
2081; AVX1-NEXT: andl $1, %edi
2082; AVX1-NEXT: vmovd %edi, %xmm0
2083; AVX1-NEXT: shrl %eax
2084; AVX1-NEXT: andl $1, %eax
2085; AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
2086; AVX1-NEXT: shrl $2, %ecx
2087; AVX1-NEXT: andl $1, %ecx
2088; AVX1-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
2089; AVX1-NEXT: shrl $3, %edx
2090; AVX1-NEXT: andl $1, %edx
2091; AVX1-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
2092; AVX1-NEXT: shrl $4, %esi
2093; AVX1-NEXT: andl $1, %esi
2094; AVX1-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
2095; AVX1-NEXT: shrl $5, %r8d
2096; AVX1-NEXT: andl $1, %r8d
2097; AVX1-NEXT: vpinsrb $5, %r8d, %xmm0, %xmm0
2098; AVX1-NEXT: shrl $6, %r9d
2099; AVX1-NEXT: andl $1, %r9d
2100; AVX1-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
2101; AVX1-NEXT: shrl $7, %r10d
2102; AVX1-NEXT: andl $1, %r10d
2103; AVX1-NEXT: vpinsrb $7, %r10d, %xmm0, %xmm0
2104; AVX1-NEXT: shrl $8, %r11d
2105; AVX1-NEXT: andl $1, %r11d
2106; AVX1-NEXT: vpinsrb $8, %r11d, %xmm0, %xmm0
2107; AVX1-NEXT: shrl $9, %ebx
2108; AVX1-NEXT: andl $1, %ebx
2109; AVX1-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
2110; AVX1-NEXT: shrl $10, %r14d
2111; AVX1-NEXT: andl $1, %r14d
2112; AVX1-NEXT: vpinsrb $10, %r14d, %xmm0, %xmm0
2113; AVX1-NEXT: shrl $11, %r15d
2114; AVX1-NEXT: andl $1, %r15d
2115; AVX1-NEXT: vpinsrb $11, %r15d, %xmm0, %xmm0
2116; AVX1-NEXT: shrl $12, %r12d
2117; AVX1-NEXT: andl $1, %r12d
2118; AVX1-NEXT: vpinsrb $12, %r12d, %xmm0, %xmm0
2119; AVX1-NEXT: shrl $13, %r13d
2120; AVX1-NEXT: andl $1, %r13d
2121; AVX1-NEXT: vpinsrb $13, %r13d, %xmm0, %xmm0
2122; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2123; AVX1-NEXT: shrl $14, %eax
2124; AVX1-NEXT: andl $1, %eax
2125; AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
2126; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2127; AVX1-NEXT: shrl $15, %eax
2128; AVX1-NEXT: andl $1, %eax
2129; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
2130; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2131; AVX1-NEXT: shrl $16, %eax
2132; AVX1-NEXT: andl $1, %eax
2133; AVX1-NEXT: vmovd %eax, %xmm1
2134; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2135; AVX1-NEXT: shrl $17, %eax
2136; AVX1-NEXT: andl $1, %eax
2137; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
2138; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2139; AVX1-NEXT: shrl $18, %eax
2140; AVX1-NEXT: andl $1, %eax
2141; AVX1-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
2142; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2143; AVX1-NEXT: shrl $19, %eax
2144; AVX1-NEXT: andl $1, %eax
2145; AVX1-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
2146; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2147; AVX1-NEXT: shrl $20, %eax
2148; AVX1-NEXT: andl $1, %eax
2149; AVX1-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
2150; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2151; AVX1-NEXT: shrl $21, %eax
2152; AVX1-NEXT: andl $1, %eax
2153; AVX1-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
2154; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2155; AVX1-NEXT: shrl $22, %eax
2156; AVX1-NEXT: andl $1, %eax
2157; AVX1-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
2158; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2159; AVX1-NEXT: shrl $23, %eax
2160; AVX1-NEXT: andl $1, %eax
2161; AVX1-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
2162; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2163; AVX1-NEXT: shrl $24, %eax
2164; AVX1-NEXT: andl $1, %eax
2165; AVX1-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
2166; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2167; AVX1-NEXT: shrl $25, %eax
2168; AVX1-NEXT: andl $1, %eax
2169; AVX1-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
2170; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2171; AVX1-NEXT: shrl $26, %eax
2172; AVX1-NEXT: andl $1, %eax
2173; AVX1-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
2174; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2175; AVX1-NEXT: shrl $27, %eax
2176; AVX1-NEXT: andl $1, %eax
2177; AVX1-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
2178; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2179; AVX1-NEXT: shrl $28, %eax
2180; AVX1-NEXT: andl $1, %eax
2181; AVX1-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
2182; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2183; AVX1-NEXT: shrl $29, %eax
2184; AVX1-NEXT: andl $1, %eax
2185; AVX1-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
2186; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2187; AVX1-NEXT: shrl $30, %eax
2188; AVX1-NEXT: andl $1, %eax
2189; AVX1-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
2190; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2191; AVX1-NEXT: shrl $31, %eax
2192; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
2193; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2194; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
2195; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
2196; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
2197; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
2198; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
2199; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
2200; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
2201; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
2202; AVX1-NEXT: leaq -40(%rbp), %rsp
2203; AVX1-NEXT: popq %rbx
2204; AVX1-NEXT: popq %r12
2205; AVX1-NEXT: popq %r13
2206; AVX1-NEXT: popq %r14
2207; AVX1-NEXT: popq %r15
2208; AVX1-NEXT: popq %rbp
2209; AVX1-NEXT: retq
2210;
2211; AVX2-LABEL: ext_i32_32i16:
2212; AVX2: # BB#0:
2213; AVX2-NEXT: pushq %rbp
2214; AVX2-NEXT: .Lcfi3:
2215; AVX2-NEXT: .cfi_def_cfa_offset 16
2216; AVX2-NEXT: .Lcfi4:
2217; AVX2-NEXT: .cfi_offset %rbp, -16
2218; AVX2-NEXT: movq %rsp, %rbp
2219; AVX2-NEXT: .Lcfi5:
2220; AVX2-NEXT: .cfi_def_cfa_register %rbp
2221; AVX2-NEXT: pushq %r15
2222; AVX2-NEXT: pushq %r14
2223; AVX2-NEXT: pushq %r13
2224; AVX2-NEXT: pushq %r12
2225; AVX2-NEXT: pushq %rbx
2226; AVX2-NEXT: andq $-32, %rsp
2227; AVX2-NEXT: subq $128, %rsp
2228; AVX2-NEXT: .Lcfi6:
2229; AVX2-NEXT: .cfi_offset %rbx, -56
2230; AVX2-NEXT: .Lcfi7:
2231; AVX2-NEXT: .cfi_offset %r12, -48
2232; AVX2-NEXT: .Lcfi8:
2233; AVX2-NEXT: .cfi_offset %r13, -40
2234; AVX2-NEXT: .Lcfi9:
2235; AVX2-NEXT: .cfi_offset %r14, -32
2236; AVX2-NEXT: .Lcfi10:
2237; AVX2-NEXT: .cfi_offset %r15, -24
2238; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2239; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2240; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2241; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2242; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2243; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2244; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2245; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2246; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2247; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2248; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2249; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2250; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2251; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2252; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2253; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2254; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2255; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
2256; AVX2-NEXT: movl %edi, %r13d
2257; AVX2-NEXT: movl %edi, %r12d
2258; AVX2-NEXT: movl %edi, %r15d
2259; AVX2-NEXT: movl %edi, %r14d
2260; AVX2-NEXT: movl %edi, %ebx
2261; AVX2-NEXT: movl %edi, %r11d
2262; AVX2-NEXT: movl %edi, %r10d
2263; AVX2-NEXT: movl %edi, %r9d
2264; AVX2-NEXT: movl %edi, %r8d
2265; AVX2-NEXT: movl %edi, %esi
2266; AVX2-NEXT: movl %edi, %edx
2267; AVX2-NEXT: movl %edi, %ecx
2268; AVX2-NEXT: movl %edi, %eax
2269; AVX2-NEXT: andl $1, %edi
2270; AVX2-NEXT: vmovd %edi, %xmm0
2271; AVX2-NEXT: shrl %eax
2272; AVX2-NEXT: andl $1, %eax
2273; AVX2-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
2274; AVX2-NEXT: shrl $2, %ecx
2275; AVX2-NEXT: andl $1, %ecx
2276; AVX2-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
2277; AVX2-NEXT: shrl $3, %edx
2278; AVX2-NEXT: andl $1, %edx
2279; AVX2-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
2280; AVX2-NEXT: shrl $4, %esi
2281; AVX2-NEXT: andl $1, %esi
2282; AVX2-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
2283; AVX2-NEXT: shrl $5, %r8d
2284; AVX2-NEXT: andl $1, %r8d
2285; AVX2-NEXT: vpinsrb $5, %r8d, %xmm0, %xmm0
2286; AVX2-NEXT: shrl $6, %r9d
2287; AVX2-NEXT: andl $1, %r9d
2288; AVX2-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
2289; AVX2-NEXT: shrl $7, %r10d
2290; AVX2-NEXT: andl $1, %r10d
2291; AVX2-NEXT: vpinsrb $7, %r10d, %xmm0, %xmm0
2292; AVX2-NEXT: shrl $8, %r11d
2293; AVX2-NEXT: andl $1, %r11d
2294; AVX2-NEXT: vpinsrb $8, %r11d, %xmm0, %xmm0
2295; AVX2-NEXT: shrl $9, %ebx
2296; AVX2-NEXT: andl $1, %ebx
2297; AVX2-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
2298; AVX2-NEXT: shrl $10, %r14d
2299; AVX2-NEXT: andl $1, %r14d
2300; AVX2-NEXT: vpinsrb $10, %r14d, %xmm0, %xmm0
2301; AVX2-NEXT: shrl $11, %r15d
2302; AVX2-NEXT: andl $1, %r15d
2303; AVX2-NEXT: vpinsrb $11, %r15d, %xmm0, %xmm0
2304; AVX2-NEXT: shrl $12, %r12d
2305; AVX2-NEXT: andl $1, %r12d
2306; AVX2-NEXT: vpinsrb $12, %r12d, %xmm0, %xmm0
2307; AVX2-NEXT: shrl $13, %r13d
2308; AVX2-NEXT: andl $1, %r13d
2309; AVX2-NEXT: vpinsrb $13, %r13d, %xmm0, %xmm0
2310; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2311; AVX2-NEXT: shrl $14, %eax
2312; AVX2-NEXT: andl $1, %eax
2313; AVX2-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
2314; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2315; AVX2-NEXT: shrl $15, %eax
2316; AVX2-NEXT: andl $1, %eax
2317; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
2318; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2319; AVX2-NEXT: shrl $16, %eax
2320; AVX2-NEXT: andl $1, %eax
2321; AVX2-NEXT: vmovd %eax, %xmm1
2322; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2323; AVX2-NEXT: shrl $17, %eax
2324; AVX2-NEXT: andl $1, %eax
2325; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
2326; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2327; AVX2-NEXT: shrl $18, %eax
2328; AVX2-NEXT: andl $1, %eax
2329; AVX2-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
2330; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2331; AVX2-NEXT: shrl $19, %eax
2332; AVX2-NEXT: andl $1, %eax
2333; AVX2-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
2334; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2335; AVX2-NEXT: shrl $20, %eax
2336; AVX2-NEXT: andl $1, %eax
2337; AVX2-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
2338; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2339; AVX2-NEXT: shrl $21, %eax
2340; AVX2-NEXT: andl $1, %eax
2341; AVX2-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
2342; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2343; AVX2-NEXT: shrl $22, %eax
2344; AVX2-NEXT: andl $1, %eax
2345; AVX2-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
2346; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2347; AVX2-NEXT: shrl $23, %eax
2348; AVX2-NEXT: andl $1, %eax
2349; AVX2-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
2350; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2351; AVX2-NEXT: shrl $24, %eax
2352; AVX2-NEXT: andl $1, %eax
2353; AVX2-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
2354; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2355; AVX2-NEXT: shrl $25, %eax
2356; AVX2-NEXT: andl $1, %eax
2357; AVX2-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
2358; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2359; AVX2-NEXT: shrl $26, %eax
2360; AVX2-NEXT: andl $1, %eax
2361; AVX2-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
2362; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2363; AVX2-NEXT: shrl $27, %eax
2364; AVX2-NEXT: andl $1, %eax
2365; AVX2-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
2366; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2367; AVX2-NEXT: shrl $28, %eax
2368; AVX2-NEXT: andl $1, %eax
2369; AVX2-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
2370; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2371; AVX2-NEXT: shrl $29, %eax
2372; AVX2-NEXT: andl $1, %eax
2373; AVX2-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
2374; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2375; AVX2-NEXT: shrl $30, %eax
2376; AVX2-NEXT: andl $1, %eax
2377; AVX2-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
2378; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
2379; AVX2-NEXT: shrl $31, %eax
2380; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
2381; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
2382; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
2383; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
2384; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
2385; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
2386; AVX2-NEXT: leaq -40(%rbp), %rsp
2387; AVX2-NEXT: popq %rbx
2388; AVX2-NEXT: popq %r12
2389; AVX2-NEXT: popq %r13
2390; AVX2-NEXT: popq %r14
2391; AVX2-NEXT: popq %r15
2392; AVX2-NEXT: popq %rbp
2393; AVX2-NEXT: retq
2394;
2395; AVX512-LABEL: ext_i32_32i16:
2396; AVX512: # BB#0:
2397; AVX512-NEXT: kmovd %edi, %k1
2398; AVX512-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z}
2399; AVX512-NEXT: retq
2400 %1 = bitcast i32 %a0 to <32 x i1>
2401 %2 = zext <32 x i1> %1 to <32 x i16>
2402 ret <32 x i16> %2
2403}
2404
2405define <64 x i8> @ext_i64_64i8(i64 %a0) {
2406; SSE2-SSSE3-LABEL: ext_i64_64i8:
2407; SSE2-SSSE3: # BB#0:
2408; SSE2-SSSE3-NEXT: movw %di, -{{[0-9]+}}(%rsp)
2409; SSE2-SSSE3-NEXT: movq %rdi, %rax
2410; SSE2-SSSE3-NEXT: shrq $32, %rax
2411; SSE2-SSSE3-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
2412; SSE2-SSSE3-NEXT: movq %rdi, %rax
2413; SSE2-SSSE3-NEXT: shrq $48, %rax
2414; SSE2-SSSE3-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
2415; SSE2-SSSE3-NEXT: shrl $16, %edi
2416; SSE2-SSSE3-NEXT: movw %di, -{{[0-9]+}}(%rsp)
2417; SSE2-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
2418; SSE2-SSSE3-NEXT: movl %eax, %ecx
2419; SSE2-SSSE3-NEXT: shrl $7, %ecx
2420; SSE2-SSSE3-NEXT: andl $1, %ecx
2421; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
2422; SSE2-SSSE3-NEXT: movl %eax, %ecx
2423; SSE2-SSSE3-NEXT: shrl $6, %ecx
2424; SSE2-SSSE3-NEXT: andl $1, %ecx
2425; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
2426; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2427; SSE2-SSSE3-NEXT: movl %eax, %ecx
2428; SSE2-SSSE3-NEXT: shrl $5, %ecx
2429; SSE2-SSSE3-NEXT: andl $1, %ecx
2430; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
2431; SSE2-SSSE3-NEXT: movl %eax, %ecx
2432; SSE2-SSSE3-NEXT: shrl $4, %ecx
2433; SSE2-SSSE3-NEXT: andl $1, %ecx
2434; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
2435; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
2436; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
2437; SSE2-SSSE3-NEXT: movl %eax, %ecx
2438; SSE2-SSSE3-NEXT: shrl $3, %ecx
2439; SSE2-SSSE3-NEXT: andl $1, %ecx
2440; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
2441; SSE2-SSSE3-NEXT: movl %eax, %ecx
2442; SSE2-SSSE3-NEXT: shrl $2, %ecx
2443; SSE2-SSSE3-NEXT: andl $1, %ecx
2444; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
2445; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
2446; SSE2-SSSE3-NEXT: movl %eax, %ecx
2447; SSE2-SSSE3-NEXT: andl $1, %ecx
2448; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
2449; SSE2-SSSE3-NEXT: movl %eax, %ecx
2450; SSE2-SSSE3-NEXT: shrl %ecx
2451; SSE2-SSSE3-NEXT: andl $1, %ecx
2452; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
2453; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
2454; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
2455; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
2456; SSE2-SSSE3-NEXT: movl %eax, %ecx
2457; SSE2-SSSE3-NEXT: shrl $11, %ecx
2458; SSE2-SSSE3-NEXT: andl $1, %ecx
2459; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
2460; SSE2-SSSE3-NEXT: movl %eax, %ecx
2461; SSE2-SSSE3-NEXT: shrl $10, %ecx
2462; SSE2-SSSE3-NEXT: andl $1, %ecx
2463; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
2464; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2465; SSE2-SSSE3-NEXT: movl %eax, %ecx
2466; SSE2-SSSE3-NEXT: shrl $9, %ecx
2467; SSE2-SSSE3-NEXT: andl $1, %ecx
2468; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
2469; SSE2-SSSE3-NEXT: movl %eax, %ecx
2470; SSE2-SSSE3-NEXT: shrl $8, %ecx
2471; SSE2-SSSE3-NEXT: andl $1, %ecx
2472; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
2473; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
2474; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2475; SSE2-SSSE3-NEXT: movl %eax, %ecx
2476; SSE2-SSSE3-NEXT: shrl $13, %ecx
2477; SSE2-SSSE3-NEXT: andl $1, %ecx
2478; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
2479; SSE2-SSSE3-NEXT: movl %eax, %ecx
2480; SSE2-SSSE3-NEXT: shrl $12, %ecx
2481; SSE2-SSSE3-NEXT: andl $1, %ecx
2482; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
2483; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2484; SSE2-SSSE3-NEXT: movl %eax, %ecx
2485; SSE2-SSSE3-NEXT: shrl $14, %ecx
2486; SSE2-SSSE3-NEXT: andl $1, %ecx
2487; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
2488; SSE2-SSSE3-NEXT: shrl $15, %eax
2489; SSE2-SSSE3-NEXT: movzwl %ax, %eax
2490; SSE2-SSSE3-NEXT: movd %eax, %xmm4
2491; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
2492; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2493; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
2494; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
2495; SSE2-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
2496; SSE2-SSSE3-NEXT: movl %eax, %ecx
2497; SSE2-SSSE3-NEXT: shrl $7, %ecx
2498; SSE2-SSSE3-NEXT: andl $1, %ecx
2499; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
2500; SSE2-SSSE3-NEXT: movl %eax, %ecx
2501; SSE2-SSSE3-NEXT: shrl $6, %ecx
2502; SSE2-SSSE3-NEXT: andl $1, %ecx
2503; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
2504; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2505; SSE2-SSSE3-NEXT: movl %eax, %ecx
2506; SSE2-SSSE3-NEXT: shrl $5, %ecx
2507; SSE2-SSSE3-NEXT: andl $1, %ecx
2508; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
2509; SSE2-SSSE3-NEXT: movl %eax, %ecx
2510; SSE2-SSSE3-NEXT: shrl $4, %ecx
2511; SSE2-SSSE3-NEXT: andl $1, %ecx
2512; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
2513; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
2514; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
2515; SSE2-SSSE3-NEXT: movl %eax, %ecx
2516; SSE2-SSSE3-NEXT: shrl $3, %ecx
2517; SSE2-SSSE3-NEXT: andl $1, %ecx
2518; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
2519; SSE2-SSSE3-NEXT: movl %eax, %ecx
2520; SSE2-SSSE3-NEXT: shrl $2, %ecx
2521; SSE2-SSSE3-NEXT: andl $1, %ecx
2522; SSE2-SSSE3-NEXT: movd %ecx, %xmm4
2523; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
2524; SSE2-SSSE3-NEXT: movl %eax, %ecx
2525; SSE2-SSSE3-NEXT: andl $1, %ecx
2526; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
2527; SSE2-SSSE3-NEXT: movl %eax, %ecx
2528; SSE2-SSSE3-NEXT: shrl %ecx
2529; SSE2-SSSE3-NEXT: andl $1, %ecx
2530; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
2531; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
2532; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
2533; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
2534; SSE2-SSSE3-NEXT: movl %eax, %ecx
2535; SSE2-SSSE3-NEXT: shrl $11, %ecx
2536; SSE2-SSSE3-NEXT: andl $1, %ecx
2537; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
2538; SSE2-SSSE3-NEXT: movl %eax, %ecx
2539; SSE2-SSSE3-NEXT: shrl $10, %ecx
2540; SSE2-SSSE3-NEXT: andl $1, %ecx
2541; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
2542; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
2543; SSE2-SSSE3-NEXT: movl %eax, %ecx
2544; SSE2-SSSE3-NEXT: shrl $9, %ecx
2545; SSE2-SSSE3-NEXT: andl $1, %ecx
2546; SSE2-SSSE3-NEXT: movd %ecx, %xmm4
2547; SSE2-SSSE3-NEXT: movl %eax, %ecx
2548; SSE2-SSSE3-NEXT: shrl $8, %ecx
2549; SSE2-SSSE3-NEXT: andl $1, %ecx
2550; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
2551; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
2552; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
2553; SSE2-SSSE3-NEXT: movl %eax, %ecx
2554; SSE2-SSSE3-NEXT: shrl $13, %ecx
2555; SSE2-SSSE3-NEXT: andl $1, %ecx
2556; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
2557; SSE2-SSSE3-NEXT: movl %eax, %ecx
2558; SSE2-SSSE3-NEXT: shrl $12, %ecx
2559; SSE2-SSSE3-NEXT: andl $1, %ecx
2560; SSE2-SSSE3-NEXT: movd %ecx, %xmm4
2561; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
2562; SSE2-SSSE3-NEXT: movl %eax, %ecx
2563; SSE2-SSSE3-NEXT: shrl $14, %ecx
2564; SSE2-SSSE3-NEXT: andl $1, %ecx
2565; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
2566; SSE2-SSSE3-NEXT: shrl $15, %eax
2567; SSE2-SSSE3-NEXT: movzwl %ax, %eax
2568; SSE2-SSSE3-NEXT: movd %eax, %xmm5
2569; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7]
2570; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
2571; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
2572; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
2573; SSE2-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
2574; SSE2-SSSE3-NEXT: movl %eax, %ecx
2575; SSE2-SSSE3-NEXT: shrl $7, %ecx
2576; SSE2-SSSE3-NEXT: andl $1, %ecx
2577; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
2578; SSE2-SSSE3-NEXT: movl %eax, %ecx
2579; SSE2-SSSE3-NEXT: shrl $6, %ecx
2580; SSE2-SSSE3-NEXT: andl $1, %ecx
2581; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
2582; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
2583; SSE2-SSSE3-NEXT: movl %eax, %ecx
2584; SSE2-SSSE3-NEXT: shrl $5, %ecx
2585; SSE2-SSSE3-NEXT: andl $1, %ecx
2586; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
2587; SSE2-SSSE3-NEXT: movl %eax, %ecx
2588; SSE2-SSSE3-NEXT: shrl $4, %ecx
2589; SSE2-SSSE3-NEXT: andl $1, %ecx
2590; SSE2-SSSE3-NEXT: movd %ecx, %xmm4
2591; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
2592; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
2593; SSE2-SSSE3-NEXT: movl %eax, %ecx
2594; SSE2-SSSE3-NEXT: shrl $3, %ecx
2595; SSE2-SSSE3-NEXT: andl $1, %ecx
2596; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
2597; SSE2-SSSE3-NEXT: movl %eax, %ecx
2598; SSE2-SSSE3-NEXT: shrl $2, %ecx
2599; SSE2-SSSE3-NEXT: andl $1, %ecx
2600; SSE2-SSSE3-NEXT: movd %ecx, %xmm5
2601; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
2602; SSE2-SSSE3-NEXT: movl %eax, %ecx
2603; SSE2-SSSE3-NEXT: andl $1, %ecx
2604; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
2605; SSE2-SSSE3-NEXT: movl %eax, %ecx
2606; SSE2-SSSE3-NEXT: shrl %ecx
2607; SSE2-SSSE3-NEXT: andl $1, %ecx
2608; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
2609; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
2610; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3]
2611; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
2612; SSE2-SSSE3-NEXT: movl %eax, %ecx
2613; SSE2-SSSE3-NEXT: shrl $11, %ecx
2614; SSE2-SSSE3-NEXT: andl $1, %ecx
2615; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
2616; SSE2-SSSE3-NEXT: movl %eax, %ecx
2617; SSE2-SSSE3-NEXT: shrl $10, %ecx
2618; SSE2-SSSE3-NEXT: andl $1, %ecx
2619; SSE2-SSSE3-NEXT: movd %ecx, %xmm4
2620; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
2621; SSE2-SSSE3-NEXT: movl %eax, %ecx
2622; SSE2-SSSE3-NEXT: shrl $9, %ecx
2623; SSE2-SSSE3-NEXT: andl $1, %ecx
2624; SSE2-SSSE3-NEXT: movd %ecx, %xmm5
2625; SSE2-SSSE3-NEXT: movl %eax, %ecx
2626; SSE2-SSSE3-NEXT: shrl $8, %ecx
2627; SSE2-SSSE3-NEXT: andl $1, %ecx
2628; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
2629; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3],xmm1[4],xmm5[4],xmm1[5],xmm5[5],xmm1[6],xmm5[6],xmm1[7],xmm5[7]
2630; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
2631; SSE2-SSSE3-NEXT: movl %eax, %ecx
2632; SSE2-SSSE3-NEXT: shrl $13, %ecx
2633; SSE2-SSSE3-NEXT: andl $1, %ecx
2634; SSE2-SSSE3-NEXT: movd %ecx, %xmm4
2635; SSE2-SSSE3-NEXT: movl %eax, %ecx
2636; SSE2-SSSE3-NEXT: shrl $12, %ecx
2637; SSE2-SSSE3-NEXT: andl $1, %ecx
2638; SSE2-SSSE3-NEXT: movd %ecx, %xmm5
2639; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
2640; SSE2-SSSE3-NEXT: movl %eax, %ecx
2641; SSE2-SSSE3-NEXT: shrl $14, %ecx
2642; SSE2-SSSE3-NEXT: andl $1, %ecx
2643; SSE2-SSSE3-NEXT: movd %ecx, %xmm4
2644; SSE2-SSSE3-NEXT: shrl $15, %eax
2645; SSE2-SSSE3-NEXT: movzwl %ax, %eax
2646; SSE2-SSSE3-NEXT: movd %eax, %xmm6
2647; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1],xmm4[2],xmm6[2],xmm4[3],xmm6[3],xmm4[4],xmm6[4],xmm4[5],xmm6[5],xmm4[6],xmm6[6],xmm4[7],xmm6[7]
2648; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
2649; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
2650; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
2651; SSE2-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
2652; SSE2-SSSE3-NEXT: movl %eax, %ecx
2653; SSE2-SSSE3-NEXT: shrl $7, %ecx
2654; SSE2-SSSE3-NEXT: andl $1, %ecx
2655; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
2656; SSE2-SSSE3-NEXT: movl %eax, %ecx
2657; SSE2-SSSE3-NEXT: shrl $6, %ecx
2658; SSE2-SSSE3-NEXT: andl $1, %ecx
2659; SSE2-SSSE3-NEXT: movd %ecx, %xmm4
2660; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
2661; SSE2-SSSE3-NEXT: movl %eax, %ecx
2662; SSE2-SSSE3-NEXT: shrl $5, %ecx
2663; SSE2-SSSE3-NEXT: andl $1, %ecx
2664; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
2665; SSE2-SSSE3-NEXT: movl %eax, %ecx
2666; SSE2-SSSE3-NEXT: shrl $4, %ecx
2667; SSE2-SSSE3-NEXT: andl $1, %ecx
2668; SSE2-SSSE3-NEXT: movd %ecx, %xmm5
2669; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
2670; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
2671; SSE2-SSSE3-NEXT: movl %eax, %ecx
2672; SSE2-SSSE3-NEXT: shrl $3, %ecx
2673; SSE2-SSSE3-NEXT: andl $1, %ecx
2674; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
2675; SSE2-SSSE3-NEXT: movl %eax, %ecx
2676; SSE2-SSSE3-NEXT: shrl $2, %ecx
2677; SSE2-SSSE3-NEXT: andl $1, %ecx
2678; SSE2-SSSE3-NEXT: movd %ecx, %xmm4
2679; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
2680; SSE2-SSSE3-NEXT: movl %eax, %ecx
2681; SSE2-SSSE3-NEXT: andl $1, %ecx
2682; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
2683; SSE2-SSSE3-NEXT: movl %eax, %ecx
2684; SSE2-SSSE3-NEXT: shrl %ecx
2685; SSE2-SSSE3-NEXT: andl $1, %ecx
2686; SSE2-SSSE3-NEXT: movd %ecx, %xmm6
2687; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
2688; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
2689; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
2690; SSE2-SSSE3-NEXT: movl %eax, %ecx
2691; SSE2-SSSE3-NEXT: shrl $11, %ecx
2692; SSE2-SSSE3-NEXT: andl $1, %ecx
2693; SSE2-SSSE3-NEXT: movd %ecx, %xmm4
2694; SSE2-SSSE3-NEXT: movl %eax, %ecx
2695; SSE2-SSSE3-NEXT: shrl $10, %ecx
2696; SSE2-SSSE3-NEXT: andl $1, %ecx
2697; SSE2-SSSE3-NEXT: movd %ecx, %xmm5
2698; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
2699; SSE2-SSSE3-NEXT: movl %eax, %ecx
2700; SSE2-SSSE3-NEXT: shrl $9, %ecx
2701; SSE2-SSSE3-NEXT: andl $1, %ecx
2702; SSE2-SSSE3-NEXT: movd %ecx, %xmm6
2703; SSE2-SSSE3-NEXT: movl %eax, %ecx
2704; SSE2-SSSE3-NEXT: shrl $8, %ecx
2705; SSE2-SSSE3-NEXT: andl $1, %ecx
2706; SSE2-SSSE3-NEXT: movd %ecx, %xmm4
2707; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1],xmm4[2],xmm6[2],xmm4[3],xmm6[3],xmm4[4],xmm6[4],xmm4[5],xmm6[5],xmm4[6],xmm6[6],xmm4[7],xmm6[7]
2708; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3]
2709; SSE2-SSSE3-NEXT: movl %eax, %ecx
2710; SSE2-SSSE3-NEXT: shrl $13, %ecx
2711; SSE2-SSSE3-NEXT: andl $1, %ecx
2712; SSE2-SSSE3-NEXT: movd %ecx, %xmm5
2713; SSE2-SSSE3-NEXT: movl %eax, %ecx
2714; SSE2-SSSE3-NEXT: shrl $12, %ecx
2715; SSE2-SSSE3-NEXT: andl $1, %ecx
2716; SSE2-SSSE3-NEXT: movd %ecx, %xmm6
2717; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7]
2718; SSE2-SSSE3-NEXT: movl %eax, %ecx
2719; SSE2-SSSE3-NEXT: shrl $14, %ecx
2720; SSE2-SSSE3-NEXT: andl $1, %ecx
2721; SSE2-SSSE3-NEXT: movd %ecx, %xmm5
2722; SSE2-SSSE3-NEXT: shrl $15, %eax
2723; SSE2-SSSE3-NEXT: movzwl %ax, %eax
2724; SSE2-SSSE3-NEXT: movd %eax, %xmm7
2725; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1],xmm5[2],xmm7[2],xmm5[3],xmm7[3],xmm5[4],xmm7[4],xmm5[5],xmm7[5],xmm5[6],xmm7[6],xmm5[7],xmm7[7]
2726; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3]
2727; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
2728; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0]
2729; SSE2-SSSE3-NEXT: retq
2730;
2731; AVX1-LABEL: ext_i64_64i8:
2732; AVX1: # BB#0:
2733; AVX1-NEXT: pushq %rbp
2734; AVX1-NEXT: .Lcfi11:
2735; AVX1-NEXT: .cfi_def_cfa_offset 16
2736; AVX1-NEXT: .Lcfi12:
2737; AVX1-NEXT: .cfi_offset %rbp, -16
2738; AVX1-NEXT: movq %rsp, %rbp
2739; AVX1-NEXT: .Lcfi13:
2740; AVX1-NEXT: .cfi_def_cfa_register %rbp
2741; AVX1-NEXT: andq $-32, %rsp
2742; AVX1-NEXT: subq $64, %rsp
2743; AVX1-NEXT: movl %edi, %eax
2744; AVX1-NEXT: shrl $17, %eax
2745; AVX1-NEXT: andl $1, %eax
2746; AVX1-NEXT: movl %edi, %ecx
2747; AVX1-NEXT: shrl $16, %ecx
2748; AVX1-NEXT: andl $1, %ecx
2749; AVX1-NEXT: vmovd %ecx, %xmm0
2750; AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
2751; AVX1-NEXT: movl %edi, %eax
2752; AVX1-NEXT: shrl $18, %eax
2753; AVX1-NEXT: andl $1, %eax
2754; AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
2755; AVX1-NEXT: movl %edi, %eax
2756; AVX1-NEXT: shrl $19, %eax
2757; AVX1-NEXT: andl $1, %eax
2758; AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
2759; AVX1-NEXT: movl %edi, %eax
2760; AVX1-NEXT: shrl $20, %eax
2761; AVX1-NEXT: andl $1, %eax
2762; AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
2763; AVX1-NEXT: movl %edi, %eax
2764; AVX1-NEXT: shrl $21, %eax
2765; AVX1-NEXT: andl $1, %eax
2766; AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
2767; AVX1-NEXT: movl %edi, %eax
2768; AVX1-NEXT: shrl $22, %eax
2769; AVX1-NEXT: andl $1, %eax
2770; AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
2771; AVX1-NEXT: movl %edi, %eax
2772; AVX1-NEXT: shrl $23, %eax
2773; AVX1-NEXT: andl $1, %eax
2774; AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
2775; AVX1-NEXT: movl %edi, %eax
2776; AVX1-NEXT: shrl $24, %eax
2777; AVX1-NEXT: andl $1, %eax
2778; AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
2779; AVX1-NEXT: movl %edi, %eax
2780; AVX1-NEXT: shrl $25, %eax
2781; AVX1-NEXT: andl $1, %eax
2782; AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
2783; AVX1-NEXT: movl %edi, %eax
2784; AVX1-NEXT: shrl $26, %eax
2785; AVX1-NEXT: andl $1, %eax
2786; AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
2787; AVX1-NEXT: movl %edi, %eax
2788; AVX1-NEXT: shrl $27, %eax
2789; AVX1-NEXT: andl $1, %eax
2790; AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
2791; AVX1-NEXT: movl %edi, %eax
2792; AVX1-NEXT: shrl $28, %eax
2793; AVX1-NEXT: andl $1, %eax
2794; AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
2795; AVX1-NEXT: movl %edi, %eax
2796; AVX1-NEXT: shrl $29, %eax
2797; AVX1-NEXT: andl $1, %eax
2798; AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
2799; AVX1-NEXT: movl %edi, %eax
2800; AVX1-NEXT: shrl $30, %eax
2801; AVX1-NEXT: andl $1, %eax
2802; AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
2803; AVX1-NEXT: movl %edi, %eax
2804; AVX1-NEXT: shrl $31, %eax
2805; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
2806; AVX1-NEXT: movl %edi, %eax
2807; AVX1-NEXT: shrl %eax
2808; AVX1-NEXT: andl $1, %eax
2809; AVX1-NEXT: movl %edi, %ecx
2810; AVX1-NEXT: andl $1, %ecx
2811; AVX1-NEXT: vmovd %ecx, %xmm1
2812; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
2813; AVX1-NEXT: movl %edi, %eax
2814; AVX1-NEXT: shrl $2, %eax
2815; AVX1-NEXT: andl $1, %eax
2816; AVX1-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
2817; AVX1-NEXT: movl %edi, %eax
2818; AVX1-NEXT: shrl $3, %eax
2819; AVX1-NEXT: andl $1, %eax
2820; AVX1-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
2821; AVX1-NEXT: movl %edi, %eax
2822; AVX1-NEXT: shrl $4, %eax
2823; AVX1-NEXT: andl $1, %eax
2824; AVX1-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
2825; AVX1-NEXT: movl %edi, %eax
2826; AVX1-NEXT: shrl $5, %eax
2827; AVX1-NEXT: andl $1, %eax
2828; AVX1-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
2829; AVX1-NEXT: movl %edi, %eax
2830; AVX1-NEXT: shrl $6, %eax
2831; AVX1-NEXT: andl $1, %eax
2832; AVX1-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
2833; AVX1-NEXT: movl %edi, %eax
2834; AVX1-NEXT: shrl $7, %eax
2835; AVX1-NEXT: andl $1, %eax
2836; AVX1-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
2837; AVX1-NEXT: movl %edi, %eax
2838; AVX1-NEXT: shrl $8, %eax
2839; AVX1-NEXT: andl $1, %eax
2840; AVX1-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
2841; AVX1-NEXT: movl %edi, %eax
2842; AVX1-NEXT: shrl $9, %eax
2843; AVX1-NEXT: andl $1, %eax
2844; AVX1-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
2845; AVX1-NEXT: movl %edi, %eax
2846; AVX1-NEXT: shrl $10, %eax
2847; AVX1-NEXT: andl $1, %eax
2848; AVX1-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
2849; AVX1-NEXT: movl %edi, %eax
2850; AVX1-NEXT: shrl $11, %eax
2851; AVX1-NEXT: andl $1, %eax
2852; AVX1-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
2853; AVX1-NEXT: movl %edi, %eax
2854; AVX1-NEXT: shrl $12, %eax
2855; AVX1-NEXT: andl $1, %eax
2856; AVX1-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
2857; AVX1-NEXT: movl %edi, %eax
2858; AVX1-NEXT: shrl $13, %eax
2859; AVX1-NEXT: andl $1, %eax
2860; AVX1-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
2861; AVX1-NEXT: movl %edi, %eax
2862; AVX1-NEXT: shrl $14, %eax
2863; AVX1-NEXT: andl $1, %eax
2864; AVX1-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
2865; AVX1-NEXT: movl %edi, %eax
2866; AVX1-NEXT: shrl $15, %eax
2867; AVX1-NEXT: andl $1, %eax
2868; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
2869; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2870; AVX1-NEXT: movq %rdi, %rax
2871; AVX1-NEXT: shrq $49, %rax
2872; AVX1-NEXT: andl $1, %eax
2873; AVX1-NEXT: movq %rdi, %rcx
2874; AVX1-NEXT: shrq $48, %rcx
2875; AVX1-NEXT: andl $1, %ecx
2876; AVX1-NEXT: vmovd %ecx, %xmm1
2877; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
2878; AVX1-NEXT: movq %rdi, %rax
2879; AVX1-NEXT: shrq $50, %rax
2880; AVX1-NEXT: andl $1, %eax
2881; AVX1-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
2882; AVX1-NEXT: movq %rdi, %rax
2883; AVX1-NEXT: shrq $51, %rax
2884; AVX1-NEXT: andl $1, %eax
2885; AVX1-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
2886; AVX1-NEXT: movq %rdi, %rax
2887; AVX1-NEXT: shrq $52, %rax
2888; AVX1-NEXT: andl $1, %eax
2889; AVX1-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
2890; AVX1-NEXT: movq %rdi, %rax
2891; AVX1-NEXT: shrq $53, %rax
2892; AVX1-NEXT: andl $1, %eax
2893; AVX1-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
2894; AVX1-NEXT: movq %rdi, %rax
2895; AVX1-NEXT: shrq $54, %rax
2896; AVX1-NEXT: andl $1, %eax
2897; AVX1-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
2898; AVX1-NEXT: movq %rdi, %rax
2899; AVX1-NEXT: shrq $55, %rax
2900; AVX1-NEXT: andl $1, %eax
2901; AVX1-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
2902; AVX1-NEXT: movq %rdi, %rax
2903; AVX1-NEXT: shrq $56, %rax
2904; AVX1-NEXT: andl $1, %eax
2905; AVX1-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
2906; AVX1-NEXT: movq %rdi, %rax
2907; AVX1-NEXT: shrq $57, %rax
2908; AVX1-NEXT: andl $1, %eax
2909; AVX1-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
2910; AVX1-NEXT: movq %rdi, %rax
2911; AVX1-NEXT: shrq $58, %rax
2912; AVX1-NEXT: andl $1, %eax
2913; AVX1-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
2914; AVX1-NEXT: movq %rdi, %rax
2915; AVX1-NEXT: shrq $59, %rax
2916; AVX1-NEXT: andl $1, %eax
2917; AVX1-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
2918; AVX1-NEXT: movq %rdi, %rax
2919; AVX1-NEXT: shrq $60, %rax
2920; AVX1-NEXT: andl $1, %eax
2921; AVX1-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
2922; AVX1-NEXT: movq %rdi, %rax
2923; AVX1-NEXT: shrq $61, %rax
2924; AVX1-NEXT: andl $1, %eax
2925; AVX1-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
2926; AVX1-NEXT: movq %rdi, %rax
2927; AVX1-NEXT: shrq $62, %rax
2928; AVX1-NEXT: andl $1, %eax
2929; AVX1-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
2930; AVX1-NEXT: movq %rdi, %rax
2931; AVX1-NEXT: shrq $63, %rax
2932; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
2933; AVX1-NEXT: movq %rdi, %rax
2934; AVX1-NEXT: shrq $33, %rax
2935; AVX1-NEXT: andl $1, %eax
2936; AVX1-NEXT: movq %rdi, %rcx
2937; AVX1-NEXT: shrq $32, %rcx
2938; AVX1-NEXT: andl $1, %ecx
2939; AVX1-NEXT: vmovd %ecx, %xmm2
2940; AVX1-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
2941; AVX1-NEXT: movq %rdi, %rax
2942; AVX1-NEXT: shrq $34, %rax
2943; AVX1-NEXT: andl $1, %eax
2944; AVX1-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
2945; AVX1-NEXT: movq %rdi, %rax
2946; AVX1-NEXT: shrq $35, %rax
2947; AVX1-NEXT: andl $1, %eax
2948; AVX1-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
2949; AVX1-NEXT: movq %rdi, %rax
2950; AVX1-NEXT: shrq $36, %rax
2951; AVX1-NEXT: andl $1, %eax
2952; AVX1-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
2953; AVX1-NEXT: movq %rdi, %rax
2954; AVX1-NEXT: shrq $37, %rax
2955; AVX1-NEXT: andl $1, %eax
2956; AVX1-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
2957; AVX1-NEXT: movq %rdi, %rax
2958; AVX1-NEXT: shrq $38, %rax
2959; AVX1-NEXT: andl $1, %eax
2960; AVX1-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
2961; AVX1-NEXT: movq %rdi, %rax
2962; AVX1-NEXT: shrq $39, %rax
2963; AVX1-NEXT: andl $1, %eax
2964; AVX1-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
2965; AVX1-NEXT: movq %rdi, %rax
2966; AVX1-NEXT: shrq $40, %rax
2967; AVX1-NEXT: andl $1, %eax
2968; AVX1-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
2969; AVX1-NEXT: movq %rdi, %rax
2970; AVX1-NEXT: shrq $41, %rax
2971; AVX1-NEXT: andl $1, %eax
2972; AVX1-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
2973; AVX1-NEXT: movq %rdi, %rax
2974; AVX1-NEXT: shrq $42, %rax
2975; AVX1-NEXT: andl $1, %eax
2976; AVX1-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
2977; AVX1-NEXT: movq %rdi, %rax
2978; AVX1-NEXT: shrq $43, %rax
2979; AVX1-NEXT: andl $1, %eax
2980; AVX1-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
2981; AVX1-NEXT: movq %rdi, %rax
2982; AVX1-NEXT: shrq $44, %rax
2983; AVX1-NEXT: andl $1, %eax
2984; AVX1-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
2985; AVX1-NEXT: movq %rdi, %rax
2986; AVX1-NEXT: shrq $45, %rax
2987; AVX1-NEXT: andl $1, %eax
2988; AVX1-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
2989; AVX1-NEXT: movq %rdi, %rax
2990; AVX1-NEXT: shrq $46, %rax
2991; AVX1-NEXT: andl $1, %eax
2992; AVX1-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
2993; AVX1-NEXT: shrq $47, %rdi
2994; AVX1-NEXT: andl $1, %edi
2995; AVX1-NEXT: vpinsrb $15, %edi, %xmm2, %xmm2
2996; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
2997; AVX1-NEXT: movq %rbp, %rsp
2998; AVX1-NEXT: popq %rbp
2999; AVX1-NEXT: retq
3000;
3001; AVX2-LABEL: ext_i64_64i8:
3002; AVX2: # BB#0:
3003; AVX2-NEXT: pushq %rbp
3004; AVX2-NEXT: .Lcfi11:
3005; AVX2-NEXT: .cfi_def_cfa_offset 16
3006; AVX2-NEXT: .Lcfi12:
3007; AVX2-NEXT: .cfi_offset %rbp, -16
3008; AVX2-NEXT: movq %rsp, %rbp
3009; AVX2-NEXT: .Lcfi13:
3010; AVX2-NEXT: .cfi_def_cfa_register %rbp
3011; AVX2-NEXT: andq $-32, %rsp
3012; AVX2-NEXT: subq $64, %rsp
3013; AVX2-NEXT: movl %edi, %eax
3014; AVX2-NEXT: shrl $17, %eax
3015; AVX2-NEXT: andl $1, %eax
3016; AVX2-NEXT: movl %edi, %ecx
3017; AVX2-NEXT: shrl $16, %ecx
3018; AVX2-NEXT: andl $1, %ecx
3019; AVX2-NEXT: vmovd %ecx, %xmm0
3020; AVX2-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
3021; AVX2-NEXT: movl %edi, %eax
3022; AVX2-NEXT: shrl $18, %eax
3023; AVX2-NEXT: andl $1, %eax
3024; AVX2-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
3025; AVX2-NEXT: movl %edi, %eax
3026; AVX2-NEXT: shrl $19, %eax
3027; AVX2-NEXT: andl $1, %eax
3028; AVX2-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
3029; AVX2-NEXT: movl %edi, %eax
3030; AVX2-NEXT: shrl $20, %eax
3031; AVX2-NEXT: andl $1, %eax
3032; AVX2-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
3033; AVX2-NEXT: movl %edi, %eax
3034; AVX2-NEXT: shrl $21, %eax
3035; AVX2-NEXT: andl $1, %eax
3036; AVX2-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
3037; AVX2-NEXT: movl %edi, %eax
3038; AVX2-NEXT: shrl $22, %eax
3039; AVX2-NEXT: andl $1, %eax
3040; AVX2-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
3041; AVX2-NEXT: movl %edi, %eax
3042; AVX2-NEXT: shrl $23, %eax
3043; AVX2-NEXT: andl $1, %eax
3044; AVX2-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
3045; AVX2-NEXT: movl %edi, %eax
3046; AVX2-NEXT: shrl $24, %eax
3047; AVX2-NEXT: andl $1, %eax
3048; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
3049; AVX2-NEXT: movl %edi, %eax
3050; AVX2-NEXT: shrl $25, %eax
3051; AVX2-NEXT: andl $1, %eax
3052; AVX2-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
3053; AVX2-NEXT: movl %edi, %eax
3054; AVX2-NEXT: shrl $26, %eax
3055; AVX2-NEXT: andl $1, %eax
3056; AVX2-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
3057; AVX2-NEXT: movl %edi, %eax
3058; AVX2-NEXT: shrl $27, %eax
3059; AVX2-NEXT: andl $1, %eax
3060; AVX2-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
3061; AVX2-NEXT: movl %edi, %eax
3062; AVX2-NEXT: shrl $28, %eax
3063; AVX2-NEXT: andl $1, %eax
3064; AVX2-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
3065; AVX2-NEXT: movl %edi, %eax
3066; AVX2-NEXT: shrl $29, %eax
3067; AVX2-NEXT: andl $1, %eax
3068; AVX2-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
3069; AVX2-NEXT: movl %edi, %eax
3070; AVX2-NEXT: shrl $30, %eax
3071; AVX2-NEXT: andl $1, %eax
3072; AVX2-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
3073; AVX2-NEXT: movl %edi, %eax
3074; AVX2-NEXT: shrl $31, %eax
3075; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
3076; AVX2-NEXT: movl %edi, %eax
3077; AVX2-NEXT: shrl %eax
3078; AVX2-NEXT: andl $1, %eax
3079; AVX2-NEXT: movl %edi, %ecx
3080; AVX2-NEXT: andl $1, %ecx
3081; AVX2-NEXT: vmovd %ecx, %xmm1
3082; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
3083; AVX2-NEXT: movl %edi, %eax
3084; AVX2-NEXT: shrl $2, %eax
3085; AVX2-NEXT: andl $1, %eax
3086; AVX2-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
3087; AVX2-NEXT: movl %edi, %eax
3088; AVX2-NEXT: shrl $3, %eax
3089; AVX2-NEXT: andl $1, %eax
3090; AVX2-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
3091; AVX2-NEXT: movl %edi, %eax
3092; AVX2-NEXT: shrl $4, %eax
3093; AVX2-NEXT: andl $1, %eax
3094; AVX2-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
3095; AVX2-NEXT: movl %edi, %eax
3096; AVX2-NEXT: shrl $5, %eax
3097; AVX2-NEXT: andl $1, %eax
3098; AVX2-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
3099; AVX2-NEXT: movl %edi, %eax
3100; AVX2-NEXT: shrl $6, %eax
3101; AVX2-NEXT: andl $1, %eax
3102; AVX2-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
3103; AVX2-NEXT: movl %edi, %eax
3104; AVX2-NEXT: shrl $7, %eax
3105; AVX2-NEXT: andl $1, %eax
3106; AVX2-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
3107; AVX2-NEXT: movl %edi, %eax
3108; AVX2-NEXT: shrl $8, %eax
3109; AVX2-NEXT: andl $1, %eax
3110; AVX2-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
3111; AVX2-NEXT: movl %edi, %eax
3112; AVX2-NEXT: shrl $9, %eax
3113; AVX2-NEXT: andl $1, %eax
3114; AVX2-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
3115; AVX2-NEXT: movl %edi, %eax
3116; AVX2-NEXT: shrl $10, %eax
3117; AVX2-NEXT: andl $1, %eax
3118; AVX2-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
3119; AVX2-NEXT: movl %edi, %eax
3120; AVX2-NEXT: shrl $11, %eax
3121; AVX2-NEXT: andl $1, %eax
3122; AVX2-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
3123; AVX2-NEXT: movl %edi, %eax
3124; AVX2-NEXT: shrl $12, %eax
3125; AVX2-NEXT: andl $1, %eax
3126; AVX2-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
3127; AVX2-NEXT: movl %edi, %eax
3128; AVX2-NEXT: shrl $13, %eax
3129; AVX2-NEXT: andl $1, %eax
3130; AVX2-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
3131; AVX2-NEXT: movl %edi, %eax
3132; AVX2-NEXT: shrl $14, %eax
3133; AVX2-NEXT: andl $1, %eax
3134; AVX2-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
3135; AVX2-NEXT: movl %edi, %eax
3136; AVX2-NEXT: shrl $15, %eax
3137; AVX2-NEXT: andl $1, %eax
3138; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
3139; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
3140; AVX2-NEXT: movq %rdi, %rax
3141; AVX2-NEXT: shrq $49, %rax
3142; AVX2-NEXT: andl $1, %eax
3143; AVX2-NEXT: movq %rdi, %rcx
3144; AVX2-NEXT: shrq $48, %rcx
3145; AVX2-NEXT: andl $1, %ecx
3146; AVX2-NEXT: vmovd %ecx, %xmm1
3147; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
3148; AVX2-NEXT: movq %rdi, %rax
3149; AVX2-NEXT: shrq $50, %rax
3150; AVX2-NEXT: andl $1, %eax
3151; AVX2-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
3152; AVX2-NEXT: movq %rdi, %rax
3153; AVX2-NEXT: shrq $51, %rax
3154; AVX2-NEXT: andl $1, %eax
3155; AVX2-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
3156; AVX2-NEXT: movq %rdi, %rax
3157; AVX2-NEXT: shrq $52, %rax
3158; AVX2-NEXT: andl $1, %eax
3159; AVX2-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
3160; AVX2-NEXT: movq %rdi, %rax
3161; AVX2-NEXT: shrq $53, %rax
3162; AVX2-NEXT: andl $1, %eax
3163; AVX2-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
3164; AVX2-NEXT: movq %rdi, %rax
3165; AVX2-NEXT: shrq $54, %rax
3166; AVX2-NEXT: andl $1, %eax
3167; AVX2-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
3168; AVX2-NEXT: movq %rdi, %rax
3169; AVX2-NEXT: shrq $55, %rax
3170; AVX2-NEXT: andl $1, %eax
3171; AVX2-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
3172; AVX2-NEXT: movq %rdi, %rax
3173; AVX2-NEXT: shrq $56, %rax
3174; AVX2-NEXT: andl $1, %eax
3175; AVX2-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
3176; AVX2-NEXT: movq %rdi, %rax
3177; AVX2-NEXT: shrq $57, %rax
3178; AVX2-NEXT: andl $1, %eax
3179; AVX2-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
3180; AVX2-NEXT: movq %rdi, %rax
3181; AVX2-NEXT: shrq $58, %rax
3182; AVX2-NEXT: andl $1, %eax
3183; AVX2-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
3184; AVX2-NEXT: movq %rdi, %rax
3185; AVX2-NEXT: shrq $59, %rax
3186; AVX2-NEXT: andl $1, %eax
3187; AVX2-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
3188; AVX2-NEXT: movq %rdi, %rax
3189; AVX2-NEXT: shrq $60, %rax
3190; AVX2-NEXT: andl $1, %eax
3191; AVX2-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
3192; AVX2-NEXT: movq %rdi, %rax
3193; AVX2-NEXT: shrq $61, %rax
3194; AVX2-NEXT: andl $1, %eax
3195; AVX2-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
3196; AVX2-NEXT: movq %rdi, %rax
3197; AVX2-NEXT: shrq $62, %rax
3198; AVX2-NEXT: andl $1, %eax
3199; AVX2-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
3200; AVX2-NEXT: movq %rdi, %rax
3201; AVX2-NEXT: shrq $63, %rax
3202; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
3203; AVX2-NEXT: movq %rdi, %rax
3204; AVX2-NEXT: shrq $33, %rax
3205; AVX2-NEXT: andl $1, %eax
3206; AVX2-NEXT: movq %rdi, %rcx
3207; AVX2-NEXT: shrq $32, %rcx
3208; AVX2-NEXT: andl $1, %ecx
3209; AVX2-NEXT: vmovd %ecx, %xmm2
3210; AVX2-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
3211; AVX2-NEXT: movq %rdi, %rax
3212; AVX2-NEXT: shrq $34, %rax
3213; AVX2-NEXT: andl $1, %eax
3214; AVX2-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
3215; AVX2-NEXT: movq %rdi, %rax
3216; AVX2-NEXT: shrq $35, %rax
3217; AVX2-NEXT: andl $1, %eax
3218; AVX2-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
3219; AVX2-NEXT: movq %rdi, %rax
3220; AVX2-NEXT: shrq $36, %rax
3221; AVX2-NEXT: andl $1, %eax
3222; AVX2-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
3223; AVX2-NEXT: movq %rdi, %rax
3224; AVX2-NEXT: shrq $37, %rax
3225; AVX2-NEXT: andl $1, %eax
3226; AVX2-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
3227; AVX2-NEXT: movq %rdi, %rax
3228; AVX2-NEXT: shrq $38, %rax
3229; AVX2-NEXT: andl $1, %eax
3230; AVX2-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
3231; AVX2-NEXT: movq %rdi, %rax
3232; AVX2-NEXT: shrq $39, %rax
3233; AVX2-NEXT: andl $1, %eax
3234; AVX2-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
3235; AVX2-NEXT: movq %rdi, %rax
3236; AVX2-NEXT: shrq $40, %rax
3237; AVX2-NEXT: andl $1, %eax
3238; AVX2-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
3239; AVX2-NEXT: movq %rdi, %rax
3240; AVX2-NEXT: shrq $41, %rax
3241; AVX2-NEXT: andl $1, %eax
3242; AVX2-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
3243; AVX2-NEXT: movq %rdi, %rax
3244; AVX2-NEXT: shrq $42, %rax
3245; AVX2-NEXT: andl $1, %eax
3246; AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
3247; AVX2-NEXT: movq %rdi, %rax
3248; AVX2-NEXT: shrq $43, %rax
3249; AVX2-NEXT: andl $1, %eax
3250; AVX2-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
3251; AVX2-NEXT: movq %rdi, %rax
3252; AVX2-NEXT: shrq $44, %rax
3253; AVX2-NEXT: andl $1, %eax
3254; AVX2-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
3255; AVX2-NEXT: movq %rdi, %rax
3256; AVX2-NEXT: shrq $45, %rax
3257; AVX2-NEXT: andl $1, %eax
3258; AVX2-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
3259; AVX2-NEXT: movq %rdi, %rax
3260; AVX2-NEXT: shrq $46, %rax
3261; AVX2-NEXT: andl $1, %eax
3262; AVX2-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
3263; AVX2-NEXT: shrq $47, %rdi
3264; AVX2-NEXT: andl $1, %edi
3265; AVX2-NEXT: vpinsrb $15, %edi, %xmm2, %xmm2
3266; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
3267; AVX2-NEXT: movq %rbp, %rsp
3268; AVX2-NEXT: popq %rbp
3269; AVX2-NEXT: retq
3270;
3271; AVX512-LABEL: ext_i64_64i8:
3272; AVX512: # BB#0:
3273; AVX512-NEXT: kmovq %rdi, %k1
3274; AVX512-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
3275; AVX512-NEXT: retq
3276 %1 = bitcast i64 %a0 to <64 x i1>
3277 %2 = zext <64 x i1> %1 to <64 x i8>
3278 ret <64 x i8> %2
3279}