; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512

;
; 128-bit vectors
;

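; Sign-extend each bit of an i2 mask to an all-ones/all-zeros i64 element.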
define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: andb $3, %dil
; SSE2-SSSE3-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: movq %rax, %rcx
; SSE2-SSSE3-NEXT: shlq $62, %rcx
; SSE2-SSSE3-NEXT: sarq $63, %rcx
; SSE2-SSSE3-NEXT: movq %rcx, %xmm1
; SSE2-SSSE3-NEXT: shlq $63, %rax
; SSE2-SSSE3-NEXT: sarq $63, %rax
; SSE2-SSSE3-NEXT: movq %rax, %xmm0
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: ext_i2_2i64:
; AVX12: # BB#0:
; AVX12-NEXT: andb $3, %dil
; AVX12-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX12-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $62, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: vmovq %rcx, %xmm0
; AVX12-NEXT: shlq $63, %rax
; AVX12-NEXT: sarq $63, %rax
; AVX12-NEXT: vmovq %rax, %xmm1
; AVX12-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX12-NEXT: retq
;
; AVX512-LABEL: ext_i2_2i64:
; AVX512: # BB#0:
; AVX512-NEXT: andb $3, %dil
; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT: kmovd %eax, %k1
; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}

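; Sign-extend each bit of an i4 mask to an all-ones/all-zeros i32 element.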
define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: andb $15, %dil
; SSE2-SSSE3-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: movq %rax, %rcx
; SSE2-SSSE3-NEXT: shlq $60, %rcx
; SSE2-SSSE3-NEXT: sarq $63, %rcx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movq %rax, %rcx
; SSE2-SSSE3-NEXT: shlq $61, %rcx
; SSE2-SSSE3-NEXT: sarq $63, %rcx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-SSSE3-NEXT: movq %rax, %rcx
; SSE2-SSSE3-NEXT: shlq $62, %rcx
; SSE2-SSSE3-NEXT: sarq $63, %rcx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: shlq $63, %rax
; SSE2-SSSE3-NEXT: sarq $63, %rax
; SSE2-SSSE3-NEXT: movd %eax, %xmm0
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: ext_i4_4i32:
; AVX12: # BB#0:
; AVX12-NEXT: andb $15, %dil
; AVX12-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX12-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $62, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: movq %rax, %rdx
; AVX12-NEXT: shlq $63, %rdx
; AVX12-NEXT: sarq $63, %rdx
; AVX12-NEXT: vmovd %edx, %xmm0
; AVX12-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $61, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
; AVX12-NEXT: shlq $60, %rax
; AVX12-NEXT: sarq $63, %rax
; AVX12-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
; AVX12-NEXT: retq
;
; AVX512-LABEL: ext_i4_4i32:
; AVX512: # BB#0:
; AVX512-NEXT: andb $15, %dil
; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT: kmovd %eax, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}

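; Sign-extend each bit of an i8 mask to an all-ones/all-zeros i16 element.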
define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movsbq -{{[0-9]+}}(%rsp), %rax
; SSE2-SSSE3-NEXT: movq %rax, %rcx
; SSE2-SSSE3-NEXT: shrq $7, %rcx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movq %rax, %rcx
; SSE2-SSSE3-NEXT: shlq $57, %rcx
; SSE2-SSSE3-NEXT: sarq $63, %rcx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-SSSE3-NEXT: movq %rax, %rcx
; SSE2-SSSE3-NEXT: shlq $58, %rcx
; SSE2-SSSE3-NEXT: sarq $63, %rcx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movq %rax, %rcx
; SSE2-SSSE3-NEXT: shlq $59, %rcx
; SSE2-SSSE3-NEXT: sarq $63, %rcx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-SSSE3-NEXT: movq %rax, %rcx
; SSE2-SSSE3-NEXT: shlq $60, %rcx
; SSE2-SSSE3-NEXT: sarq $63, %rcx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movq %rax, %rcx
; SSE2-SSSE3-NEXT: shlq $61, %rcx
; SSE2-SSSE3-NEXT: sarq $63, %rcx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-SSSE3-NEXT: movq %rax, %rcx
; SSE2-SSSE3-NEXT: shlq $62, %rcx
; SSE2-SSSE3-NEXT: sarq $63, %rcx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
; SSE2-SSSE3-NEXT: shlq $63, %rax
; SSE2-SSSE3-NEXT: sarq $63, %rax
; SSE2-SSSE3-NEXT: movd %eax, %xmm0
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: ext_i8_8i16:
; AVX12: # BB#0:
; AVX12-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX12-NEXT: movsbq -{{[0-9]+}}(%rsp), %rax
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $62, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: movq %rax, %rdx
; AVX12-NEXT: shlq $63, %rdx
; AVX12-NEXT: sarq $63, %rdx
; AVX12-NEXT: vmovd %edx, %xmm0
; AVX12-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $61, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $60, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $59, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $58, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $57, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; AVX12-NEXT: shrq $7, %rax
; AVX12-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; AVX12-NEXT: retq
;
; AVX512-LABEL: ext_i8_8i16:
; AVX512: # BB#0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2w %k0, %xmm0
; AVX512-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}

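; Sign-extend each bit of an i16 mask to an all-ones/all-zeros i8 element.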
define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i8:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: pushq %rbp
; SSE2-SSSE3-NEXT: .Lcfi0:
; SSE2-SSSE3-NEXT: .cfi_def_cfa_offset 16
; SSE2-SSSE3-NEXT: pushq %r15
; SSE2-SSSE3-NEXT: .Lcfi1:
; SSE2-SSSE3-NEXT: .cfi_def_cfa_offset 24
; SSE2-SSSE3-NEXT: pushq %r14
; SSE2-SSSE3-NEXT: .Lcfi2:
; SSE2-SSSE3-NEXT: .cfi_def_cfa_offset 32
; SSE2-SSSE3-NEXT: pushq %r13
; SSE2-SSSE3-NEXT: .Lcfi3:
; SSE2-SSSE3-NEXT: .cfi_def_cfa_offset 40
; SSE2-SSSE3-NEXT: pushq %r12
; SSE2-SSSE3-NEXT: .Lcfi4:
; SSE2-SSSE3-NEXT: .cfi_def_cfa_offset 48
; SSE2-SSSE3-NEXT: pushq %rbx
; SSE2-SSSE3-NEXT: .Lcfi5:
; SSE2-SSSE3-NEXT: .cfi_def_cfa_offset 56
; SSE2-SSSE3-NEXT: .Lcfi6:
; SSE2-SSSE3-NEXT: .cfi_offset %rbx, -56
; SSE2-SSSE3-NEXT: .Lcfi7:
; SSE2-SSSE3-NEXT: .cfi_offset %r12, -48
; SSE2-SSSE3-NEXT: .Lcfi8:
; SSE2-SSSE3-NEXT: .cfi_offset %r13, -40
; SSE2-SSSE3-NEXT: .Lcfi9:
; SSE2-SSSE3-NEXT: .cfi_offset %r14, -32
; SSE2-SSSE3-NEXT: .Lcfi10:
; SSE2-SSSE3-NEXT: .cfi_offset %r15, -24
; SSE2-SSSE3-NEXT: .Lcfi11:
; SSE2-SSSE3-NEXT: .cfi_offset %rbp, -16
; SSE2-SSSE3-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movswq -{{[0-9]+}}(%rsp), %rax
; SSE2-SSSE3-NEXT: movq %rax, %r8
; SSE2-SSSE3-NEXT: movq %rax, %r9
; SSE2-SSSE3-NEXT: movq %rax, %r10
; SSE2-SSSE3-NEXT: movq %rax, %r11
; SSE2-SSSE3-NEXT: movq %rax, %r14
; SSE2-SSSE3-NEXT: movq %rax, %r15
; SSE2-SSSE3-NEXT: movq %rax, %r12
; SSE2-SSSE3-NEXT: movq %rax, %r13
; SSE2-SSSE3-NEXT: movq %rax, %rbx
; SSE2-SSSE3-NEXT: movq %rax, %rcx
; SSE2-SSSE3-NEXT: movq %rax, %rdx
; SSE2-SSSE3-NEXT: movq %rax, %rsi
; SSE2-SSSE3-NEXT: movq %rax, %rdi
; SSE2-SSSE3-NEXT: movq %rax, %rbp
; SSE2-SSSE3-NEXT: shrq $15, %rbp
; SSE2-SSSE3-NEXT: movd %ebp, %xmm0
; SSE2-SSSE3-NEXT: movq %rax, %rbp
; SSE2-SSSE3-NEXT: movsbq %al, %rax
; SSE2-SSSE3-NEXT: shlq $49, %r8
; SSE2-SSSE3-NEXT: sarq $63, %r8
; SSE2-SSSE3-NEXT: movd %r8d, %xmm1
; SSE2-SSSE3-NEXT: shlq $50, %r9
; SSE2-SSSE3-NEXT: sarq $63, %r9
; SSE2-SSSE3-NEXT: movd %r9d, %xmm2
; SSE2-SSSE3-NEXT: shlq $51, %r10
; SSE2-SSSE3-NEXT: sarq $63, %r10
; SSE2-SSSE3-NEXT: movd %r10d, %xmm3
; SSE2-SSSE3-NEXT: shlq $52, %r11
; SSE2-SSSE3-NEXT: sarq $63, %r11
; SSE2-SSSE3-NEXT: movd %r11d, %xmm4
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-SSSE3-NEXT: shlq $53, %r14
; SSE2-SSSE3-NEXT: sarq $63, %r14
; SSE2-SSSE3-NEXT: movd %r14d, %xmm0
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-SSSE3-NEXT: shlq $54, %r15
; SSE2-SSSE3-NEXT: sarq $63, %r15
; SSE2-SSSE3-NEXT: movd %r15d, %xmm2
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-SSSE3-NEXT: shlq $55, %r12
; SSE2-SSSE3-NEXT: sarq $63, %r12
; SSE2-SSSE3-NEXT: movd %r12d, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE2-SSSE3-NEXT: shlq $60, %r13
; SSE2-SSSE3-NEXT: sarq $63, %r13
; SSE2-SSSE3-NEXT: movd %r13d, %xmm4
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-SSSE3-NEXT: shlq $61, %rbx
; SSE2-SSSE3-NEXT: sarq $63, %rbx
; SSE2-SSSE3-NEXT: movd %ebx, %xmm2
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-SSSE3-NEXT: shlq $62, %rcx
; SSE2-SSSE3-NEXT: sarq $63, %rcx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm5
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE2-SSSE3-NEXT: shlq $63, %rdx
; SSE2-SSSE3-NEXT: sarq $63, %rdx
; SSE2-SSSE3-NEXT: movd %edx, %xmm0
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; SSE2-SSSE3-NEXT: shlq $58, %rsi
; SSE2-SSSE3-NEXT: sarq $63, %rsi
; SSE2-SSSE3-NEXT: movd %esi, %xmm3
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7]
; SSE2-SSSE3-NEXT: shlq $59, %rdi
; SSE2-SSSE3-NEXT: sarq $63, %rdi
; SSE2-SSSE3-NEXT: movd %edi, %xmm4
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
; SSE2-SSSE3-NEXT: shlq $57, %rbp
; SSE2-SSSE3-NEXT: sarq $63, %rbp
; SSE2-SSSE3-NEXT: movd %ebp, %xmm2
; SSE2-SSSE3-NEXT: shrq $7, %rax
; SSE2-SSSE3-NEXT: movd %eax, %xmm3
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-SSSE3-NEXT: popq %rbx
; SSE2-SSSE3-NEXT: popq %r12
; SSE2-SSSE3-NEXT: popq %r13
; SSE2-SSSE3-NEXT: popq %r14
; SSE2-SSSE3-NEXT: popq %r15
; SSE2-SSSE3-NEXT: popq %rbp
; SSE2-SSSE3-NEXT: retq
;
; AVX12-LABEL: ext_i16_16i8:
; AVX12: # BB#0:
; AVX12-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; AVX12-NEXT: movswq -{{[0-9]+}}(%rsp), %rax
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $62, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: movq %rax, %rdx
; AVX12-NEXT: shlq $63, %rdx
; AVX12-NEXT: sarq $63, %rdx
; AVX12-NEXT: vmovd %edx, %xmm0
; AVX12-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $61, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $60, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $59, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $58, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $57, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movsbq %al, %rcx
; AVX12-NEXT: shrq $7, %rcx
; AVX12-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $55, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $54, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $53, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $52, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $51, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $50, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; AVX12-NEXT: movq %rax, %rcx
; AVX12-NEXT: shlq $49, %rcx
; AVX12-NEXT: sarq $63, %rcx
; AVX12-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; AVX12-NEXT: shrq $15, %rax
; AVX12-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX12-NEXT: retq
;
; AVX512-LABEL: ext_i16_16i8:
; AVX512: # BB#0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2b %k0, %xmm0
; AVX512-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;

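; Sign-extend each bit of an i4 mask to an all-ones/all-zeros i64 element.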
define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: andb $15, %dil
; SSE2-SSSE3-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $3, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $2, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-SSSE3-NEXT: movd %eax, %xmm2
; SSE2-SSSE3-NEXT: shrl %eax
; SSE2-SSSE3-NEXT: movd %eax, %xmm0
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE2-SSSE3-NEXT: pand {{.*}}(%rip), %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3]
; SSE2-SSSE3-NEXT: psllq $63, %xmm0
; SSE2-SSSE3-NEXT: psrad $31, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,1,3,3]
; SSE2-SSSE3-NEXT: psllq $63, %xmm1
; SSE2-SSSE3-NEXT: psrad $31, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1: # BB#0:
; AVX1-NEXT: andb $15, %dil
; AVX1-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shlq $60, %rcx
; AVX1-NEXT: sarq $63, %rcx
; AVX1-NEXT: vmovq %rcx, %xmm0
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shlq $61, %rcx
; AVX1-NEXT: sarq $63, %rcx
; AVX1-NEXT: vmovq %rcx, %xmm1
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shlq $62, %rcx
; AVX1-NEXT: sarq $63, %rcx
; AVX1-NEXT: vmovq %rcx, %xmm1
; AVX1-NEXT: shlq $63, %rax
; AVX1-NEXT: sarq $63, %rax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2: # BB#0:
; AVX2-NEXT: andb $15, %dil
; AVX2-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shlq $60, %rcx
; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vmovq %rcx, %xmm0
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shlq $61, %rcx
; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vmovq %rcx, %xmm1
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shlq $62, %rcx
; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vmovq %rcx, %xmm1
; AVX2-NEXT: shlq $63, %rax
; AVX2-NEXT: sarq $63, %rax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i4_4i64:
; AVX512: # BB#0:
; AVX512-NEXT: andb $15, %dil
; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT: kmovd %eax, %k1
; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT: retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}

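; Sign-extend each bit of an i8 mask to an all-ones/all-zeros i32 element.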
define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $3, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $2, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $5, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $4, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $6, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: shrl $7, %eax
; SSE2-SSSE3-NEXT: movzwl %ax, %eax
; SSE2-SSSE3-NEXT: movd %eax, %xmm3
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-SSSE3-NEXT: pslld $31, %xmm0
; SSE2-SSSE3-NEXT: psrad $31, %xmm0
; SSE2-SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-SSSE3-NEXT: pslld $31, %xmm1
; SSE2-SSSE3-NEXT: psrad $31, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1: # BB#0:
; AVX1-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movsbq -{{[0-9]+}}(%rsp), %rax
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shlq $58, %rcx
; AVX1-NEXT: sarq $63, %rcx
; AVX1-NEXT: movq %rax, %rdx
; AVX1-NEXT: shlq $59, %rdx
; AVX1-NEXT: sarq $63, %rdx
; AVX1-NEXT: vmovd %edx, %xmm0
; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shlq $57, %rcx
; AVX1-NEXT: sarq $63, %rcx
; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $7, %rcx
; AVX1-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shlq $62, %rcx
; AVX1-NEXT: sarq $63, %rcx
; AVX1-NEXT: movq %rax, %rdx
; AVX1-NEXT: shlq $63, %rdx
; AVX1-NEXT: sarq $63, %rdx
; AVX1-NEXT: vmovd %edx, %xmm1
; AVX1-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shlq $61, %rcx
; AVX1-NEXT: sarq $63, %rcx
; AVX1-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
; AVX1-NEXT: shlq $60, %rax
; AVX1-NEXT: sarq $63, %rax
; AVX1-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2: # BB#0:
; AVX2-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movsbq -{{[0-9]+}}(%rsp), %rax
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shlq $58, %rcx
; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: movq %rax, %rdx
; AVX2-NEXT: shlq $59, %rdx
; AVX2-NEXT: sarq $63, %rdx
; AVX2-NEXT: vmovd %edx, %xmm0
; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shlq $57, %rcx
; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $7, %rcx
; AVX2-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shlq $62, %rcx
; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: movq %rax, %rdx
; AVX2-NEXT: shlq $63, %rdx
; AVX2-NEXT: sarq $63, %rdx
; AVX2-NEXT: vmovd %edx, %xmm1
; AVX2-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shlq $61, %rcx
; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
; AVX2-NEXT: shlq $60, %rax
; AVX2-NEXT: sarq $63, %rax
; AVX2-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i8_8i32:
; AVX512: # BB#0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}

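; Sign-extend each bit of an i16 mask to an all-ones/all-zeros i16 element.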
define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $7, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $6, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $5, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $4, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $3, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $2, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $11, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $10, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $9, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $8, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $13, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $12, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $14, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: shrl $15, %eax
; SSE2-SSSE3-NEXT: movzwl %ax, %eax
; SSE2-SSSE3-NEXT: movd %eax, %xmm4
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: psllw $15, %xmm0
; SSE2-SSSE3-NEXT: psraw $15, %xmm0
; SSE2-SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
; SSE2-SSSE3-NEXT: psllw $15, %xmm1
; SSE2-SSSE3-NEXT: psraw $15, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1: # BB#0:
; AVX1-NEXT: pushq %rbp
; AVX1-NEXT: .Lcfi0:
; AVX1-NEXT: .cfi_def_cfa_offset 16
; AVX1-NEXT: pushq %r15
; AVX1-NEXT: .Lcfi1:
; AVX1-NEXT: .cfi_def_cfa_offset 24
; AVX1-NEXT: pushq %r14
; AVX1-NEXT: .Lcfi2:
; AVX1-NEXT: .cfi_def_cfa_offset 32
; AVX1-NEXT: pushq %r13
; AVX1-NEXT: .Lcfi3:
; AVX1-NEXT: .cfi_def_cfa_offset 40
; AVX1-NEXT: pushq %r12
; AVX1-NEXT: .Lcfi4:
; AVX1-NEXT: .cfi_def_cfa_offset 48
; AVX1-NEXT: pushq %rbx
; AVX1-NEXT: .Lcfi5:
; AVX1-NEXT: .cfi_def_cfa_offset 56
; AVX1-NEXT: .Lcfi6:
; AVX1-NEXT: .cfi_offset %rbx, -56
; AVX1-NEXT: .Lcfi7:
; AVX1-NEXT: .cfi_offset %r12, -48
; AVX1-NEXT: .Lcfi8:
; AVX1-NEXT: .cfi_offset %r13, -40
; AVX1-NEXT: .Lcfi9:
; AVX1-NEXT: .cfi_offset %r14, -32
; AVX1-NEXT: .Lcfi10:
; AVX1-NEXT: .cfi_offset %r15, -24
; AVX1-NEXT: .Lcfi11:
; AVX1-NEXT: .cfi_offset %rbp, -16
; AVX1-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movswq -{{[0-9]+}}(%rsp), %rax
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shlq $55, %rcx
; AVX1-NEXT: sarq $63, %rcx
; AVX1-NEXT: vmovd %ecx, %xmm0
; AVX1-NEXT: movq %rax, %r8
; AVX1-NEXT: movq %rax, %r10
; AVX1-NEXT: movq %rax, %r11
; AVX1-NEXT: movq %rax, %r14
; AVX1-NEXT: movq %rax, %r15
; AVX1-NEXT: movq %rax, %r9
; AVX1-NEXT: movq %rax, %r12
; AVX1-NEXT: movq %rax, %r13
; AVX1-NEXT: movq %rax, %rbx
; AVX1-NEXT: movq %rax, %rdi
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: movq %rax, %rdx
; AVX1-NEXT: movq %rax, %rsi
; AVX1-NEXT: movsbq %al, %rbp
; AVX1-NEXT: shlq $54, %rax
; AVX1-NEXT: sarq $63, %rax
; AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX1-NEXT: shlq $53, %r8
; AVX1-NEXT: sarq $63, %r8
; AVX1-NEXT: vpinsrw $2, %r8d, %xmm0, %xmm0
; AVX1-NEXT: shlq $52, %r10
; AVX1-NEXT: sarq $63, %r10
; AVX1-NEXT: vpinsrw $3, %r10d, %xmm0, %xmm0
; AVX1-NEXT: shlq $51, %r11
; AVX1-NEXT: sarq $63, %r11
; AVX1-NEXT: vpinsrw $4, %r11d, %xmm0, %xmm0
; AVX1-NEXT: shlq $50, %r14
; AVX1-NEXT: sarq $63, %r14
; AVX1-NEXT: vpinsrw $5, %r14d, %xmm0, %xmm0
; AVX1-NEXT: shlq $49, %r15
; AVX1-NEXT: sarq $63, %r15
; AVX1-NEXT: vpinsrw $6, %r15d, %xmm0, %xmm0
; AVX1-NEXT: shrq $15, %r9
; AVX1-NEXT: vpinsrw $7, %r9d, %xmm0, %xmm0
; AVX1-NEXT: shlq $63, %r13
; AVX1-NEXT: sarq $63, %r13
; AVX1-NEXT: vmovd %r13d, %xmm1
; AVX1-NEXT: shlq $62, %r12
; AVX1-NEXT: sarq $63, %r12
; AVX1-NEXT: vpinsrw $1, %r12d, %xmm1, %xmm1
; AVX1-NEXT: shlq $61, %rbx
; AVX1-NEXT: sarq $63, %rbx
; AVX1-NEXT: vpinsrw $2, %ebx, %xmm1, %xmm1
; AVX1-NEXT: shlq $60, %rdi
; AVX1-NEXT: sarq $63, %rdi
; AVX1-NEXT: vpinsrw $3, %edi, %xmm1, %xmm1
; AVX1-NEXT: shlq $59, %rcx
; AVX1-NEXT: sarq $63, %rcx
; AVX1-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
; AVX1-NEXT: shlq $58, %rdx
; AVX1-NEXT: sarq $63, %rdx
; AVX1-NEXT: vpinsrw $5, %edx, %xmm1, %xmm1
; AVX1-NEXT: shlq $57, %rsi
; AVX1-NEXT: sarq $63, %rsi
; AVX1-NEXT: vpinsrw $6, %esi, %xmm1, %xmm1
; AVX1-NEXT: shrq $7, %rbp
; AVX1-NEXT: vpinsrw $7, %ebp, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: popq %rbx
; AVX1-NEXT: popq %r12
; AVX1-NEXT: popq %r13
; AVX1-NEXT: popq %r14
; AVX1-NEXT: popq %r15
; AVX1-NEXT: popq %rbp
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2: # BB#0:
; AVX2-NEXT: pushq %rbp
; AVX2-NEXT: .Lcfi0:
; AVX2-NEXT: .cfi_def_cfa_offset 16
; AVX2-NEXT: pushq %r15
; AVX2-NEXT: .Lcfi1:
; AVX2-NEXT: .cfi_def_cfa_offset 24
; AVX2-NEXT: pushq %r14
; AVX2-NEXT: .Lcfi2:
; AVX2-NEXT: .cfi_def_cfa_offset 32
; AVX2-NEXT: pushq %r13
; AVX2-NEXT: .Lcfi3:
; AVX2-NEXT: .cfi_def_cfa_offset 40
; AVX2-NEXT: pushq %r12
; AVX2-NEXT: .Lcfi4:
; AVX2-NEXT: .cfi_def_cfa_offset 48
; AVX2-NEXT: pushq %rbx
; AVX2-NEXT: .Lcfi5:
; AVX2-NEXT: .cfi_def_cfa_offset 56
; AVX2-NEXT: .Lcfi6:
; AVX2-NEXT: .cfi_offset %rbx, -56
; AVX2-NEXT: .Lcfi7:
; AVX2-NEXT: .cfi_offset %r12, -48
; AVX2-NEXT: .Lcfi8:
; AVX2-NEXT: .cfi_offset %r13, -40
; AVX2-NEXT: .Lcfi9:
; AVX2-NEXT: .cfi_offset %r14, -32
; AVX2-NEXT: .Lcfi10:
; AVX2-NEXT: .cfi_offset %r15, -24
; AVX2-NEXT: .Lcfi11:
; AVX2-NEXT: .cfi_offset %rbp, -16
; AVX2-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movswq -{{[0-9]+}}(%rsp), %rax
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shlq $55, %rcx
; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vmovd %ecx, %xmm0
; AVX2-NEXT: movq %rax, %r8
; AVX2-NEXT: movq %rax, %r10
; AVX2-NEXT: movq %rax, %r11
; AVX2-NEXT: movq %rax, %r14
; AVX2-NEXT: movq %rax, %r15
; AVX2-NEXT: movq %rax, %r9
; AVX2-NEXT: movq %rax, %r12
; AVX2-NEXT: movq %rax, %r13
; AVX2-NEXT: movq %rax, %rbx
; AVX2-NEXT: movq %rax, %rdi
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: movq %rax, %rdx
; AVX2-NEXT: movq %rax, %rsi
; AVX2-NEXT: movsbq %al, %rbp
; AVX2-NEXT: shlq $54, %rax
; AVX2-NEXT: sarq $63, %rax
; AVX2-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; AVX2-NEXT: shlq $53, %r8
; AVX2-NEXT: sarq $63, %r8
; AVX2-NEXT: vpinsrw $2, %r8d, %xmm0, %xmm0
; AVX2-NEXT: shlq $52, %r10
; AVX2-NEXT: sarq $63, %r10
; AVX2-NEXT: vpinsrw $3, %r10d, %xmm0, %xmm0
; AVX2-NEXT: shlq $51, %r11
; AVX2-NEXT: sarq $63, %r11
; AVX2-NEXT: vpinsrw $4, %r11d, %xmm0, %xmm0
; AVX2-NEXT: shlq $50, %r14
; AVX2-NEXT: sarq $63, %r14
; AVX2-NEXT: vpinsrw $5, %r14d, %xmm0, %xmm0
; AVX2-NEXT: shlq $49, %r15
; AVX2-NEXT: sarq $63, %r15
; AVX2-NEXT: vpinsrw $6, %r15d, %xmm0, %xmm0
; AVX2-NEXT: shrq $15, %r9
; AVX2-NEXT: vpinsrw $7, %r9d, %xmm0, %xmm0
; AVX2-NEXT: shlq $63, %r13
; AVX2-NEXT: sarq $63, %r13
; AVX2-NEXT: vmovd %r13d, %xmm1
; AVX2-NEXT: shlq $62, %r12
; AVX2-NEXT: sarq $63, %r12
; AVX2-NEXT: vpinsrw $1, %r12d, %xmm1, %xmm1
; AVX2-NEXT: shlq $61, %rbx
; AVX2-NEXT: sarq $63, %rbx
; AVX2-NEXT: vpinsrw $2, %ebx, %xmm1, %xmm1
; AVX2-NEXT: shlq $60, %rdi
; AVX2-NEXT: sarq $63, %rdi
; AVX2-NEXT: vpinsrw $3, %edi, %xmm1, %xmm1
; AVX2-NEXT: shlq $59, %rcx
; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1
; AVX2-NEXT: shlq $58, %rdx
; AVX2-NEXT: sarq $63, %rdx
; AVX2-NEXT: vpinsrw $5, %edx, %xmm1, %xmm1
; AVX2-NEXT: shlq $57, %rsi
; AVX2-NEXT: sarq $63, %rsi
; AVX2-NEXT: vpinsrw $6, %esi, %xmm1, %xmm1
; AVX2-NEXT: shrq $7, %rbp
; AVX2-NEXT: vpinsrw $7, %ebp, %xmm1, %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: popq %rbx
; AVX2-NEXT: popq %r12
; AVX2-NEXT: popq %r13
; AVX2-NEXT: popq %r14
; AVX2-NEXT: popq %r15
; AVX2-NEXT: popq %rbp
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i16_16i16:
; AVX512: # BB#0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2w %k0, %ymm0
; AVX512-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}

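; Sign-extend each bit of an i32 mask to an all-ones/all-zeros i8 element.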
define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: pushq %rbp
; SSE2-SSSE3-NEXT: .Lcfi12:
; SSE2-SSSE3-NEXT: .cfi_def_cfa_offset 16
; SSE2-SSSE3-NEXT: pushq %r15
; SSE2-SSSE3-NEXT: .Lcfi13:
; SSE2-SSSE3-NEXT: .cfi_def_cfa_offset 24
; SSE2-SSSE3-NEXT: pushq %r14
; SSE2-SSSE3-NEXT: .Lcfi14:
; SSE2-SSSE3-NEXT: .cfi_def_cfa_offset 32
; SSE2-SSSE3-NEXT: pushq %r13
; SSE2-SSSE3-NEXT: .Lcfi15:
; SSE2-SSSE3-NEXT: .cfi_def_cfa_offset 40
; SSE2-SSSE3-NEXT: pushq %r12
; SSE2-SSSE3-NEXT: .Lcfi16:
; SSE2-SSSE3-NEXT: .cfi_def_cfa_offset 48
; SSE2-SSSE3-NEXT: pushq %rbx
; SSE2-SSSE3-NEXT: .Lcfi17:
; SSE2-SSSE3-NEXT: .cfi_def_cfa_offset 56
; SSE2-SSSE3-NEXT: .Lcfi18:
; SSE2-SSSE3-NEXT: .cfi_offset %rbx, -56
; SSE2-SSSE3-NEXT: .Lcfi19:
; SSE2-SSSE3-NEXT: .cfi_offset %r12, -48
; SSE2-SSSE3-NEXT: .Lcfi20:
; SSE2-SSSE3-NEXT: .cfi_offset %r13, -40
; SSE2-SSSE3-NEXT: .Lcfi21:
; SSE2-SSSE3-NEXT: .cfi_offset %r14, -32
; SSE2-SSSE3-NEXT: .Lcfi22:
; SSE2-SSSE3-NEXT: .cfi_offset %r15, -24
; SSE2-SSSE3-NEXT: .Lcfi23:
; SSE2-SSSE3-NEXT: .cfi_offset %rbp, -16
; SSE2-SSSE3-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: shrl $16, %edi
; SSE2-SSSE3-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movswq -{{[0-9]+}}(%rsp), %rbx
; SSE2-SSSE3-NEXT: movq %rbx, %r8
; SSE2-SSSE3-NEXT: movq %rbx, %r9
; SSE2-SSSE3-NEXT: movq %rbx, %r10
; SSE2-SSSE3-NEXT: movq %rbx, %r11
; SSE2-SSSE3-NEXT: movq %rbx, %r14
; SSE2-SSSE3-NEXT: movq %rbx, %r15
; SSE2-SSSE3-NEXT: movq %rbx, %r12
; SSE2-SSSE3-NEXT: movq %rbx, %r13
; SSE2-SSSE3-NEXT: movq %rbx, %rdi
; SSE2-SSSE3-NEXT: movq %rbx, %rcx
; SSE2-SSSE3-NEXT: movq %rbx, %rdx
; SSE2-SSSE3-NEXT: movq %rbx, %rbp
; SSE2-SSSE3-NEXT: movq %rbx, %rsi
; SSE2-SSSE3-NEXT: movq %rbx, %rax
; SSE2-SSSE3-NEXT: shrq $15, %rax
; SSE2-SSSE3-NEXT: movd %eax, %xmm0
; SSE2-SSSE3-NEXT: movq %rbx, %rax
; SSE2-SSSE3-NEXT: movsbq %bl, %rbx
; SSE2-SSSE3-NEXT: shlq $49, %r8
; SSE2-SSSE3-NEXT: sarq $63, %r8
; SSE2-SSSE3-NEXT: movd %r8d, %xmm15
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7]
; SSE2-SSSE3-NEXT: shlq $50, %r9
; SSE2-SSSE3-NEXT: sarq $63, %r9
; SSE2-SSSE3-NEXT: movd %r9d, %xmm8
; SSE2-SSSE3-NEXT: shlq $51, %r10
; SSE2-SSSE3-NEXT: sarq $63, %r10
; SSE2-SSSE3-NEXT: movd %r10d, %xmm3
; SSE2-SSSE3-NEXT: shlq $52, %r11
; SSE2-SSSE3-NEXT: sarq $63, %r11
; SSE2-SSSE3-NEXT: movd %r11d, %xmm9
; SSE2-SSSE3-NEXT: shlq $53, %r14
; SSE2-SSSE3-NEXT: sarq $63, %r14
; SSE2-SSSE3-NEXT: movd %r14d, %xmm6
; SSE2-SSSE3-NEXT: shlq $54, %r15
; SSE2-SSSE3-NEXT: sarq $63, %r15
; SSE2-SSSE3-NEXT: movd %r15d, %xmm10
; SSE2-SSSE3-NEXT: shlq $55, %r12
; SSE2-SSSE3-NEXT: sarq $63, %r12
; SSE2-SSSE3-NEXT: movd %r12d, %xmm1
; SSE2-SSSE3-NEXT: shlq $60, %r13
; SSE2-SSSE3-NEXT: sarq $63, %r13
; SSE2-SSSE3-NEXT: movd %r13d, %xmm11
; SSE2-SSSE3-NEXT: shlq $61, %rdi
; SSE2-SSSE3-NEXT: sarq $63, %rdi
; SSE2-SSSE3-NEXT: movd %edi, %xmm5
; SSE2-SSSE3-NEXT: shlq $62, %rcx
; SSE2-SSSE3-NEXT: sarq $63, %rcx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm12
; SSE2-SSSE3-NEXT: shlq $63, %rdx
; SSE2-SSSE3-NEXT: sarq $63, %rdx
; SSE2-SSSE3-NEXT: movd %edx, %xmm0
; SSE2-SSSE3-NEXT: shlq $58, %rbp
; SSE2-SSSE3-NEXT: sarq $63, %rbp
; SSE2-SSSE3-NEXT: movd %ebp, %xmm13
; SSE2-SSSE3-NEXT: shlq $59, %rsi
; SSE2-SSSE3-NEXT: sarq $63, %rsi
; SSE2-SSSE3-NEXT: movd %esi, %xmm7
; SSE2-SSSE3-NEXT: shlq $57, %rax
; SSE2-SSSE3-NEXT: sarq $63, %rax
; SSE2-SSSE3-NEXT: movd %eax, %xmm4
; SSE2-SSSE3-NEXT: shrq $7, %rbx
; SSE2-SSSE3-NEXT: movd %ebx, %xmm14
; SSE2-SSSE3-NEXT: movswq -{{[0-9]+}}(%rsp), %rsi
; SSE2-SSSE3-NEXT: movq %rsi, %r8
; SSE2-SSSE3-NEXT: movq %rsi, %r9
; SSE2-SSSE3-NEXT: movq %rsi, %r10
; SSE2-SSSE3-NEXT: movq %rsi, %r11
; SSE2-SSSE3-NEXT: movq %rsi, %r14
; SSE2-SSSE3-NEXT: movq %rsi, %r15
; SSE2-SSSE3-NEXT: movq %rsi, %r12
; SSE2-SSSE3-NEXT: movq %rsi, %r13
; SSE2-SSSE3-NEXT: movq %rsi, %rbx
; SSE2-SSSE3-NEXT: movq %rsi, %rax
; SSE2-SSSE3-NEXT: movq %rsi, %rcx
; SSE2-SSSE3-NEXT: movq %rsi, %rdx
; SSE2-SSSE3-NEXT: movq %rsi, %rdi
; SSE2-SSSE3-NEXT: movq %rsi, %rbp
; SSE2-SSSE3-NEXT: shrq $15, %rbp
; SSE2-SSSE3-NEXT: movd %ebp, %xmm2
; SSE2-SSSE3-NEXT: movq %rsi, %rbp
; SSE2-SSSE3-NEXT: movsbq %sil, %rsi
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm8[0],xmm3[1],xmm8[1],xmm3[2],xmm8[2],xmm3[3],xmm8[3],xmm3[4],xmm8[4],xmm3[5],xmm8[5],xmm3[6],xmm8[6],xmm3[7],xmm8[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm9[0],xmm6[1],xmm9[1],xmm6[2],xmm9[2],xmm6[3],xmm9[3],xmm6[4],xmm9[4],xmm6[5],xmm9[5],xmm6[6],xmm9[6],xmm6[7],xmm9[7]
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm10[0],xmm1[1],xmm10[1],xmm1[2],xmm10[2],xmm1[3],xmm10[3],xmm1[4],xmm10[4],xmm1[5],xmm10[5],xmm1[6],xmm10[6],xmm1[7],xmm10[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm11[0],xmm5[1],xmm11[1],xmm5[2],xmm11[2],xmm5[3],xmm11[3],xmm5[4],xmm11[4],xmm5[5],xmm11[5],xmm5[6],xmm11[6],xmm5[7],xmm11[7]
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm12[0],xmm0[1],xmm12[1],xmm0[2],xmm12[2],xmm0[3],xmm12[3],xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3]
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7]
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm14[0],xmm4[1],xmm14[1],xmm4[2],xmm14[2],xmm4[3],xmm14[3],xmm4[4],xmm14[4],xmm4[5],xmm14[5],xmm4[6],xmm14[6],xmm4[7],xmm14[7]
; SSE2-SSSE3-NEXT: shlq $49, %r8
; SSE2-SSSE3-NEXT: sarq $63, %r8
; SSE2-SSSE3-NEXT: movd %r8d, %xmm3
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3]
; SSE2-SSSE3-NEXT: shlq $50, %r9
; SSE2-SSSE3-NEXT: sarq $63, %r9
; SSE2-SSSE3-NEXT: movd %r9d, %xmm4
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
; SSE2-SSSE3-NEXT: shlq $51, %r10
; SSE2-SSSE3-NEXT: sarq $63, %r10
; SSE2-SSSE3-NEXT: movd %r10d, %xmm5
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-SSSE3-NEXT: shlq $52, %r11
; SSE2-SSSE3-NEXT: sarq $63, %r11
; SSE2-SSSE3-NEXT: movd %r11d, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
; SSE2-SSSE3-NEXT: shlq $53, %r14
; SSE2-SSSE3-NEXT: sarq $63, %r14
; SSE2-SSSE3-NEXT: movd %r14d, %xmm2
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7]
; SSE2-SSSE3-NEXT: shlq $54, %r15
; SSE2-SSSE3-NEXT: sarq $63, %r15
; SSE2-SSSE3-NEXT: movd %r15d, %xmm4
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
; SSE2-SSSE3-NEXT: shlq $55, %r12
; SSE2-SSSE3-NEXT: sarq $63, %r12
; SSE2-SSSE3-NEXT: movd %r12d, %xmm3
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-SSSE3-NEXT: shlq $60, %r13
; SSE2-SSSE3-NEXT: sarq $63, %r13
; SSE2-SSSE3-NEXT: movd %r13d, %xmm6
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-SSSE3-NEXT: shlq $61, %rbx
; SSE2-SSSE3-NEXT: sarq $63, %rbx
; SSE2-SSSE3-NEXT: movd %ebx, %xmm4
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; SSE2-SSSE3-NEXT: shlq $62, %rax
; SSE2-SSSE3-NEXT: sarq $63, %rax
; SSE2-SSSE3-NEXT: movd %eax, %xmm2
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
; SSE2-SSSE3-NEXT: shlq $63, %rcx
; SSE2-SSSE3-NEXT: sarq $63, %rcx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1],xmm4[2],xmm6[2],xmm4[3],xmm6[3],xmm4[4],xmm6[4],xmm4[5],xmm6[5],xmm4[6],xmm6[6],xmm4[7],xmm6[7]
; SSE2-SSSE3-NEXT: shlq $58, %rdx
; SSE2-SSSE3-NEXT: sarq $63, %rdx
; SSE2-SSSE3-NEXT: movd %edx, %xmm5
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-SSSE3-NEXT: shlq $59, %rdi
; SSE2-SSSE3-NEXT: sarq $63, %rdi
; SSE2-SSSE3-NEXT: movd %edi, %xmm2
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3],xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7]
; SSE2-SSSE3-NEXT: shlq $57, %rbp
; SSE2-SSSE3-NEXT: sarq $63, %rbp
; SSE2-SSSE3-NEXT: movd %ebp, %xmm4
; SSE2-SSSE3-NEXT: shrq $7, %rsi
; SSE2-SSSE3-NEXT: movd %esi, %xmm5
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE2-SSSE3-NEXT: popq %rbx
; SSE2-SSSE3-NEXT: popq %r12
; SSE2-SSSE3-NEXT: popq %r13
; SSE2-SSSE3-NEXT: popq %r14
; SSE2-SSSE3-NEXT: popq %r15
; SSE2-SSSE3-NEXT: popq %rbp
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1: # BB#0:
; AVX1-NEXT: pushq %rbp
; AVX1-NEXT: .Lcfi12:
; AVX1-NEXT: .cfi_def_cfa_offset 16
; AVX1-NEXT: .Lcfi13:
; AVX1-NEXT: .cfi_offset %rbp, -16
; AVX1-NEXT: movq %rsp, %rbp
; AVX1-NEXT: .Lcfi14:
; AVX1-NEXT: .cfi_def_cfa_register %rbp
; AVX1-NEXT: pushq %r15
; AVX1-NEXT: pushq %r14
; AVX1-NEXT: pushq %r13
; AVX1-NEXT: pushq %r12
; AVX1-NEXT: pushq %rbx
; AVX1-NEXT: andq $-32, %rsp
; AVX1-NEXT: subq $64, %rsp
; AVX1-NEXT: .Lcfi15:
; AVX1-NEXT: .cfi_offset %rbx, -56
; AVX1-NEXT: .Lcfi16:
; AVX1-NEXT: .cfi_offset %r12, -48
; AVX1-NEXT: .Lcfi17:
; AVX1-NEXT: .cfi_offset %r13, -40
; AVX1-NEXT: .Lcfi18:
; AVX1-NEXT: .cfi_offset %r14, -32
; AVX1-NEXT: .Lcfi19:
; AVX1-NEXT: .cfi_offset %r15, -24
; AVX1-NEXT: movl %edi, (%rsp)
; AVX1-NEXT: movslq (%rsp), %rdx
; AVX1-NEXT: movq %rdx, %rcx
; AVX1-NEXT: shlq $47, %rcx
; AVX1-NEXT: sarq $63, %rcx
; AVX1-NEXT: vmovd %ecx, %xmm0
; AVX1-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX1-NEXT: movq %rdx, %r8
; AVX1-NEXT: movq %rdx, %rcx
; AVX1-NEXT: movq %rdx, %rdi
; AVX1-NEXT: movq %rdx, %r13
; AVX1-NEXT: movq %rdx, %rsi
; AVX1-NEXT: movq %rdx, %r10
; AVX1-NEXT: movq %rdx, %r11
; AVX1-NEXT: movq %rdx, %r9
; AVX1-NEXT: movq %rdx, %rbx
; AVX1-NEXT: movq %rdx, %r14
; AVX1-NEXT: movq %rdx, %r15
; AVX1-NEXT: movq %rdx, %r12
; AVX1-NEXT: movq %rdx, %rax
; AVX1-NEXT: shlq $46, %rax
; AVX1-NEXT: sarq $63, %rax
; AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
; AVX1-NEXT: shlq $45, %rax
; AVX1-NEXT: sarq $63, %rax
; AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX1-NEXT: shlq $44, %r8
; AVX1-NEXT: sarq $63, %r8
; AVX1-NEXT: vpinsrb $3, %r8d, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %r8
; AVX1-NEXT: shlq $43, %rcx
; AVX1-NEXT: sarq $63, %rcx
; AVX1-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %rcx
; AVX1-NEXT: shlq $42, %rdi
; AVX1-NEXT: sarq $63, %rdi
; AVX1-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %rdi
; AVX1-NEXT: shlq $41, %r13
; AVX1-NEXT: sarq $63, %r13
; AVX1-NEXT: vpinsrb $6, %r13d, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %r13
; AVX1-NEXT: shlq $40, %rsi
; AVX1-NEXT: sarq $63, %rsi
; AVX1-NEXT: vpinsrb $7, %esi, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %rsi
; AVX1-NEXT: shlq $39, %r10
; AVX1-NEXT: sarq $63, %r10
; AVX1-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %r10
; AVX1-NEXT: shlq $38, %r11
; AVX1-NEXT: sarq $63, %r11
; AVX1-NEXT: vpinsrb $9, %r11d, %xmm0, %xmm0
; AVX1-NEXT: movsbq %dl, %rax
; AVX1-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX1-NEXT: shlq $37, %r9
; AVX1-NEXT: sarq $63, %r9
; AVX1-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %r9
; AVX1-NEXT: shlq $36, %rbx
; AVX1-NEXT: sarq $63, %rbx
; AVX1-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %rbx
; AVX1-NEXT: shlq $35, %r14
; AVX1-NEXT: sarq $63, %r14
; AVX1-NEXT: vpinsrb $12, %r14d, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %r14
; AVX1-NEXT: shlq $34, %r15
; AVX1-NEXT: sarq $63, %r15
; AVX1-NEXT: vpinsrb $13, %r15d, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %r15
; AVX1-NEXT: shlq $33, %r12
; AVX1-NEXT: sarq $63, %r12
; AVX1-NEXT: vpinsrb $14, %r12d, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %r12
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
; AVX1-NEXT: shrq $31, %rax
; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %rax
; AVX1-NEXT: shlq $63, %r8
; AVX1-NEXT: sarq $63, %r8
; AVX1-NEXT: vmovd %r8d, %xmm1
; AVX1-NEXT: movq %rdx, %r8
; AVX1-NEXT: movswq %dx, %rdx
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r11 # 8-byte Reload
; AVX1-NEXT: shlq $62, %r11
; AVX1-NEXT: sarq $63, %r11
; AVX1-NEXT: vpinsrb $1, %r11d, %xmm1, %xmm1
; AVX1-NEXT: shlq $61, %rcx
; AVX1-NEXT: sarq $63, %rcx
; AVX1-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
; AVX1-NEXT: shlq $60, %rdi
; AVX1-NEXT: sarq $63, %rdi
; AVX1-NEXT: vpinsrb $3, %edi, %xmm1, %xmm1
; AVX1-NEXT: shlq $59, %r13
; AVX1-NEXT: sarq $63, %r13
; AVX1-NEXT: vpinsrb $4, %r13d, %xmm1, %xmm1
; AVX1-NEXT: shlq $58, %rsi
; AVX1-NEXT: sarq $63, %rsi
; AVX1-NEXT: vpinsrb $5, %esi, %xmm1, %xmm1
; AVX1-NEXT: shlq $57, %r10
; AVX1-NEXT: sarq $63, %r10
; AVX1-NEXT: vpinsrb $6, %r10d, %xmm1, %xmm1
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
; AVX1-NEXT: shrq $7, %rcx
; AVX1-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1
; AVX1-NEXT: shlq $55, %r9
; AVX1-NEXT: sarq $63, %r9
; AVX1-NEXT: vpinsrb $8, %r9d, %xmm1, %xmm1
; AVX1-NEXT: shlq $54, %rbx
; AVX1-NEXT: sarq $63, %rbx
; AVX1-NEXT: vpinsrb $9, %ebx, %xmm1, %xmm1
; AVX1-NEXT: shlq $53, %r14
; AVX1-NEXT: sarq $63, %r14
; AVX1-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1
; AVX1-NEXT: shlq $52, %r15
; AVX1-NEXT: sarq $63, %r15
; AVX1-NEXT: vpinsrb $11, %r15d, %xmm1, %xmm1
; AVX1-NEXT: shlq $51, %r12
; AVX1-NEXT: sarq $63, %r12
; AVX1-NEXT: vpinsrb $12, %r12d, %xmm1, %xmm1
; AVX1-NEXT: shlq $50, %rax
; AVX1-NEXT: sarq $63, %rax
; AVX1-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX1-NEXT: shlq $49, %r8
; AVX1-NEXT: sarq $63, %r8
; AVX1-NEXT: vpinsrb $14, %r8d, %xmm1, %xmm1
; AVX1-NEXT: shrq $15, %rdx
; AVX1-NEXT: vpinsrb $15, %edx, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: leaq -40(%rbp), %rsp
; AVX1-NEXT: popq %rbx
; AVX1-NEXT: popq %r12
; AVX1-NEXT: popq %r13
; AVX1-NEXT: popq %r14
; AVX1-NEXT: popq %r15
; AVX1-NEXT: popq %rbp
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i32_32i8:
; AVX2: # BB#0:
; AVX2-NEXT: pushq %rbp
; AVX2-NEXT: .Lcfi12:
; AVX2-NEXT: .cfi_def_cfa_offset 16
; AVX2-NEXT: .Lcfi13:
; AVX2-NEXT: .cfi_offset %rbp, -16
; AVX2-NEXT: movq %rsp, %rbp
; AVX2-NEXT: .Lcfi14:
; AVX2-NEXT: .cfi_def_cfa_register %rbp
; AVX2-NEXT: pushq %r15
; AVX2-NEXT: pushq %r14
; AVX2-NEXT: pushq %r13
; AVX2-NEXT: pushq %r12
; AVX2-NEXT: pushq %rbx
; AVX2-NEXT: andq $-32, %rsp
; AVX2-NEXT: subq $64, %rsp
; AVX2-NEXT: .Lcfi15:
; AVX2-NEXT: .cfi_offset %rbx, -56
; AVX2-NEXT: .Lcfi16:
; AVX2-NEXT: .cfi_offset %r12, -48
; AVX2-NEXT: .Lcfi17:
; AVX2-NEXT: .cfi_offset %r13, -40
; AVX2-NEXT: .Lcfi18:
; AVX2-NEXT: .cfi_offset %r14, -32
; AVX2-NEXT: .Lcfi19:
; AVX2-NEXT: .cfi_offset %r15, -24
; AVX2-NEXT: movl %edi, (%rsp)
; AVX2-NEXT: movslq (%rsp), %rdx
; AVX2-NEXT: movq %rdx, %rcx
; AVX2-NEXT: shlq $47, %rcx
; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vmovd %ecx, %xmm0
; AVX2-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX2-NEXT: movq %rdx, %r8
; AVX2-NEXT: movq %rdx, %rcx
; AVX2-NEXT: movq %rdx, %rdi
; AVX2-NEXT: movq %rdx, %r13
; AVX2-NEXT: movq %rdx, %rsi
; AVX2-NEXT: movq %rdx, %r10
; AVX2-NEXT: movq %rdx, %r11
; AVX2-NEXT: movq %rdx, %r9
; AVX2-NEXT: movq %rdx, %rbx
; AVX2-NEXT: movq %rdx, %r14
; AVX2-NEXT: movq %rdx, %r15
; AVX2-NEXT: movq %rdx, %r12
; AVX2-NEXT: movq %rdx, %rax
; AVX2-NEXT: shlq $46, %rax
; AVX2-NEXT: sarq $63, %rax
; AVX2-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
; AVX2-NEXT: shlq $45, %rax
; AVX2-NEXT: sarq $63, %rax
; AVX2-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX2-NEXT: shlq $44, %r8
; AVX2-NEXT: sarq $63, %r8
; AVX2-NEXT: vpinsrb $3, %r8d, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %r8
; AVX2-NEXT: shlq $43, %rcx
; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %rcx
; AVX2-NEXT: shlq $42, %rdi
; AVX2-NEXT: sarq $63, %rdi
; AVX2-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %rdi
; AVX2-NEXT: shlq $41, %r13
; AVX2-NEXT: sarq $63, %r13
; AVX2-NEXT: vpinsrb $6, %r13d, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %r13
; AVX2-NEXT: shlq $40, %rsi
; AVX2-NEXT: sarq $63, %rsi
; AVX2-NEXT: vpinsrb $7, %esi, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %rsi
; AVX2-NEXT: shlq $39, %r10
; AVX2-NEXT: sarq $63, %r10
; AVX2-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %r10
; AVX2-NEXT: shlq $38, %r11
; AVX2-NEXT: sarq $63, %r11
; AVX2-NEXT: vpinsrb $9, %r11d, %xmm0, %xmm0
; AVX2-NEXT: movsbq %dl, %rax
; AVX2-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX2-NEXT: shlq $37, %r9
; AVX2-NEXT: sarq $63, %r9
; AVX2-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %r9
; AVX2-NEXT: shlq $36, %rbx
; AVX2-NEXT: sarq $63, %rbx
; AVX2-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %rbx
; AVX2-NEXT: shlq $35, %r14
; AVX2-NEXT: sarq $63, %r14
; AVX2-NEXT: vpinsrb $12, %r14d, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %r14
; AVX2-NEXT: shlq $34, %r15
; AVX2-NEXT: sarq $63, %r15
; AVX2-NEXT: vpinsrb $13, %r15d, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %r15
; AVX2-NEXT: shlq $33, %r12
; AVX2-NEXT: sarq $63, %r12
; AVX2-NEXT: vpinsrb $14, %r12d, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %r12
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
; AVX2-NEXT: shrq $31, %rax
; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %rax
; AVX2-NEXT: shlq $63, %r8
; AVX2-NEXT: sarq $63, %r8
; AVX2-NEXT: vmovd %r8d, %xmm1
; AVX2-NEXT: movq %rdx, %r8
; AVX2-NEXT: movswq %dx, %rdx
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11 # 8-byte Reload
; AVX2-NEXT: shlq $62, %r11
; AVX2-NEXT: sarq $63, %r11
; AVX2-NEXT: vpinsrb $1, %r11d, %xmm1, %xmm1
; AVX2-NEXT: shlq $61, %rcx
; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
; AVX2-NEXT: shlq $60, %rdi
; AVX2-NEXT: sarq $63, %rdi
; AVX2-NEXT: vpinsrb $3, %edi, %xmm1, %xmm1
; AVX2-NEXT: shlq $59, %r13
; AVX2-NEXT: sarq $63, %r13
; AVX2-NEXT: vpinsrb $4, %r13d, %xmm1, %xmm1
; AVX2-NEXT: shlq $58, %rsi
; AVX2-NEXT: sarq $63, %rsi
; AVX2-NEXT: vpinsrb $5, %esi, %xmm1, %xmm1
; AVX2-NEXT: shlq $57, %r10
; AVX2-NEXT: sarq $63, %r10
; AVX2-NEXT: vpinsrb $6, %r10d, %xmm1, %xmm1
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
; AVX2-NEXT: shrq $7, %rcx
; AVX2-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1
; AVX2-NEXT: shlq $55, %r9
; AVX2-NEXT: sarq $63, %r9
; AVX2-NEXT: vpinsrb $8, %r9d, %xmm1, %xmm1
; AVX2-NEXT: shlq $54, %rbx
; AVX2-NEXT: sarq $63, %rbx
; AVX2-NEXT: vpinsrb $9, %ebx, %xmm1, %xmm1
; AVX2-NEXT: shlq $53, %r14
; AVX2-NEXT: sarq $63, %r14
; AVX2-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1
; AVX2-NEXT: shlq $52, %r15
; AVX2-NEXT: sarq $63, %r15
; AVX2-NEXT: vpinsrb $11, %r15d, %xmm1, %xmm1
; AVX2-NEXT: shlq $51, %r12
; AVX2-NEXT: sarq $63, %r12
; AVX2-NEXT: vpinsrb $12, %r12d, %xmm1, %xmm1
; AVX2-NEXT: shlq $50, %rax
; AVX2-NEXT: sarq $63, %rax
; AVX2-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX2-NEXT: shlq $49, %r8
; AVX2-NEXT: sarq $63, %r8
; AVX2-NEXT: vpinsrb $14, %r8d, %xmm1, %xmm1
; AVX2-NEXT: shrq $15, %rdx
; AVX2-NEXT: vpinsrb $15, %edx, %xmm1, %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: leaq -40(%rbp), %rsp
1481; AVX2-NEXT: popq %rbx
1482; AVX2-NEXT: popq %r12
1483; AVX2-NEXT: popq %r13
1484; AVX2-NEXT: popq %r14
1485; AVX2-NEXT: popq %r15
1486; AVX2-NEXT: popq %rbp
1487; AVX2-NEXT: retq
1488;
1489; AVX512-LABEL: ext_i32_32i8:
1490; AVX512: # BB#0:
1491; AVX512-NEXT: kmovd %edi, %k0
1492; AVX512-NEXT: vpmovm2b %k0, %ymm0
1493; AVX512-NEXT: retq
1494 %1 = bitcast i32 %a0 to <32 x i1>
1495 %2 = sext <32 x i1> %1 to <32 x i8>
1496 ret <32 x i8> %2
1497}

;
; 512-bit vectors
;

define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $3, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $2, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $5, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $4, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $6, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: shrl $7, %eax
; SSE2-SSSE3-NEXT: movzwl %ax, %eax
; SSE2-SSSE3-NEXT: movd %eax, %xmm2
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,1,0,3]
; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
; SSE2-SSSE3-NEXT: psllq $63, %xmm0
; SSE2-SSSE3-NEXT: psrad $31, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,1,3]
; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,6,7]
; SSE2-SSSE3-NEXT: psllq $63, %xmm1
; SSE2-SSSE3-NEXT: psrad $31, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[2,1,2,3]
; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,6,7]
; SSE2-SSSE3-NEXT: psllq $63, %xmm2
; SSE2-SSSE3-NEXT: psrad $31, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,1,3,3]
; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,6,7]
; SSE2-SSSE3-NEXT: psllq $63, %xmm3
; SSE2-SSSE3-NEXT: psrad $31, %xmm3
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1: # BB#0:
; AVX1-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: movl %eax, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: vmovd %edx, %xmm0
; AVX1-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $2, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $3, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $4, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $5, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $6, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; AVX1-NEXT: shrl $7, %eax
; AVX1-NEXT: movzwl %ax, %eax
; AVX1-NEXT: vpinsrw $7, %eax, %xmm0, %xmm1
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2: # BB#0:
; AVX2-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: movl %eax, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: vmovd %edx, %xmm0
; AVX2-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $2, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $3, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $4, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $5, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $6, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; AVX2-NEXT: shrl $7, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vpinsrw $7, %eax, %xmm0, %xmm1
; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX2-NEXT: vpmovsxdq %xmm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i8_8i64:
; AVX512: # BB#0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: retq
 %1 = bitcast i8 %a0 to <8 x i1>
 %2 = sext <8 x i1> %1 to <8 x i64>
 ret <8 x i64> %2
}

define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $7, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $6, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $5, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $4, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $3, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $2, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $11, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $10, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $9, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $8, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $13, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $12, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $14, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: shrl $15, %eax
; SSE2-SSSE3-NEXT: movzwl %ax, %eax
; SSE2-SSSE3-NEXT: movd %eax, %xmm4
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-SSSE3-NEXT: pslld $31, %xmm0
; SSE2-SSSE3-NEXT: psrad $31, %xmm0
; SSE2-SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-SSSE3-NEXT: pslld $31, %xmm1
; SSE2-SSSE3-NEXT: psrad $31, %xmm1
; SSE2-SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-SSSE3-NEXT: pslld $31, %xmm2
; SSE2-SSSE3-NEXT: psrad $31, %xmm2
; SSE2-SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE2-SSSE3-NEXT: pslld $31, %xmm3
; SSE2-SSSE3-NEXT: psrad $31, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1: # BB#0:
; AVX1-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: movl %eax, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: vmovd %edx, %xmm0
; AVX1-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $2, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $3, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $4, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $5, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $6, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $7, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $8, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $9, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $10, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $11, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $12, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $13, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $14, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; AVX1-NEXT: shrl $15, %eax
; AVX1-NEXT: movzwl %ax, %eax
; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm1
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
; AVX1-NEXT: vpsrad $31, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX1-NEXT: vpslld $31, %xmm2, %xmm2
; AVX1-NEXT: vpsrad $31, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2: # BB#0:
; AVX2-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: movl %eax, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: vmovd %edx, %xmm0
; AVX2-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $2, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $3, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $4, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $5, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $6, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $7, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $8, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $9, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $10, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $11, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $12, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $13, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $14, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; AVX2-NEXT: shrl $15, %eax
; AVX2-NEXT: movzwl %ax, %eax
; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm1
; AVX2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT: vpslld $31, %ymm1, %ymm1
; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i16_16i32:
; AVX512: # BB#0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: retq
 %1 = bitcast i16 %a0 to <16 x i1>
 %2 = sext <16 x i1> %1 to <16 x i32>
 ret <16 x i32> %2
}

define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movl %edi, %eax
; SSE2-SSSE3-NEXT: shrl $16, %eax
; SSE2-SSSE3-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $7, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $6, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $5, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $4, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $3, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $2, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $11, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $10, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $9, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $8, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $13, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $12, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $14, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: shrl $15, %eax
; SSE2-SSSE3-NEXT: movzwl %ax, %eax
; SSE2-SSSE3-NEXT: movd %eax, %xmm4
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE2-SSSE3-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $7, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $6, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $5, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $4, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $3, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $2, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm4
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $11, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $10, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $9, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm4
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $8, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm0
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $13, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $12, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm4
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
; SSE2-SSSE3-NEXT: movl %eax, %ecx
; SSE2-SSSE3-NEXT: shrl $14, %ecx
; SSE2-SSSE3-NEXT: andl $1, %ecx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: shrl $15, %eax
; SSE2-SSSE3-NEXT: movzwl %ax, %eax
; SSE2-SSSE3-NEXT: movd %eax, %xmm5
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3],xmm2[4],xmm5[4],xmm2[5],xmm5[5],xmm2[6],xmm5[6],xmm2[7],xmm5[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: psllw $15, %xmm0
; SSE2-SSSE3-NEXT: psraw $15, %xmm0
; SSE2-SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
; SSE2-SSSE3-NEXT: psllw $15, %xmm1
; SSE2-SSSE3-NEXT: psraw $15, %xmm1
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-SSSE3-NEXT: psllw $15, %xmm2
; SSE2-SSSE3-NEXT: psraw $15, %xmm2
; SSE2-SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15]
; SSE2-SSSE3-NEXT: psllw $15, %xmm3
; SSE2-SSSE3-NEXT: psraw $15, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1: # BB#0:
; AVX1-NEXT: pushq %rbp
; AVX1-NEXT: .Lcfi20:
; AVX1-NEXT: .cfi_def_cfa_offset 16
; AVX1-NEXT: .Lcfi21:
; AVX1-NEXT: .cfi_offset %rbp, -16
; AVX1-NEXT: movq %rsp, %rbp
; AVX1-NEXT: .Lcfi22:
; AVX1-NEXT: .cfi_def_cfa_register %rbp
; AVX1-NEXT: pushq %r15
; AVX1-NEXT: pushq %r14
; AVX1-NEXT: pushq %r13
; AVX1-NEXT: pushq %r12
; AVX1-NEXT: pushq %rbx
; AVX1-NEXT: andq $-32, %rsp
; AVX1-NEXT: subq $128, %rsp
; AVX1-NEXT: .Lcfi23:
; AVX1-NEXT: .cfi_offset %rbx, -56
; AVX1-NEXT: .Lcfi24:
; AVX1-NEXT: .cfi_offset %r12, -48
; AVX1-NEXT: .Lcfi25:
; AVX1-NEXT: .cfi_offset %r13, -40
; AVX1-NEXT: .Lcfi26:
; AVX1-NEXT: .cfi_offset %r14, -32
; AVX1-NEXT: .Lcfi27:
; AVX1-NEXT: .cfi_offset %r15, -24
; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX1-NEXT: movl %edi, %r13d
; AVX1-NEXT: movl %edi, %r12d
; AVX1-NEXT: movl %edi, %r15d
; AVX1-NEXT: movl %edi, %r14d
; AVX1-NEXT: movl %edi, %ebx
; AVX1-NEXT: movl %edi, %r11d
; AVX1-NEXT: movl %edi, %r10d
; AVX1-NEXT: movl %edi, %r9d
; AVX1-NEXT: movl %edi, %r8d
; AVX1-NEXT: movl %edi, %esi
; AVX1-NEXT: movl %edi, %edx
; AVX1-NEXT: movl %edi, %ecx
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: andl $1, %edi
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: shrl %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX1-NEXT: shrl $2, %ecx
; AVX1-NEXT: andl $1, %ecx
; AVX1-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; AVX1-NEXT: shrl $3, %edx
; AVX1-NEXT: andl $1, %edx
; AVX1-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
; AVX1-NEXT: shrl $4, %esi
; AVX1-NEXT: andl $1, %esi
; AVX1-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; AVX1-NEXT: shrl $5, %r8d
; AVX1-NEXT: andl $1, %r8d
; AVX1-NEXT: vpinsrb $5, %r8d, %xmm0, %xmm0
; AVX1-NEXT: shrl $6, %r9d
; AVX1-NEXT: andl $1, %r9d
; AVX1-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; AVX1-NEXT: shrl $7, %r10d
; AVX1-NEXT: andl $1, %r10d
; AVX1-NEXT: vpinsrb $7, %r10d, %xmm0, %xmm0
; AVX1-NEXT: shrl $8, %r11d
; AVX1-NEXT: andl $1, %r11d
; AVX1-NEXT: vpinsrb $8, %r11d, %xmm0, %xmm0
; AVX1-NEXT: shrl $9, %ebx
; AVX1-NEXT: andl $1, %ebx
; AVX1-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; AVX1-NEXT: shrl $10, %r14d
; AVX1-NEXT: andl $1, %r14d
; AVX1-NEXT: vpinsrb $10, %r14d, %xmm0, %xmm0
; AVX1-NEXT: shrl $11, %r15d
; AVX1-NEXT: andl $1, %r15d
; AVX1-NEXT: vpinsrb $11, %r15d, %xmm0, %xmm0
; AVX1-NEXT: shrl $12, %r12d
; AVX1-NEXT: andl $1, %r12d
; AVX1-NEXT: vpinsrb $12, %r12d, %xmm0, %xmm0
; AVX1-NEXT: shrl $13, %r13d
; AVX1-NEXT: andl $1, %r13d
; AVX1-NEXT: vpinsrb $13, %r13d, %xmm0, %xmm0
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX1-NEXT: shrl $14, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX1-NEXT: shrl $15, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX1-NEXT: shrl $16, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vmovd %eax, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX1-NEXT: shrl $17, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX1-NEXT: shrl $18, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX1-NEXT: shrl $19, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX1-NEXT: shrl $20, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX1-NEXT: shrl $21, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX1-NEXT: shrl $22, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX1-NEXT: shrl $23, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX1-NEXT: shrl $24, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX1-NEXT: shrl $25, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX1-NEXT: shrl $26, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX1-NEXT: shrl $27, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX1-NEXT: shrl $28, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX1-NEXT: shrl $29, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX1-NEXT: shrl $30, %eax
; AVX1-NEXT: andl $1, %eax
; AVX1-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX1-NEXT: shrl $31, %eax
; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
; AVX1-NEXT: vpsraw $15, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX1-NEXT: vpsllw $15, %xmm2, %xmm2
; AVX1-NEXT: vpsraw $15, %xmm2, %xmm2
; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX1-NEXT: vpsllw $15, %xmm1, %xmm1
; AVX1-NEXT: vpsraw $15, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: leaq -40(%rbp), %rsp
; AVX1-NEXT: popq %rbx
; AVX1-NEXT: popq %r12
; AVX1-NEXT: popq %r13
; AVX1-NEXT: popq %r14
; AVX1-NEXT: popq %r15
; AVX1-NEXT: popq %rbp
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2: # BB#0:
; AVX2-NEXT: pushq %rbp
; AVX2-NEXT: .Lcfi20:
; AVX2-NEXT: .cfi_def_cfa_offset 16
; AVX2-NEXT: .Lcfi21:
; AVX2-NEXT: .cfi_offset %rbp, -16
; AVX2-NEXT: movq %rsp, %rbp
; AVX2-NEXT: .Lcfi22:
; AVX2-NEXT: .cfi_def_cfa_register %rbp
; AVX2-NEXT: pushq %r15
; AVX2-NEXT: pushq %r14
; AVX2-NEXT: pushq %r13
; AVX2-NEXT: pushq %r12
; AVX2-NEXT: pushq %rbx
; AVX2-NEXT: andq $-32, %rsp
; AVX2-NEXT: subq $128, %rsp
; AVX2-NEXT: .Lcfi23:
; AVX2-NEXT: .cfi_offset %rbx, -56
; AVX2-NEXT: .Lcfi24:
; AVX2-NEXT: .cfi_offset %r12, -48
; AVX2-NEXT: .Lcfi25:
; AVX2-NEXT: .cfi_offset %r13, -40
; AVX2-NEXT: .Lcfi26:
; AVX2-NEXT: .cfi_offset %r14, -32
; AVX2-NEXT: .Lcfi27:
; AVX2-NEXT: .cfi_offset %r15, -24
; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp) # 4-byte Spill
; AVX2-NEXT: movl %edi, %r13d
; AVX2-NEXT: movl %edi, %r12d
; AVX2-NEXT: movl %edi, %r15d
; AVX2-NEXT: movl %edi, %r14d
; AVX2-NEXT: movl %edi, %ebx
; AVX2-NEXT: movl %edi, %r11d
; AVX2-NEXT: movl %edi, %r10d
; AVX2-NEXT: movl %edi, %r9d
; AVX2-NEXT: movl %edi, %r8d
; AVX2-NEXT: movl %edi, %esi
; AVX2-NEXT: movl %edi, %edx
; AVX2-NEXT: movl %edi, %ecx
; AVX2-NEXT: movl %edi, %eax
; AVX2-NEXT: andl $1, %edi
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: shrl %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX2-NEXT: shrl $2, %ecx
; AVX2-NEXT: andl $1, %ecx
; AVX2-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; AVX2-NEXT: shrl $3, %edx
; AVX2-NEXT: andl $1, %edx
; AVX2-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0
; AVX2-NEXT: shrl $4, %esi
; AVX2-NEXT: andl $1, %esi
; AVX2-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
; AVX2-NEXT: shrl $5, %r8d
; AVX2-NEXT: andl $1, %r8d
; AVX2-NEXT: vpinsrb $5, %r8d, %xmm0, %xmm0
; AVX2-NEXT: shrl $6, %r9d
; AVX2-NEXT: andl $1, %r9d
; AVX2-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
; AVX2-NEXT: shrl $7, %r10d
; AVX2-NEXT: andl $1, %r10d
; AVX2-NEXT: vpinsrb $7, %r10d, %xmm0, %xmm0
; AVX2-NEXT: shrl $8, %r11d
; AVX2-NEXT: andl $1, %r11d
; AVX2-NEXT: vpinsrb $8, %r11d, %xmm0, %xmm0
; AVX2-NEXT: shrl $9, %ebx
; AVX2-NEXT: andl $1, %ebx
; AVX2-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
; AVX2-NEXT: shrl $10, %r14d
; AVX2-NEXT: andl $1, %r14d
; AVX2-NEXT: vpinsrb $10, %r14d, %xmm0, %xmm0
; AVX2-NEXT: shrl $11, %r15d
; AVX2-NEXT: andl $1, %r15d
; AVX2-NEXT: vpinsrb $11, %r15d, %xmm0, %xmm0
; AVX2-NEXT: shrl $12, %r12d
; AVX2-NEXT: andl $1, %r12d
; AVX2-NEXT: vpinsrb $12, %r12d, %xmm0, %xmm0
; AVX2-NEXT: shrl $13, %r13d
; AVX2-NEXT: andl $1, %r13d
; AVX2-NEXT: vpinsrb $13, %r13d, %xmm0, %xmm0
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX2-NEXT: shrl $14, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX2-NEXT: shrl $15, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX2-NEXT: shrl $16, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX2-NEXT: shrl $17, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX2-NEXT: shrl $18, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX2-NEXT: shrl $19, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX2-NEXT: shrl $20, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX2-NEXT: shrl $21, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX2-NEXT: shrl $22, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX2-NEXT: shrl $23, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX2-NEXT: shrl $24, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX2-NEXT: shrl $25, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX2-NEXT: shrl $26, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX2-NEXT: shrl $27, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX2-NEXT: shrl $28, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX2-NEXT: shrl $29, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX2-NEXT: shrl $30, %eax
; AVX2-NEXT: andl $1, %eax
; AVX2-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl {{[0-9]+}}(%rsp), %eax # 4-byte Reload
; AVX2-NEXT: shrl $31, %eax
; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX2-NEXT: vpsllw $15, %ymm0, %ymm0
; AVX2-NEXT: vpsraw $15, %ymm0, %ymm0
; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX2-NEXT: vpsllw $15, %ymm1, %ymm1
; AVX2-NEXT: vpsraw $15, %ymm1, %ymm1
; AVX2-NEXT: leaq -40(%rbp), %rsp
; AVX2-NEXT: popq %rbx
; AVX2-NEXT: popq %r12
; AVX2-NEXT: popq %r13
; AVX2-NEXT: popq %r14
; AVX2-NEXT: popq %r15
; AVX2-NEXT: popq %rbp
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i32_32i16:
; AVX512: # BB#0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2w %k0, %zmm0
; AVX512-NEXT: retq
 %1 = bitcast i32 %a0 to <32 x i1>
 %2 = sext <32 x i1> %1 to <32 x i16>
 ret <32 x i16> %2
}

define <64 x i8> @ext_i64_64i8(i64 %a0) {
; SSE2-SSSE3-LABEL: ext_i64_64i8:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: pushq %rbp
; SSE2-SSSE3-NEXT: .Lcfi24:
; SSE2-SSSE3-NEXT: .cfi_def_cfa_offset 16
; SSE2-SSSE3-NEXT: pushq %r15
; SSE2-SSSE3-NEXT: .Lcfi25:
; SSE2-SSSE3-NEXT: .cfi_def_cfa_offset 24
; SSE2-SSSE3-NEXT: pushq %r14
; SSE2-SSSE3-NEXT: .Lcfi26:
; SSE2-SSSE3-NEXT: .cfi_def_cfa_offset 32
; SSE2-SSSE3-NEXT: pushq %r13
; SSE2-SSSE3-NEXT: .Lcfi27:
; SSE2-SSSE3-NEXT: .cfi_def_cfa_offset 40
; SSE2-SSSE3-NEXT: pushq %r12
; SSE2-SSSE3-NEXT: .Lcfi28:
; SSE2-SSSE3-NEXT: .cfi_def_cfa_offset 48
; SSE2-SSSE3-NEXT: pushq %rbx
; SSE2-SSSE3-NEXT: .Lcfi29:
; SSE2-SSSE3-NEXT: .cfi_def_cfa_offset 56
; SSE2-SSSE3-NEXT: .Lcfi30:
; SSE2-SSSE3-NEXT: .cfi_offset %rbx, -56
; SSE2-SSSE3-NEXT: .Lcfi31:
; SSE2-SSSE3-NEXT: .cfi_offset %r12, -48
; SSE2-SSSE3-NEXT: .Lcfi32:
; SSE2-SSSE3-NEXT: .cfi_offset %r13, -40
; SSE2-SSSE3-NEXT: .Lcfi33:
; SSE2-SSSE3-NEXT: .cfi_offset %r14, -32
; SSE2-SSSE3-NEXT: .Lcfi34:
; SSE2-SSSE3-NEXT: .cfi_offset %r15, -24
; SSE2-SSSE3-NEXT: .Lcfi35:
; SSE2-SSSE3-NEXT: .cfi_offset %rbp, -16
; SSE2-SSSE3-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movq %rdi, %rax
; SSE2-SSSE3-NEXT: shrq $32, %rax
; SSE2-SSSE3-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movq %rdi, %rax
; SSE2-SSSE3-NEXT: shrq $48, %rax
; SSE2-SSSE3-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: shrl $16, %edi
; SSE2-SSSE3-NEXT: movw %di, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT: movswq -{{[0-9]+}}(%rsp), %rbx
; SSE2-SSSE3-NEXT: movq %rbx, %r8
; SSE2-SSSE3-NEXT: movq %rbx, %r9
; SSE2-SSSE3-NEXT: movq %rbx, %r10
; SSE2-SSSE3-NEXT: movq %rbx, %r11
; SSE2-SSSE3-NEXT: movq %rbx, %r14
; SSE2-SSSE3-NEXT: movq %rbx, %r15
; SSE2-SSSE3-NEXT: movq %rbx, %r12
; SSE2-SSSE3-NEXT: movq %rbx, %r13
; SSE2-SSSE3-NEXT: movq %rbx, %rdi
; SSE2-SSSE3-NEXT: movq %rbx, %rcx
; SSE2-SSSE3-NEXT: movq %rbx, %rdx
; SSE2-SSSE3-NEXT: movq %rbx, %rsi
; SSE2-SSSE3-NEXT: movq %rbx, %rbp
; SSE2-SSSE3-NEXT: movq %rbx, %rax
; SSE2-SSSE3-NEXT: shrq $15, %rax
; SSE2-SSSE3-NEXT: movd %eax, %xmm0
; SSE2-SSSE3-NEXT: movq %rbx, %rax
; SSE2-SSSE3-NEXT: movsbq %bl, %rbx
; SSE2-SSSE3-NEXT: shlq $49, %r8
; SSE2-SSSE3-NEXT: sarq $63, %r8
; SSE2-SSSE3-NEXT: movd %r8d, %xmm15
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7]
; SSE2-SSSE3-NEXT: shlq $50, %r9
; SSE2-SSSE3-NEXT: sarq $63, %r9
; SSE2-SSSE3-NEXT: movd %r9d, %xmm8
; SSE2-SSSE3-NEXT: shlq $51, %r10
; SSE2-SSSE3-NEXT: sarq $63, %r10
; SSE2-SSSE3-NEXT: movd %r10d, %xmm2
; SSE2-SSSE3-NEXT: shlq $52, %r11
; SSE2-SSSE3-NEXT: sarq $63, %r11
; SSE2-SSSE3-NEXT: movd %r11d, %xmm9
; SSE2-SSSE3-NEXT: shlq $53, %r14
; SSE2-SSSE3-NEXT: sarq $63, %r14
; SSE2-SSSE3-NEXT: movd %r14d, %xmm6
; SSE2-SSSE3-NEXT: shlq $54, %r15
; SSE2-SSSE3-NEXT: sarq $63, %r15
; SSE2-SSSE3-NEXT: movd %r15d, %xmm10
; SSE2-SSSE3-NEXT: shlq $55, %r12
; SSE2-SSSE3-NEXT: sarq $63, %r12
; SSE2-SSSE3-NEXT: movd %r12d, %xmm4
; SSE2-SSSE3-NEXT: shlq $60, %r13
; SSE2-SSSE3-NEXT: sarq $63, %r13
; SSE2-SSSE3-NEXT: movd %r13d, %xmm11
; SSE2-SSSE3-NEXT: shlq $61, %rdi
; SSE2-SSSE3-NEXT: sarq $63, %rdi
; SSE2-SSSE3-NEXT: movd %edi, %xmm5
; SSE2-SSSE3-NEXT: shlq $62, %rcx
; SSE2-SSSE3-NEXT: sarq $63, %rcx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm12
; SSE2-SSSE3-NEXT: shlq $63, %rdx
; SSE2-SSSE3-NEXT: sarq $63, %rdx
; SSE2-SSSE3-NEXT: movd %edx, %xmm0
; SSE2-SSSE3-NEXT: shlq $58, %rsi
; SSE2-SSSE3-NEXT: sarq $63, %rsi
; SSE2-SSSE3-NEXT: movd %esi, %xmm13
; SSE2-SSSE3-NEXT: shlq $59, %rbp
; SSE2-SSSE3-NEXT: sarq $63, %rbp
; SSE2-SSSE3-NEXT: movd %ebp, %xmm7
; SSE2-SSSE3-NEXT: shlq $57, %rax
; SSE2-SSSE3-NEXT: sarq $63, %rax
; SSE2-SSSE3-NEXT: movd %eax, %xmm3
; SSE2-SSSE3-NEXT: shrq $7, %rbx
; SSE2-SSSE3-NEXT: movd %ebx, %xmm14
; SSE2-SSSE3-NEXT: movswq -{{[0-9]+}}(%rsp), %rsi
; SSE2-SSSE3-NEXT: movq %rsi, %r8
; SSE2-SSSE3-NEXT: movq %rsi, %r9
; SSE2-SSSE3-NEXT: movq %rsi, %r10
; SSE2-SSSE3-NEXT: movq %rsi, %r11
; SSE2-SSSE3-NEXT: movq %rsi, %r14
; SSE2-SSSE3-NEXT: movq %rsi, %r15
; SSE2-SSSE3-NEXT: movq %rsi, %r12
; SSE2-SSSE3-NEXT: movq %rsi, %r13
; SSE2-SSSE3-NEXT: movq %rsi, %rbx
; SSE2-SSSE3-NEXT: movq %rsi, %rax
; SSE2-SSSE3-NEXT: movq %rsi, %rcx
; SSE2-SSSE3-NEXT: movq %rsi, %rdx
; SSE2-SSSE3-NEXT: movq %rsi, %rdi
; SSE2-SSSE3-NEXT: movq %rsi, %rbp
; SSE2-SSSE3-NEXT: shrq $15, %rbp
; SSE2-SSSE3-NEXT: movd %ebp, %xmm1
; SSE2-SSSE3-NEXT: movq %rsi, %rbp
; SSE2-SSSE3-NEXT: movsbq %sil, %rsi
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm15[0],xmm2[1],xmm15[1],xmm2[2],xmm15[2],xmm2[3],xmm15[3]
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm9[0],xmm6[1],xmm9[1],xmm6[2],xmm9[2],xmm6[3],xmm9[3],xmm6[4],xmm9[4],xmm6[5],xmm9[5],xmm6[6],xmm9[6],xmm6[7],xmm9[7]
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm10[0],xmm4[1],xmm10[1],xmm4[2],xmm10[2],xmm4[3],xmm10[3],xmm4[4],xmm10[4],xmm4[5],xmm10[5],xmm4[6],xmm10[6],xmm4[7],xmm10[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1],xmm4[2],xmm6[2],xmm4[3],xmm6[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm11[0],xmm5[1],xmm11[1],xmm5[2],xmm11[2],xmm5[3],xmm11[3],xmm5[4],xmm11[4],xmm5[5],xmm11[5],xmm5[6],xmm11[6],xmm5[7],xmm11[7]
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm12[0],xmm0[1],xmm12[1],xmm0[2],xmm12[2],xmm0[3],xmm12[3],xmm0[4],xmm12[4],xmm0[5],xmm12[5],xmm0[6],xmm12[6],xmm0[7],xmm12[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3]
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7]
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm14[0],xmm3[1],xmm14[1],xmm3[2],xmm14[2],xmm3[3],xmm14[3],xmm3[4],xmm14[4],xmm3[5],xmm14[5],xmm3[6],xmm14[6],xmm3[7],xmm14[7]
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3]
; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; SSE2-SSSE3-NEXT: shlq $49, %r8
; SSE2-SSSE3-NEXT: sarq $63, %r8
; SSE2-SSSE3-NEXT: movd %r8d, %xmm13
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm13 = xmm13[0],xmm1[0],xmm13[1],xmm1[1],xmm13[2],xmm1[2],xmm13[3],xmm1[3],xmm13[4],xmm1[4],xmm13[5],xmm1[5],xmm13[6],xmm1[6],xmm13[7],xmm1[7]
; SSE2-SSSE3-NEXT: shlq $50, %r9
; SSE2-SSSE3-NEXT: sarq $63, %r9
; SSE2-SSSE3-NEXT: movd %r9d, %xmm1
; SSE2-SSSE3-NEXT: shlq $51, %r10
; SSE2-SSSE3-NEXT: sarq $63, %r10
; SSE2-SSSE3-NEXT: movd %r10d, %xmm3
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE2-SSSE3-NEXT: shlq $52, %r11
; SSE2-SSSE3-NEXT: sarq $63, %r11
; SSE2-SSSE3-NEXT: movd %r11d, %xmm8
; SSE2-SSSE3-NEXT: shlq $53, %r14
; SSE2-SSSE3-NEXT: sarq $63, %r14
; SSE2-SSSE3-NEXT: movd %r14d, %xmm15
; SSE2-SSSE3-NEXT: shlq $54, %r15
; SSE2-SSSE3-NEXT: sarq $63, %r15
; SSE2-SSSE3-NEXT: movd %r15d, %xmm9
; SSE2-SSSE3-NEXT: shlq $55, %r12
; SSE2-SSSE3-NEXT: sarq $63, %r12
; SSE2-SSSE3-NEXT: movd %r12d, %xmm4
; SSE2-SSSE3-NEXT: shlq $60, %r13
; SSE2-SSSE3-NEXT: sarq $63, %r13
; SSE2-SSSE3-NEXT: movd %r13d, %xmm10
; SSE2-SSSE3-NEXT: shlq $61, %rbx
; SSE2-SSSE3-NEXT: sarq $63, %rbx
; SSE2-SSSE3-NEXT: movd %ebx, %xmm7
; SSE2-SSSE3-NEXT: shlq $62, %rax
; SSE2-SSSE3-NEXT: sarq $63, %rax
; SSE2-SSSE3-NEXT: movd %eax, %xmm11
; SSE2-SSSE3-NEXT: shlq $63, %rcx
; SSE2-SSSE3-NEXT: sarq $63, %rcx
; SSE2-SSSE3-NEXT: movd %ecx, %xmm2
; SSE2-SSSE3-NEXT: shlq $58, %rdx
; SSE2-SSSE3-NEXT: sarq $63, %rdx
; SSE2-SSSE3-NEXT: movd %edx, %xmm12
; SSE2-SSSE3-NEXT: shlq $59, %rdi
; SSE2-SSSE3-NEXT: sarq $63, %rdi
; SSE2-SSSE3-NEXT: movd %edi, %xmm5
; SSE2-SSSE3-NEXT: shlq $57, %rbp
; SSE2-SSSE3-NEXT: sarq $63, %rbp
; SSE2-SSSE3-NEXT: movd %ebp, %xmm1
; SSE2-SSSE3-NEXT: shrq $7, %rsi
; SSE2-SSSE3-NEXT: movd %esi, %xmm14
; SSE2-SSSE3-NEXT: movswq -{{[0-9]+}}(%rsp), %rsi
; SSE2-SSSE3-NEXT: movq %rsi, %r8
; SSE2-SSSE3-NEXT: movq %rsi, %r9
; SSE2-SSSE3-NEXT: movq %rsi, %r10
; SSE2-SSSE3-NEXT: movq %rsi, %r11
; SSE2-SSSE3-NEXT: movq %rsi, %r14
; SSE2-SSSE3-NEXT: movq %rsi, %r15
; SSE2-SSSE3-NEXT: movq %rsi, %r12
; SSE2-SSSE3-NEXT: movq %rsi, %r13
; SSE2-SSSE3-NEXT: movq %rsi, %rbx
; SSE2-SSSE3-NEXT: movq %rsi, %rax
; SSE2-SSSE3-NEXT: movq %rsi, %rcx
; SSE2-SSSE3-NEXT: movq %rsi, %rdx
; SSE2-SSSE3-NEXT: movq %rsi, %rdi
; SSE2-SSSE3-NEXT: movq %rsi, %rbp
; SSE2-SSSE3-NEXT: shrq $15, %rbp
; SSE2-SSSE3-NEXT: movd %ebp, %xmm6
; SSE2-SSSE3-NEXT: movq %rsi, %rbp
; SSE2-SSSE3-NEXT: movsbq %sil, %rsi
; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm13[0],xmm3[1],xmm13[1],xmm3[2],xmm13[2],xmm3[3],xmm13[3]
2714; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm8[0],xmm15[1],xmm8[1],xmm15[2],xmm8[2],xmm15[3],xmm8[3],xmm15[4],xmm8[4],xmm15[5],xmm8[5],xmm15[6],xmm8[6],xmm15[7],xmm8[7]
2715; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm9[0],xmm4[1],xmm9[1],xmm4[2],xmm9[2],xmm4[3],xmm9[3],xmm4[4],xmm9[4],xmm4[5],xmm9[5],xmm4[6],xmm9[6],xmm4[7],xmm9[7]
2716; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm15[0],xmm4[1],xmm15[1],xmm4[2],xmm15[2],xmm4[3],xmm15[3]
2717; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
2718; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm10[0],xmm7[1],xmm10[1],xmm7[2],xmm10[2],xmm7[3],xmm10[3],xmm7[4],xmm10[4],xmm7[5],xmm10[5],xmm7[6],xmm10[6],xmm7[7],xmm10[7]
2719; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm11[0],xmm2[1],xmm11[1],xmm2[2],xmm11[2],xmm2[3],xmm11[3],xmm2[4],xmm11[4],xmm2[5],xmm11[5],xmm2[6],xmm11[6],xmm2[7],xmm11[7]
2720; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm7[0],xmm2[1],xmm7[1],xmm2[2],xmm7[2],xmm2[3],xmm7[3]
2721; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm12[0],xmm5[1],xmm12[1],xmm5[2],xmm12[2],xmm5[3],xmm12[3],xmm5[4],xmm12[4],xmm5[5],xmm12[5],xmm5[6],xmm12[6],xmm5[7],xmm12[7]
2722; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm14[0],xmm1[1],xmm14[1],xmm1[2],xmm14[2],xmm1[3],xmm14[3],xmm1[4],xmm14[4],xmm1[5],xmm14[5],xmm1[6],xmm14[6],xmm1[7],xmm14[7]
2723; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3]
2724; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
2725; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0]
2726; SSE2-SSSE3-NEXT: shlq $49, %r8
2727; SSE2-SSSE3-NEXT: sarq $63, %r8
2728; SSE2-SSSE3-NEXT: movd %r8d, %xmm1
2729; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
2730; SSE2-SSSE3-NEXT: shlq $50, %r9
2731; SSE2-SSSE3-NEXT: sarq $63, %r9
2732; SSE2-SSSE3-NEXT: movd %r9d, %xmm3
2733; SSE2-SSSE3-NEXT: shlq $51, %r10
2734; SSE2-SSSE3-NEXT: sarq $63, %r10
2735; SSE2-SSSE3-NEXT: movd %r10d, %xmm4
2736; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
2737; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3]
2738; SSE2-SSSE3-NEXT: shlq $52, %r11
2739; SSE2-SSSE3-NEXT: sarq $63, %r11
2740; SSE2-SSSE3-NEXT: movd %r11d, %xmm8
2741; SSE2-SSSE3-NEXT: shlq $53, %r14
2742; SSE2-SSSE3-NEXT: sarq $63, %r14
2743; SSE2-SSSE3-NEXT: movd %r14d, %xmm13
2744; SSE2-SSSE3-NEXT: shlq $54, %r15
2745; SSE2-SSSE3-NEXT: sarq $63, %r15
2746; SSE2-SSSE3-NEXT: movd %r15d, %xmm9
2747; SSE2-SSSE3-NEXT: shlq $55, %r12
2748; SSE2-SSSE3-NEXT: sarq $63, %r12
2749; SSE2-SSSE3-NEXT: movd %r12d, %xmm1
2750; SSE2-SSSE3-NEXT: shlq $60, %r13
2751; SSE2-SSSE3-NEXT: sarq $63, %r13
2752; SSE2-SSSE3-NEXT: movd %r13d, %xmm10
2753; SSE2-SSSE3-NEXT: shlq $61, %rbx
2754; SSE2-SSSE3-NEXT: sarq $63, %rbx
2755; SSE2-SSSE3-NEXT: movd %ebx, %xmm15
2756; SSE2-SSSE3-NEXT: shlq $62, %rax
2757; SSE2-SSSE3-NEXT: sarq $63, %rax
2758; SSE2-SSSE3-NEXT: movd %eax, %xmm11
2759; SSE2-SSSE3-NEXT: shlq $63, %rcx
2760; SSE2-SSSE3-NEXT: sarq $63, %rcx
2761; SSE2-SSSE3-NEXT: movd %ecx, %xmm3
2762; SSE2-SSSE3-NEXT: shlq $58, %rdx
2763; SSE2-SSSE3-NEXT: sarq $63, %rdx
2764; SSE2-SSSE3-NEXT: movd %edx, %xmm12
2765; SSE2-SSSE3-NEXT: shlq $59, %rdi
2766; SSE2-SSSE3-NEXT: sarq $63, %rdi
2767; SSE2-SSSE3-NEXT: movd %edi, %xmm5
2768; SSE2-SSSE3-NEXT: shlq $57, %rbp
2769; SSE2-SSSE3-NEXT: sarq $63, %rbp
2770; SSE2-SSSE3-NEXT: movd %ebp, %xmm6
2771; SSE2-SSSE3-NEXT: shrq $7, %rsi
2772; SSE2-SSSE3-NEXT: movd %esi, %xmm14
2773; SSE2-SSSE3-NEXT: movswq -{{[0-9]+}}(%rsp), %rsi
2774; SSE2-SSSE3-NEXT: movq %rsi, %r8
2775; SSE2-SSSE3-NEXT: movq %rsi, %r9
2776; SSE2-SSSE3-NEXT: movq %rsi, %r10
2777; SSE2-SSSE3-NEXT: movq %rsi, %r11
2778; SSE2-SSSE3-NEXT: movq %rsi, %r14
2779; SSE2-SSSE3-NEXT: movq %rsi, %r15
2780; SSE2-SSSE3-NEXT: movq %rsi, %r12
2781; SSE2-SSSE3-NEXT: movq %rsi, %r13
2782; SSE2-SSSE3-NEXT: movq %rsi, %rbx
2783; SSE2-SSSE3-NEXT: movq %rsi, %rax
2784; SSE2-SSSE3-NEXT: movq %rsi, %rcx
2785; SSE2-SSSE3-NEXT: movq %rsi, %rdx
2786; SSE2-SSSE3-NEXT: movq %rsi, %rdi
2787; SSE2-SSSE3-NEXT: movq %rsi, %rbp
2788; SSE2-SSSE3-NEXT: shrq $15, %rbp
2789; SSE2-SSSE3-NEXT: movd %ebp, %xmm7
2790; SSE2-SSSE3-NEXT: movq %rsi, %rbp
2791; SSE2-SSSE3-NEXT: movsbq %sil, %rsi
2792; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm13 = xmm13[0],xmm8[0],xmm13[1],xmm8[1],xmm13[2],xmm8[2],xmm13[3],xmm8[3],xmm13[4],xmm8[4],xmm13[5],xmm8[5],xmm13[6],xmm8[6],xmm13[7],xmm8[7]
2793; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm9[0],xmm1[1],xmm9[1],xmm1[2],xmm9[2],xmm1[3],xmm9[3],xmm1[4],xmm9[4],xmm1[5],xmm9[5],xmm1[6],xmm9[6],xmm1[7],xmm9[7]
2794; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm13[0],xmm1[1],xmm13[1],xmm1[2],xmm13[2],xmm1[3],xmm13[3]
2795; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1]
2796; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm10[0],xmm15[1],xmm10[1],xmm15[2],xmm10[2],xmm15[3],xmm10[3],xmm15[4],xmm10[4],xmm15[5],xmm10[5],xmm15[6],xmm10[6],xmm15[7],xmm10[7]
2797; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm11[0],xmm3[1],xmm11[1],xmm3[2],xmm11[2],xmm3[3],xmm11[3],xmm3[4],xmm11[4],xmm3[5],xmm11[5],xmm3[6],xmm11[6],xmm3[7],xmm11[7]
2798; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm15[0],xmm3[1],xmm15[1],xmm3[2],xmm15[2],xmm3[3],xmm15[3]
2799; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm12[0],xmm5[1],xmm12[1],xmm5[2],xmm12[2],xmm5[3],xmm12[3],xmm5[4],xmm12[4],xmm5[5],xmm12[5],xmm5[6],xmm12[6],xmm5[7],xmm12[7]
2800; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm14[0],xmm6[1],xmm14[1],xmm6[2],xmm14[2],xmm6[3],xmm14[3],xmm6[4],xmm14[4],xmm6[5],xmm14[5],xmm6[6],xmm14[6],xmm6[7],xmm14[7]
2801; SSE2-SSSE3-NEXT: shlq $49, %r8
2802; SSE2-SSSE3-NEXT: sarq $63, %r8
2803; SSE2-SSSE3-NEXT: movd %r8d, %xmm4
2804; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3]
2805; SSE2-SSSE3-NEXT: shlq $50, %r9
2806; SSE2-SSSE3-NEXT: sarq $63, %r9
2807; SSE2-SSSE3-NEXT: movd %r9d, %xmm6
2808; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
2809; SSE2-SSSE3-NEXT: shlq $51, %r10
2810; SSE2-SSSE3-NEXT: sarq $63, %r10
2811; SSE2-SSSE3-NEXT: movd %r10d, %xmm5
2812; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm1[0]
2813; SSE2-SSSE3-NEXT: shlq $52, %r11
2814; SSE2-SSSE3-NEXT: sarq $63, %r11
2815; SSE2-SSSE3-NEXT: movd %r11d, %xmm1
2816; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm7[0],xmm4[1],xmm7[1],xmm4[2],xmm7[2],xmm4[3],xmm7[3],xmm4[4],xmm7[4],xmm4[5],xmm7[5],xmm4[6],xmm7[6],xmm4[7],xmm7[7]
2817; SSE2-SSSE3-NEXT: shlq $53, %r14
2818; SSE2-SSSE3-NEXT: sarq $63, %r14
2819; SSE2-SSSE3-NEXT: movd %r14d, %xmm7
2820; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3],xmm5[4],xmm6[4],xmm5[5],xmm6[5],xmm5[6],xmm6[6],xmm5[7],xmm6[7]
2821; SSE2-SSSE3-NEXT: shlq $54, %r15
2822; SSE2-SSSE3-NEXT: sarq $63, %r15
2823; SSE2-SSSE3-NEXT: movd %r15d, %xmm6
2824; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
2825; SSE2-SSSE3-NEXT: shlq $55, %r12
2826; SSE2-SSSE3-NEXT: sarq $63, %r12
2827; SSE2-SSSE3-NEXT: movd %r12d, %xmm4
2828; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm1[0],xmm7[1],xmm1[1],xmm7[2],xmm1[2],xmm7[3],xmm1[3],xmm7[4],xmm1[4],xmm7[5],xmm1[5],xmm7[6],xmm1[6],xmm7[7],xmm1[7]
2829; SSE2-SSSE3-NEXT: shlq $60, %r13
2830; SSE2-SSSE3-NEXT: sarq $63, %r13
2831; SSE2-SSSE3-NEXT: movd %r13d, %xmm8
2832; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1],xmm4[2],xmm6[2],xmm4[3],xmm6[3],xmm4[4],xmm6[4],xmm4[5],xmm6[5],xmm4[6],xmm6[6],xmm4[7],xmm6[7]
2833; SSE2-SSSE3-NEXT: shlq $61, %rbx
2834; SSE2-SSSE3-NEXT: sarq $63, %rbx
2835; SSE2-SSSE3-NEXT: movd %ebx, %xmm6
2836; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm7[0],xmm4[1],xmm7[1],xmm4[2],xmm7[2],xmm4[3],xmm7[3]
2837; SSE2-SSSE3-NEXT: shlq $62, %rax
2838; SSE2-SSSE3-NEXT: sarq $63, %rax
2839; SSE2-SSSE3-NEXT: movd %eax, %xmm7
2840; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
2841; SSE2-SSSE3-NEXT: shlq $63, %rcx
2842; SSE2-SSSE3-NEXT: sarq $63, %rcx
2843; SSE2-SSSE3-NEXT: movd %ecx, %xmm1
2844; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm8[0],xmm6[1],xmm8[1],xmm6[2],xmm8[2],xmm6[3],xmm8[3],xmm6[4],xmm8[4],xmm6[5],xmm8[5],xmm6[6],xmm8[6],xmm6[7],xmm8[7]
2845; SSE2-SSSE3-NEXT: shlq $58, %rdx
2846; SSE2-SSSE3-NEXT: sarq $63, %rdx
2847; SSE2-SSSE3-NEXT: movd %edx, %xmm5
2848; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3],xmm1[4],xmm7[4],xmm1[5],xmm7[5],xmm1[6],xmm7[6],xmm1[7],xmm7[7]
2849; SSE2-SSSE3-NEXT: shlq $59, %rdi
2850; SSE2-SSSE3-NEXT: sarq $63, %rdi
2851; SSE2-SSSE3-NEXT: movd %edi, %xmm7
2852; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3]
2853; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm5[0],xmm7[1],xmm5[1],xmm7[2],xmm5[2],xmm7[3],xmm5[3],xmm7[4],xmm5[4],xmm7[5],xmm5[5],xmm7[6],xmm5[6],xmm7[7],xmm5[7]
2854; SSE2-SSSE3-NEXT: shlq $57, %rbp
2855; SSE2-SSSE3-NEXT: sarq $63, %rbp
2856; SSE2-SSSE3-NEXT: movd %ebp, %xmm5
2857; SSE2-SSSE3-NEXT: shrq $7, %rsi
2858; SSE2-SSSE3-NEXT: movd %esi, %xmm6
2859; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3],xmm5[4],xmm6[4],xmm5[5],xmm6[5],xmm5[6],xmm6[6],xmm5[7],xmm6[7]
2860; SSE2-SSSE3-NEXT: punpcklwd {{.*#+}} xmm7 = xmm7[0],xmm5[0],xmm7[1],xmm5[1],xmm7[2],xmm5[2],xmm7[3],xmm5[3]
2861; SSE2-SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1]
2862; SSE2-SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0]
2863; SSE2-SSSE3-NEXT: popq %rbx
2864; SSE2-SSSE3-NEXT: popq %r12
2865; SSE2-SSSE3-NEXT: popq %r13
2866; SSE2-SSSE3-NEXT: popq %r14
2867; SSE2-SSSE3-NEXT: popq %r15
2868; SSE2-SSSE3-NEXT: popq %rbp
2869; SSE2-SSSE3-NEXT: retq
2870;
; AVX1-LABEL: ext_i64_64i8:
; AVX1: # BB#0:
; AVX1-NEXT: pushq %rbp
; AVX1-NEXT: .Lcfi28:
; AVX1-NEXT: .cfi_def_cfa_offset 16
; AVX1-NEXT: .Lcfi29:
; AVX1-NEXT: .cfi_offset %rbp, -16
; AVX1-NEXT: movq %rsp, %rbp
; AVX1-NEXT: .Lcfi30:
; AVX1-NEXT: .cfi_def_cfa_register %rbp
; AVX1-NEXT: pushq %r15
; AVX1-NEXT: pushq %r14
; AVX1-NEXT: pushq %r13
; AVX1-NEXT: pushq %r12
; AVX1-NEXT: pushq %rbx
; AVX1-NEXT: andq $-32, %rsp
; AVX1-NEXT: subq $128, %rsp
; AVX1-NEXT: .Lcfi31:
; AVX1-NEXT: .cfi_offset %rbx, -56
; AVX1-NEXT: .Lcfi32:
; AVX1-NEXT: .cfi_offset %r12, -48
; AVX1-NEXT: .Lcfi33:
; AVX1-NEXT: .cfi_offset %r13, -40
; AVX1-NEXT: .Lcfi34:
; AVX1-NEXT: .cfi_offset %r14, -32
; AVX1-NEXT: .Lcfi35:
; AVX1-NEXT: .cfi_offset %r15, -24
; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; AVX1-NEXT: shrq $32, %rdi
; AVX1-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; AVX1-NEXT: movslq {{[0-9]+}}(%rsp), %rdx
; AVX1-NEXT: movq %rdx, %rcx
; AVX1-NEXT: shlq $47, %rcx
; AVX1-NEXT: sarq $63, %rcx
; AVX1-NEXT: vmovd %ecx, %xmm0
; AVX1-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX1-NEXT: movq %rdx, %r8
; AVX1-NEXT: movq %rdx, %rcx
; AVX1-NEXT: movq %rdx, %rdi
; AVX1-NEXT: movq %rdx, %r13
; AVX1-NEXT: movq %rdx, %rsi
; AVX1-NEXT: movq %rdx, %r10
; AVX1-NEXT: movq %rdx, %r11
; AVX1-NEXT: movq %rdx, %r9
; AVX1-NEXT: movq %rdx, %rbx
; AVX1-NEXT: movq %rdx, %r14
; AVX1-NEXT: movq %rdx, %r15
; AVX1-NEXT: movq %rdx, %r12
; AVX1-NEXT: movq %rdx, %rax
; AVX1-NEXT: shlq $46, %rax
; AVX1-NEXT: sarq $63, %rax
; AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
; AVX1-NEXT: shlq $45, %rax
; AVX1-NEXT: sarq $63, %rax
; AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX1-NEXT: shlq $44, %r8
; AVX1-NEXT: sarq $63, %r8
; AVX1-NEXT: vpinsrb $3, %r8d, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %r8
; AVX1-NEXT: shlq $43, %rcx
; AVX1-NEXT: sarq $63, %rcx
; AVX1-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %rcx
; AVX1-NEXT: shlq $42, %rdi
; AVX1-NEXT: sarq $63, %rdi
; AVX1-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %rdi
; AVX1-NEXT: shlq $41, %r13
; AVX1-NEXT: sarq $63, %r13
; AVX1-NEXT: vpinsrb $6, %r13d, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %r13
; AVX1-NEXT: shlq $40, %rsi
; AVX1-NEXT: sarq $63, %rsi
; AVX1-NEXT: vpinsrb $7, %esi, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %rsi
; AVX1-NEXT: shlq $39, %r10
; AVX1-NEXT: sarq $63, %r10
; AVX1-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %r10
; AVX1-NEXT: shlq $38, %r11
; AVX1-NEXT: sarq $63, %r11
; AVX1-NEXT: vpinsrb $9, %r11d, %xmm0, %xmm0
; AVX1-NEXT: movsbq %dl, %rax
; AVX1-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX1-NEXT: shlq $37, %r9
; AVX1-NEXT: sarq $63, %r9
; AVX1-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %r9
; AVX1-NEXT: shlq $36, %rbx
; AVX1-NEXT: sarq $63, %rbx
; AVX1-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %rbx
; AVX1-NEXT: shlq $35, %r14
; AVX1-NEXT: sarq $63, %r14
; AVX1-NEXT: vpinsrb $12, %r14d, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %r14
; AVX1-NEXT: shlq $34, %r15
; AVX1-NEXT: sarq $63, %r15
; AVX1-NEXT: vpinsrb $13, %r15d, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %r15
; AVX1-NEXT: shlq $33, %r12
; AVX1-NEXT: sarq $63, %r12
; AVX1-NEXT: vpinsrb $14, %r12d, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %r12
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
; AVX1-NEXT: shrq $31, %rax
; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX1-NEXT: movq %rdx, %rax
; AVX1-NEXT: shlq $63, %r8
; AVX1-NEXT: sarq $63, %r8
; AVX1-NEXT: vmovd %r8d, %xmm1
; AVX1-NEXT: movq %rdx, %r8
; AVX1-NEXT: movswq %dx, %rdx
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %r11 # 8-byte Reload
; AVX1-NEXT: shlq $62, %r11
; AVX1-NEXT: sarq $63, %r11
; AVX1-NEXT: vpinsrb $1, %r11d, %xmm1, %xmm1
; AVX1-NEXT: shlq $61, %rcx
; AVX1-NEXT: sarq $63, %rcx
; AVX1-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
; AVX1-NEXT: shlq $60, %rdi
; AVX1-NEXT: sarq $63, %rdi
; AVX1-NEXT: vpinsrb $3, %edi, %xmm1, %xmm1
; AVX1-NEXT: shlq $59, %r13
; AVX1-NEXT: sarq $63, %r13
; AVX1-NEXT: vpinsrb $4, %r13d, %xmm1, %xmm1
; AVX1-NEXT: shlq $58, %rsi
; AVX1-NEXT: sarq $63, %rsi
; AVX1-NEXT: vpinsrb $5, %esi, %xmm1, %xmm1
; AVX1-NEXT: shlq $57, %r10
; AVX1-NEXT: sarq $63, %r10
; AVX1-NEXT: vpinsrb $6, %r10d, %xmm1, %xmm1
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
; AVX1-NEXT: shrq $7, %rcx
; AVX1-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1
; AVX1-NEXT: shlq $55, %r9
; AVX1-NEXT: sarq $63, %r9
; AVX1-NEXT: vpinsrb $8, %r9d, %xmm1, %xmm1
; AVX1-NEXT: shlq $54, %rbx
; AVX1-NEXT: sarq $63, %rbx
; AVX1-NEXT: vpinsrb $9, %ebx, %xmm1, %xmm1
; AVX1-NEXT: shlq $53, %r14
; AVX1-NEXT: sarq $63, %r14
; AVX1-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1
; AVX1-NEXT: shlq $52, %r15
; AVX1-NEXT: sarq $63, %r15
; AVX1-NEXT: vpinsrb $11, %r15d, %xmm1, %xmm1
; AVX1-NEXT: shlq $51, %r12
; AVX1-NEXT: sarq $63, %r12
; AVX1-NEXT: vpinsrb $12, %r12d, %xmm1, %xmm1
; AVX1-NEXT: shlq $50, %rax
; AVX1-NEXT: sarq $63, %rax
; AVX1-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX1-NEXT: shlq $49, %r8
; AVX1-NEXT: sarq $63, %r8
; AVX1-NEXT: vpinsrb $14, %r8d, %xmm1, %xmm1
; AVX1-NEXT: shrq $15, %rdx
; AVX1-NEXT: vpinsrb $15, %edx, %xmm1, %xmm1
; AVX1-NEXT: movslq {{[0-9]+}}(%rsp), %rdx
; AVX1-NEXT: movq %rdx, %rcx
; AVX1-NEXT: shlq $47, %rcx
; AVX1-NEXT: sarq $63, %rcx
; AVX1-NEXT: vmovd %ecx, %xmm2
; AVX1-NEXT: movq %rdx, %r13
; AVX1-NEXT: movq %rdx, %rcx
; AVX1-NEXT: movq %rdx, %r9
; AVX1-NEXT: movq %rdx, %r12
; AVX1-NEXT: movq %rdx, %rdi
; AVX1-NEXT: movq %rdx, %rbx
; AVX1-NEXT: movq %rdx, %r8
; AVX1-NEXT: movq %rdx, %r10
; AVX1-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX1-NEXT: movq %rdx, %rsi
; AVX1-NEXT: movq %rdx, %r11
; AVX1-NEXT: movq %rdx, %r14
; AVX1-NEXT: movq %rdx, %r15
; AVX1-NEXT: movq %rdx, %rax
; AVX1-NEXT: shlq $46, %rax
; AVX1-NEXT: sarq $63, %rax
; AVX1-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; AVX1-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX1-NEXT: shlq $45, %r13
; AVX1-NEXT: sarq $63, %r13
; AVX1-NEXT: vpinsrb $2, %r13d, %xmm2, %xmm2
; AVX1-NEXT: movq %rdx, %r13
; AVX1-NEXT: shlq $44, %rcx
; AVX1-NEXT: sarq $63, %rcx
; AVX1-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2
; AVX1-NEXT: movq %rdx, %rcx
; AVX1-NEXT: shlq $43, %r9
; AVX1-NEXT: sarq $63, %r9
; AVX1-NEXT: vpinsrb $4, %r9d, %xmm2, %xmm2
; AVX1-NEXT: movq %rdx, %r9
; AVX1-NEXT: shlq $42, %r12
; AVX1-NEXT: sarq $63, %r12
; AVX1-NEXT: vpinsrb $5, %r12d, %xmm2, %xmm2
; AVX1-NEXT: movq %rdx, %r12
; AVX1-NEXT: shlq $41, %rdi
; AVX1-NEXT: sarq $63, %rdi
; AVX1-NEXT: vpinsrb $6, %edi, %xmm2, %xmm2
; AVX1-NEXT: movq %rdx, %rdi
; AVX1-NEXT: shlq $40, %rbx
; AVX1-NEXT: sarq $63, %rbx
; AVX1-NEXT: vpinsrb $7, %ebx, %xmm2, %xmm2
; AVX1-NEXT: movq %rdx, %rbx
; AVX1-NEXT: shlq $39, %r8
; AVX1-NEXT: sarq $63, %r8
; AVX1-NEXT: vpinsrb $8, %r8d, %xmm2, %xmm2
; AVX1-NEXT: movq %rdx, %r8
; AVX1-NEXT: shlq $38, %r10
; AVX1-NEXT: sarq $63, %r10
; AVX1-NEXT: vpinsrb $9, %r10d, %xmm2, %xmm2
; AVX1-NEXT: movsbq %dl, %rax
; AVX1-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
; AVX1-NEXT: shlq $37, %rax
; AVX1-NEXT: sarq $63, %rax
; AVX1-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; AVX1-NEXT: movq %rdx, %r10
; AVX1-NEXT: shlq $36, %rsi
; AVX1-NEXT: sarq $63, %rsi
; AVX1-NEXT: vpinsrb $11, %esi, %xmm2, %xmm2
; AVX1-NEXT: movq %rdx, %rsi
; AVX1-NEXT: shlq $35, %r11
; AVX1-NEXT: sarq $63, %r11
; AVX1-NEXT: vpinsrb $12, %r11d, %xmm2, %xmm2
; AVX1-NEXT: movq %rdx, %r11
; AVX1-NEXT: shlq $34, %r14
; AVX1-NEXT: sarq $63, %r14
; AVX1-NEXT: vpinsrb $13, %r14d, %xmm2, %xmm2
; AVX1-NEXT: movq %rdx, %r14
; AVX1-NEXT: shlq $33, %r15
; AVX1-NEXT: sarq $63, %r15
; AVX1-NEXT: vpinsrb $14, %r15d, %xmm2, %xmm2
; AVX1-NEXT: movq %rdx, %r15
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
; AVX1-NEXT: shrq $31, %rax
; AVX1-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; AVX1-NEXT: movq %rdx, %rax
; AVX1-NEXT: shlq $63, %rcx
; AVX1-NEXT: sarq $63, %rcx
; AVX1-NEXT: vmovd %ecx, %xmm3
; AVX1-NEXT: movq %rdx, %rcx
; AVX1-NEXT: movswq %dx, %rdx
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: shlq $62, %r13
; AVX1-NEXT: sarq $63, %r13
; AVX1-NEXT: vpinsrb $1, %r13d, %xmm3, %xmm1
; AVX1-NEXT: shlq $61, %r9
; AVX1-NEXT: sarq $63, %r9
; AVX1-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1
; AVX1-NEXT: shlq $60, %r12
; AVX1-NEXT: sarq $63, %r12
; AVX1-NEXT: vpinsrb $3, %r12d, %xmm1, %xmm1
; AVX1-NEXT: shlq $59, %rdi
; AVX1-NEXT: sarq $63, %rdi
; AVX1-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1
; AVX1-NEXT: shlq $58, %rbx
; AVX1-NEXT: sarq $63, %rbx
; AVX1-NEXT: vpinsrb $5, %ebx, %xmm1, %xmm1
; AVX1-NEXT: shlq $57, %r8
; AVX1-NEXT: sarq $63, %r8
; AVX1-NEXT: vpinsrb $6, %r8d, %xmm1, %xmm1
; AVX1-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
; AVX1-NEXT: shrq $7, %rdi
; AVX1-NEXT: vpinsrb $7, %edi, %xmm1, %xmm1
; AVX1-NEXT: shlq $55, %r10
; AVX1-NEXT: sarq $63, %r10
; AVX1-NEXT: vpinsrb $8, %r10d, %xmm1, %xmm1
; AVX1-NEXT: shlq $54, %rsi
; AVX1-NEXT: sarq $63, %rsi
; AVX1-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1
; AVX1-NEXT: shlq $53, %r11
; AVX1-NEXT: sarq $63, %r11
; AVX1-NEXT: vpinsrb $10, %r11d, %xmm1, %xmm1
; AVX1-NEXT: shlq $52, %r14
; AVX1-NEXT: sarq $63, %r14
; AVX1-NEXT: vpinsrb $11, %r14d, %xmm1, %xmm1
; AVX1-NEXT: shlq $51, %r15
; AVX1-NEXT: sarq $63, %r15
; AVX1-NEXT: vpinsrb $12, %r15d, %xmm1, %xmm1
; AVX1-NEXT: shlq $50, %rax
; AVX1-NEXT: sarq $63, %rax
; AVX1-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX1-NEXT: shlq $49, %rcx
; AVX1-NEXT: sarq $63, %rcx
; AVX1-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1
; AVX1-NEXT: shrq $15, %rdx
; AVX1-NEXT: vpinsrb $15, %edx, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: leaq -40(%rbp), %rsp
; AVX1-NEXT: popq %rbx
; AVX1-NEXT: popq %r12
; AVX1-NEXT: popq %r13
; AVX1-NEXT: popq %r14
; AVX1-NEXT: popq %r15
; AVX1-NEXT: popq %rbp
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i64_64i8:
; AVX2: # BB#0:
; AVX2-NEXT: pushq %rbp
; AVX2-NEXT: .Lcfi28:
; AVX2-NEXT: .cfi_def_cfa_offset 16
; AVX2-NEXT: .Lcfi29:
; AVX2-NEXT: .cfi_offset %rbp, -16
; AVX2-NEXT: movq %rsp, %rbp
; AVX2-NEXT: .Lcfi30:
; AVX2-NEXT: .cfi_def_cfa_register %rbp
; AVX2-NEXT: pushq %r15
; AVX2-NEXT: pushq %r14
; AVX2-NEXT: pushq %r13
; AVX2-NEXT: pushq %r12
; AVX2-NEXT: pushq %rbx
; AVX2-NEXT: andq $-32, %rsp
; AVX2-NEXT: subq $128, %rsp
; AVX2-NEXT: .Lcfi31:
; AVX2-NEXT: .cfi_offset %rbx, -56
; AVX2-NEXT: .Lcfi32:
; AVX2-NEXT: .cfi_offset %r12, -48
; AVX2-NEXT: .Lcfi33:
; AVX2-NEXT: .cfi_offset %r13, -40
; AVX2-NEXT: .Lcfi34:
; AVX2-NEXT: .cfi_offset %r14, -32
; AVX2-NEXT: .Lcfi35:
; AVX2-NEXT: .cfi_offset %r15, -24
; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; AVX2-NEXT: shrq $32, %rdi
; AVX2-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; AVX2-NEXT: movslq {{[0-9]+}}(%rsp), %rdx
; AVX2-NEXT: movq %rdx, %rcx
; AVX2-NEXT: shlq $47, %rcx
; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vmovd %ecx, %xmm0
; AVX2-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX2-NEXT: movq %rdx, %r8
; AVX2-NEXT: movq %rdx, %rcx
; AVX2-NEXT: movq %rdx, %rdi
; AVX2-NEXT: movq %rdx, %r13
; AVX2-NEXT: movq %rdx, %rsi
; AVX2-NEXT: movq %rdx, %r10
; AVX2-NEXT: movq %rdx, %r11
; AVX2-NEXT: movq %rdx, %r9
; AVX2-NEXT: movq %rdx, %rbx
; AVX2-NEXT: movq %rdx, %r14
; AVX2-NEXT: movq %rdx, %r15
; AVX2-NEXT: movq %rdx, %r12
; AVX2-NEXT: movq %rdx, %rax
; AVX2-NEXT: shlq $46, %rax
; AVX2-NEXT: sarq $63, %rax
; AVX2-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
; AVX2-NEXT: shlq $45, %rax
; AVX2-NEXT: sarq $63, %rax
; AVX2-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX2-NEXT: shlq $44, %r8
; AVX2-NEXT: sarq $63, %r8
; AVX2-NEXT: vpinsrb $3, %r8d, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %r8
; AVX2-NEXT: shlq $43, %rcx
; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %rcx
; AVX2-NEXT: shlq $42, %rdi
; AVX2-NEXT: sarq $63, %rdi
; AVX2-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %rdi
; AVX2-NEXT: shlq $41, %r13
; AVX2-NEXT: sarq $63, %r13
; AVX2-NEXT: vpinsrb $6, %r13d, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %r13
; AVX2-NEXT: shlq $40, %rsi
; AVX2-NEXT: sarq $63, %rsi
; AVX2-NEXT: vpinsrb $7, %esi, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %rsi
; AVX2-NEXT: shlq $39, %r10
; AVX2-NEXT: sarq $63, %r10
; AVX2-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %r10
; AVX2-NEXT: shlq $38, %r11
; AVX2-NEXT: sarq $63, %r11
; AVX2-NEXT: vpinsrb $9, %r11d, %xmm0, %xmm0
; AVX2-NEXT: movsbq %dl, %rax
; AVX2-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX2-NEXT: shlq $37, %r9
; AVX2-NEXT: sarq $63, %r9
; AVX2-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %r9
; AVX2-NEXT: shlq $36, %rbx
; AVX2-NEXT: sarq $63, %rbx
; AVX2-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %rbx
; AVX2-NEXT: shlq $35, %r14
; AVX2-NEXT: sarq $63, %r14
; AVX2-NEXT: vpinsrb $12, %r14d, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %r14
; AVX2-NEXT: shlq $34, %r15
; AVX2-NEXT: sarq $63, %r15
; AVX2-NEXT: vpinsrb $13, %r15d, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %r15
; AVX2-NEXT: shlq $33, %r12
; AVX2-NEXT: sarq $63, %r12
; AVX2-NEXT: vpinsrb $14, %r12d, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %r12
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
; AVX2-NEXT: shrq $31, %rax
; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX2-NEXT: movq %rdx, %rax
; AVX2-NEXT: shlq $63, %r8
; AVX2-NEXT: sarq $63, %r8
; AVX2-NEXT: vmovd %r8d, %xmm1
; AVX2-NEXT: movq %rdx, %r8
; AVX2-NEXT: movswq %dx, %rdx
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r11 # 8-byte Reload
; AVX2-NEXT: shlq $62, %r11
; AVX2-NEXT: sarq $63, %r11
; AVX2-NEXT: vpinsrb $1, %r11d, %xmm1, %xmm1
; AVX2-NEXT: shlq $61, %rcx
; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
; AVX2-NEXT: shlq $60, %rdi
; AVX2-NEXT: sarq $63, %rdi
; AVX2-NEXT: vpinsrb $3, %edi, %xmm1, %xmm1
; AVX2-NEXT: shlq $59, %r13
; AVX2-NEXT: sarq $63, %r13
; AVX2-NEXT: vpinsrb $4, %r13d, %xmm1, %xmm1
; AVX2-NEXT: shlq $58, %rsi
; AVX2-NEXT: sarq $63, %rsi
; AVX2-NEXT: vpinsrb $5, %esi, %xmm1, %xmm1
; AVX2-NEXT: shlq $57, %r10
; AVX2-NEXT: sarq $63, %r10
; AVX2-NEXT: vpinsrb $6, %r10d, %xmm1, %xmm1
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload
; AVX2-NEXT: shrq $7, %rcx
; AVX2-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1
; AVX2-NEXT: shlq $55, %r9
; AVX2-NEXT: sarq $63, %r9
; AVX2-NEXT: vpinsrb $8, %r9d, %xmm1, %xmm1
; AVX2-NEXT: shlq $54, %rbx
; AVX2-NEXT: sarq $63, %rbx
; AVX2-NEXT: vpinsrb $9, %ebx, %xmm1, %xmm1
; AVX2-NEXT: shlq $53, %r14
; AVX2-NEXT: sarq $63, %r14
; AVX2-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1
; AVX2-NEXT: shlq $52, %r15
; AVX2-NEXT: sarq $63, %r15
; AVX2-NEXT: vpinsrb $11, %r15d, %xmm1, %xmm1
; AVX2-NEXT: shlq $51, %r12
; AVX2-NEXT: sarq $63, %r12
; AVX2-NEXT: vpinsrb $12, %r12d, %xmm1, %xmm1
; AVX2-NEXT: shlq $50, %rax
; AVX2-NEXT: sarq $63, %rax
; AVX2-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX2-NEXT: shlq $49, %r8
; AVX2-NEXT: sarq $63, %r8
; AVX2-NEXT: vpinsrb $14, %r8d, %xmm1, %xmm1
; AVX2-NEXT: shrq $15, %rdx
; AVX2-NEXT: vpinsrb $15, %edx, %xmm1, %xmm1
; AVX2-NEXT: movslq {{[0-9]+}}(%rsp), %rdx
; AVX2-NEXT: movq %rdx, %rcx
; AVX2-NEXT: shlq $47, %rcx
; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vmovd %ecx, %xmm2
; AVX2-NEXT: movq %rdx, %r13
; AVX2-NEXT: movq %rdx, %rcx
; AVX2-NEXT: movq %rdx, %r9
; AVX2-NEXT: movq %rdx, %r12
; AVX2-NEXT: movq %rdx, %rdi
; AVX2-NEXT: movq %rdx, %rbx
; AVX2-NEXT: movq %rdx, %r8
; AVX2-NEXT: movq %rdx, %r10
; AVX2-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX2-NEXT: movq %rdx, %rsi
; AVX2-NEXT: movq %rdx, %r11
; AVX2-NEXT: movq %rdx, %r14
; AVX2-NEXT: movq %rdx, %r15
; AVX2-NEXT: movq %rdx, %rax
; AVX2-NEXT: shlq $46, %rax
; AVX2-NEXT: sarq $63, %rax
; AVX2-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; AVX2-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX2-NEXT: shlq $45, %r13
; AVX2-NEXT: sarq $63, %r13
; AVX2-NEXT: vpinsrb $2, %r13d, %xmm2, %xmm2
; AVX2-NEXT: movq %rdx, %r13
; AVX2-NEXT: shlq $44, %rcx
; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vpinsrb $3, %ecx, %xmm2, %xmm2
; AVX2-NEXT: movq %rdx, %rcx
; AVX2-NEXT: shlq $43, %r9
; AVX2-NEXT: sarq $63, %r9
; AVX2-NEXT: vpinsrb $4, %r9d, %xmm2, %xmm2
; AVX2-NEXT: movq %rdx, %r9
; AVX2-NEXT: shlq $42, %r12
; AVX2-NEXT: sarq $63, %r12
; AVX2-NEXT: vpinsrb $5, %r12d, %xmm2, %xmm2
; AVX2-NEXT: movq %rdx, %r12
; AVX2-NEXT: shlq $41, %rdi
; AVX2-NEXT: sarq $63, %rdi
; AVX2-NEXT: vpinsrb $6, %edi, %xmm2, %xmm2
; AVX2-NEXT: movq %rdx, %rdi
; AVX2-NEXT: shlq $40, %rbx
; AVX2-NEXT: sarq $63, %rbx
; AVX2-NEXT: vpinsrb $7, %ebx, %xmm2, %xmm2
; AVX2-NEXT: movq %rdx, %rbx
; AVX2-NEXT: shlq $39, %r8
; AVX2-NEXT: sarq $63, %r8
; AVX2-NEXT: vpinsrb $8, %r8d, %xmm2, %xmm2
; AVX2-NEXT: movq %rdx, %r8
; AVX2-NEXT: shlq $38, %r10
; AVX2-NEXT: sarq $63, %r10
; AVX2-NEXT: vpinsrb $9, %r10d, %xmm2, %xmm2
; AVX2-NEXT: movsbq %dl, %rax
; AVX2-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
; AVX2-NEXT: shlq $37, %rax
; AVX2-NEXT: sarq $63, %rax
; AVX2-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; AVX2-NEXT: movq %rdx, %r10
; AVX2-NEXT: shlq $36, %rsi
; AVX2-NEXT: sarq $63, %rsi
; AVX2-NEXT: vpinsrb $11, %esi, %xmm2, %xmm2
; AVX2-NEXT: movq %rdx, %rsi
; AVX2-NEXT: shlq $35, %r11
; AVX2-NEXT: sarq $63, %r11
; AVX2-NEXT: vpinsrb $12, %r11d, %xmm2, %xmm2
; AVX2-NEXT: movq %rdx, %r11
; AVX2-NEXT: shlq $34, %r14
; AVX2-NEXT: sarq $63, %r14
; AVX2-NEXT: vpinsrb $13, %r14d, %xmm2, %xmm2
; AVX2-NEXT: movq %rdx, %r14
; AVX2-NEXT: shlq $33, %r15
; AVX2-NEXT: sarq $63, %r15
; AVX2-NEXT: vpinsrb $14, %r15d, %xmm2, %xmm2
; AVX2-NEXT: movq %rdx, %r15
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload
; AVX2-NEXT: shrq $31, %rax
; AVX2-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
; AVX2-NEXT: movq %rdx, %rax
; AVX2-NEXT: shlq $63, %rcx
; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vmovd %ecx, %xmm3
; AVX2-NEXT: movq %rdx, %rcx
; AVX2-NEXT: movswq %dx, %rdx
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: shlq $62, %r13
; AVX2-NEXT: sarq $63, %r13
; AVX2-NEXT: vpinsrb $1, %r13d, %xmm3, %xmm1
; AVX2-NEXT: shlq $61, %r9
; AVX2-NEXT: sarq $63, %r9
; AVX2-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1
; AVX2-NEXT: shlq $60, %r12
; AVX2-NEXT: sarq $63, %r12
; AVX2-NEXT: vpinsrb $3, %r12d, %xmm1, %xmm1
; AVX2-NEXT: shlq $59, %rdi
; AVX2-NEXT: sarq $63, %rdi
; AVX2-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1
; AVX2-NEXT: shlq $58, %rbx
; AVX2-NEXT: sarq $63, %rbx
; AVX2-NEXT: vpinsrb $5, %ebx, %xmm1, %xmm1
; AVX2-NEXT: shlq $57, %r8
; AVX2-NEXT: sarq $63, %r8
; AVX2-NEXT: vpinsrb $6, %r8d, %xmm1, %xmm1
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload
; AVX2-NEXT: shrq $7, %rdi
; AVX2-NEXT: vpinsrb $7, %edi, %xmm1, %xmm1
; AVX2-NEXT: shlq $55, %r10
; AVX2-NEXT: sarq $63, %r10
; AVX2-NEXT: vpinsrb $8, %r10d, %xmm1, %xmm1
; AVX2-NEXT: shlq $54, %rsi
; AVX2-NEXT: sarq $63, %rsi
; AVX2-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1
; AVX2-NEXT: shlq $53, %r11
; AVX2-NEXT: sarq $63, %r11
; AVX2-NEXT: vpinsrb $10, %r11d, %xmm1, %xmm1
; AVX2-NEXT: shlq $52, %r14
; AVX2-NEXT: sarq $63, %r14
; AVX2-NEXT: vpinsrb $11, %r14d, %xmm1, %xmm1
; AVX2-NEXT: shlq $51, %r15
; AVX2-NEXT: sarq $63, %r15
; AVX2-NEXT: vpinsrb $12, %r15d, %xmm1, %xmm1
; AVX2-NEXT: shlq $50, %rax
; AVX2-NEXT: sarq $63, %rax
; AVX2-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; AVX2-NEXT: shlq $49, %rcx
; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1
; AVX2-NEXT: shrq $15, %rdx
; AVX2-NEXT: vpinsrb $15, %edx, %xmm1, %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX2-NEXT: leaq -40(%rbp), %rsp
; AVX2-NEXT: popq %rbx
; AVX2-NEXT: popq %r12
; AVX2-NEXT: popq %r13
; AVX2-NEXT: popq %r14
; AVX2-NEXT: popq %r15
; AVX2-NEXT: popq %rbp
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i64_64i8:
; AVX512: # BB#0:
; AVX512-NEXT: kmovq %rdi, %k0
; AVX512-NEXT: vpmovm2b %k0, %zmm0
; AVX512-NEXT: retq
  %1 = bitcast i64 %a0 to <64 x i1>
  %2 = sext <64 x i1> %1 to <64 x i8>
  ret <64 x i8> %2
}