blob: 4ed55ac0919e059c9a36523ee8aa729908c81c58 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512

;
; 128-bit vectors
;

; Bitcast an i2 mask to <2 x i1> and zero-extend to <2 x i64>.
define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i2_2i64:
; AVX512:       # BB#0:
; AVX512-NEXT:    andb $3, %dil
; AVX512-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT:    kmovd %eax, %k1
; AVX512-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = zext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}
62
; Bitcast an i4 mask to <4 x i1> and zero-extend to <4 x i32>.
define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i32:
; AVX512:       # BB#0:
; AVX512-NEXT:    andb $15, %dil
; AVX512-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT:    kmovd %eax, %k1
; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}
108
; Bitcast an i8 mask to <8 x i1> and zero-extend to <8 x i16>.
define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i16:
; AVX512:       # BB#0:
; AVX512-NEXT:    kmovd %edi, %k5
; AVX512-NEXT:    kshiftlw $8, %k5, %k0
; AVX512-NEXT:    kshiftrw $15, %k0, %k0
; AVX512-NEXT:    kshiftlw $9, %k5, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kshiftlw $10, %k5, %k2
; AVX512-NEXT:    kshiftrw $15, %k2, %k2
; AVX512-NEXT:    kshiftlw $11, %k5, %k3
; AVX512-NEXT:    kshiftrw $15, %k3, %k3
; AVX512-NEXT:    kshiftlw $12, %k5, %k4
; AVX512-NEXT:    kshiftrw $15, %k4, %k4
; AVX512-NEXT:    kshiftlw $13, %k5, %k6
; AVX512-NEXT:    kshiftrw $15, %k6, %k6
; AVX512-NEXT:    kshiftlw $15, %k5, %k7
; AVX512-NEXT:    kshiftrw $15, %k7, %k7
; AVX512-NEXT:    kshiftlw $14, %k5, %k5
; AVX512-NEXT:    kshiftrw $15, %k5, %k5
; AVX512-NEXT:    kmovd %k5, %eax
; AVX512-NEXT:    andl $1, %eax
; AVX512-NEXT:    kmovd %k7, %ecx
; AVX512-NEXT:    andl $1, %ecx
; AVX512-NEXT:    vmovd %ecx, %xmm0
; AVX512-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
; AVX512-NEXT:    kmovd %k6, %eax
; AVX512-NEXT:    andl $1, %eax
; AVX512-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
; AVX512-NEXT:    kmovd %k4, %eax
; AVX512-NEXT:    andl $1, %eax
; AVX512-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0
; AVX512-NEXT:    kmovd %k3, %eax
; AVX512-NEXT:    andl $1, %eax
; AVX512-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
; AVX512-NEXT:    kmovd %k2, %eax
; AVX512-NEXT:    andl $1, %eax
; AVX512-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0
; AVX512-NEXT:    kmovd %k1, %eax
; AVX512-NEXT:    andl $1, %eax
; AVX512-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    andl $1, %eax
; AVX512-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}
190
; Bitcast an i16 mask to <16 x i1> and zero-extend to <16 x i8>.
define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2:       # BB#0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    psrlw $7, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movd %edi, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT:    psrlw $7, %xmm0
; SSSE3-NEXT:    pand {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i8:
; AVX512:       # BB#0:
; AVX512-NEXT:    pushq %rbp
; AVX512-NEXT:    .cfi_def_cfa_offset 16
; AVX512-NEXT:    pushq %r15
; AVX512-NEXT:    .cfi_def_cfa_offset 24
; AVX512-NEXT:    pushq %r14
; AVX512-NEXT:    .cfi_def_cfa_offset 32
; AVX512-NEXT:    pushq %r13
; AVX512-NEXT:    .cfi_def_cfa_offset 40
; AVX512-NEXT:    pushq %r12
; AVX512-NEXT:    .cfi_def_cfa_offset 48
; AVX512-NEXT:    pushq %rbx
; AVX512-NEXT:    .cfi_def_cfa_offset 56
; AVX512-NEXT:    .cfi_offset %rbx, -56
; AVX512-NEXT:    .cfi_offset %r12, -48
; AVX512-NEXT:    .cfi_offset %r13, -40
; AVX512-NEXT:    .cfi_offset %r14, -32
; AVX512-NEXT:    .cfi_offset %r15, -24
; AVX512-NEXT:    .cfi_offset %rbp, -16
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    kshiftlw $14, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %r8d
; AVX512-NEXT:    kshiftlw $15, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %r9d
; AVX512-NEXT:    kshiftlw $13, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %r10d
; AVX512-NEXT:    kshiftlw $12, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %r11d
; AVX512-NEXT:    kshiftlw $11, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %r14d
; AVX512-NEXT:    kshiftlw $10, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %r15d
; AVX512-NEXT:    kshiftlw $9, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %r12d
; AVX512-NEXT:    kshiftlw $8, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %r13d
; AVX512-NEXT:    kshiftlw $7, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %esi
; AVX512-NEXT:    kshiftlw $6, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %ebx
; AVX512-NEXT:    kshiftlw $5, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %ebp
; AVX512-NEXT:    kshiftlw $4, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %edi
; AVX512-NEXT:    kshiftlw $3, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %eax
; AVX512-NEXT:    kshiftlw $2, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %ecx
; AVX512-NEXT:    kshiftlw $1, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %edx
; AVX512-NEXT:    kshiftrw $15, %k0, %k0
; AVX512-NEXT:    vmovd %r9d, %xmm0
; AVX512-NEXT:    kmovd %k0, %r9d
; AVX512-NEXT:    vpinsrb $1, %r8d, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $2, %r10d, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $4, %r14d, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $5, %r15d, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $6, %r12d, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $7, %r13d, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $9, %ebx, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $10, %ebp, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $11, %edi, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $14, %edx, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $15, %r9d, %xmm0, %xmm0
; AVX512-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    popq %rbx
; AVX512-NEXT:    popq %r12
; AVX512-NEXT:    popq %r13
; AVX512-NEXT:    popq %r14
; AVX512-NEXT:    popq %r15
; AVX512-NEXT:    popq %rbp
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}
334
;
; 256-bit vectors
;

; Bitcast an i4 mask to <4 x i1> and zero-extend to <4 x i64>.
define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlq $63, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i64:
; AVX512:       # BB#0:
; AVX512-NEXT:    andb $15, %dil
; AVX512-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT:    kmovd %eax, %k1
; AVX512-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}
403
; Bitcast an i8 mask to <8 x i1> and zero-extend to <8 x i32>.
define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrld $31, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrld $31, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i32:
; AVX512:       # BB#0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}
457
; Bitcast an i16 mask to <16 x i1> and zero-extend to <16 x i16>.
define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $15, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i16:
; AVX512:       # BB#0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}
513
; Bitcast an i32 mask to <32 x i1> and zero-extend to <32 x i8>.
define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm1
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i32_32i8:
; AVX512:       # BB#0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = zext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}
585
;
; 512-bit vectors
;

; Bitcast an i8 mask to <8 x i1> and zero-extend to <8 x i64>.
define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlq $63, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlq $63, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlq $63, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsrlq $63, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i64:
; AVX512:       # BB#0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}
677
; Bitcast an i16 mask to <16 x i1> and zero-extend to <16 x i32>.
define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrld $31, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    psrld $31, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    psrld $31, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrld $31, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrld $31, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsrld $31, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i32:
; AVX512:       # BB#0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}
; Expand a 32-bit scalar mask into a <32 x i16> vector: each i1 element of the
; bitcast mask is zero-extended to a 0/1 i16 lane.  The CHECK lines below are
; autogenerated by utils/update_llc_test_checks.py -- regenerate them with that
; script rather than editing by hand.
define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm2
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT: psrlw $15, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT: psrlw $15, %xmm1
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT: psrlw $15, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT: psrlw $15, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd %edi, %xmm1
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsrlw $15, %xmm4, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm2
; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $15, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $15, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2: # BB#0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT: shrl $16, %edi
; AVX2-NEXT: vmovd %edi, %xmm2
; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpsrlw $15, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i32_32i16:
; AVX512: # BB#0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i32 %a0 to <32 x i1>       ; reinterpret the 32 mask bits as a vector of i1
  %2 = zext <32 x i1> %1 to <32 x i16>    ; widen each i1 to an i16 lane holding 0 or 1
  ret <32 x i16> %2
}
; Expand a 64-bit scalar mask into a <64 x i8> vector: each i1 element of the
; bitcast mask is zero-extended to a 0/1 i8 lane.  The CHECK lines below are
; autogenerated by utils/update_llc_test_checks.py -- regenerate them with that
; script rather than editing by hand.
define <64 x i8> @ext_i64_64i8(i64 %a0) {
; SSE2-SSSE3-LABEL: ext_i64_64i8:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movq %rdi, %xmm3
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm0
; SSE2-SSSE3-NEXT: psrlw $7, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm1
; SSE2-SSSE3-NEXT: psrlw $7, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm2
; SSE2-SSSE3-NEXT: psrlw $7, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm2
; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm3
; SSE2-SSSE3-NEXT: psrlw $7, %xmm3
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i64_64i8:
; AVX1: # BB#0:
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpsrlw $7, %xmm3, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm6, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpcmpeqb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $7, %xmm2, %xmm2
; AVX1-NEXT: vpand %xmm6, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqb %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm6, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i64_64i8:
; AVX2: # BB#0:
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpshufhw {{.*#+}} xmm4 = xmm1[0,1,2,3,4,4,5,5]
; AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3]
; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm4, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpsrlw $7, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i64_64i8:
; AVX512: # BB#0:
; AVX512-NEXT: kmovq %rdi, %k1
; AVX512-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i64 %a0 to <64 x i1>      ; reinterpret the 64 mask bits as a vector of i1
  %2 = zext <64 x i1> %1 to <64 x i8>    ; widen each i1 to an i8 lane holding 0 or 1
  ret <64 x i8> %2
}