; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512

;
; 128-bit vectors
;

; Bitcast an i2 mask to <2 x i1> and zero-extend to <2 x i64> (0/1 per lane).
define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i2_2i64:
; AVX512:       # BB#0:
; AVX512-NEXT:    andb $3, %dil
; AVX512-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT:    kmovd %eax, %k1
; AVX512-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = zext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}

; Bitcast an i4 mask to <4 x i1> and zero-extend to <4 x i32> (0/1 per lane).
define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i32:
; AVX512:       # BB#0:
; AVX512-NEXT:    andb $15, %dil
; AVX512-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT:    kmovd %eax, %k1
; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}

; Bitcast an i8 mask to <8 x i1> and zero-extend to <8 x i16> (0/1 per lane).
define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i16:
; AVX512:       # BB#0:
; AVX512-NEXT:    kmovd %edi, %k5
; AVX512-NEXT:    kshiftlw $8, %k5, %k0
; AVX512-NEXT:    kshiftrw $15, %k0, %k0
; AVX512-NEXT:    kshiftlw $9, %k5, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kshiftlw $10, %k5, %k2
; AVX512-NEXT:    kshiftrw $15, %k2, %k2
; AVX512-NEXT:    kshiftlw $11, %k5, %k3
; AVX512-NEXT:    kshiftrw $15, %k3, %k3
; AVX512-NEXT:    kshiftlw $12, %k5, %k4
; AVX512-NEXT:    kshiftrw $15, %k4, %k4
; AVX512-NEXT:    kshiftlw $13, %k5, %k6
; AVX512-NEXT:    kshiftrw $15, %k6, %k6
; AVX512-NEXT:    kshiftlw $15, %k5, %k7
; AVX512-NEXT:    kshiftrw $15, %k7, %k7
; AVX512-NEXT:    kshiftlw $14, %k5, %k5
; AVX512-NEXT:    kshiftrw $15, %k5, %k5
; AVX512-NEXT:    kmovd %k5, %eax
; AVX512-NEXT:    andl $1, %eax
; AVX512-NEXT:    kmovd %k7, %ecx
; AVX512-NEXT:    andl $1, %ecx
; AVX512-NEXT:    vmovd %ecx, %xmm0
; AVX512-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
; AVX512-NEXT:    kmovd %k6, %eax
; AVX512-NEXT:    andl $1, %eax
; AVX512-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
; AVX512-NEXT:    kmovd %k4, %eax
; AVX512-NEXT:    andl $1, %eax
; AVX512-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0
; AVX512-NEXT:    kmovd %k3, %eax
; AVX512-NEXT:    andl $1, %eax
; AVX512-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
; AVX512-NEXT:    kmovd %k2, %eax
; AVX512-NEXT:    andl $1, %eax
; AVX512-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0
; AVX512-NEXT:    kmovd %k1, %eax
; AVX512-NEXT:    andl $1, %eax
; AVX512-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    andl $1, %eax
; AVX512-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}

; Bitcast an i16 mask to <16 x i1> and zero-extend to <16 x i8> (0/1 per lane).
define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2:       # BB#0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    psrlw $7, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movd %edi, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT:    psrlw $7, %xmm0
; SSSE3-NEXT:    pand {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i8:
; AVX512:       # BB#0:
; AVX512-NEXT:    pushq %rbp
; AVX512-NEXT:  .Lcfi0:
; AVX512-NEXT:    .cfi_def_cfa_offset 16
; AVX512-NEXT:    pushq %r15
; AVX512-NEXT:  .Lcfi1:
; AVX512-NEXT:    .cfi_def_cfa_offset 24
; AVX512-NEXT:    pushq %r14
; AVX512-NEXT:  .Lcfi2:
; AVX512-NEXT:    .cfi_def_cfa_offset 32
; AVX512-NEXT:    pushq %r13
; AVX512-NEXT:  .Lcfi3:
; AVX512-NEXT:    .cfi_def_cfa_offset 40
; AVX512-NEXT:    pushq %r12
; AVX512-NEXT:  .Lcfi4:
; AVX512-NEXT:    .cfi_def_cfa_offset 48
; AVX512-NEXT:    pushq %rbx
; AVX512-NEXT:  .Lcfi5:
; AVX512-NEXT:    .cfi_def_cfa_offset 56
; AVX512-NEXT:  .Lcfi6:
; AVX512-NEXT:    .cfi_offset %rbx, -56
; AVX512-NEXT:  .Lcfi7:
; AVX512-NEXT:    .cfi_offset %r12, -48
; AVX512-NEXT:  .Lcfi8:
; AVX512-NEXT:    .cfi_offset %r13, -40
; AVX512-NEXT:  .Lcfi9:
; AVX512-NEXT:    .cfi_offset %r14, -32
; AVX512-NEXT:  .Lcfi10:
; AVX512-NEXT:    .cfi_offset %r15, -24
; AVX512-NEXT:  .Lcfi11:
; AVX512-NEXT:    .cfi_offset %rbp, -16
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    kshiftlw $14, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %r8d
; AVX512-NEXT:    kshiftlw $15, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %r9d
; AVX512-NEXT:    kshiftlw $13, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %r10d
; AVX512-NEXT:    kshiftlw $12, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %r11d
; AVX512-NEXT:    kshiftlw $11, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %r14d
; AVX512-NEXT:    kshiftlw $10, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %r15d
; AVX512-NEXT:    kshiftlw $9, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %r12d
; AVX512-NEXT:    kshiftlw $8, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %r13d
; AVX512-NEXT:    kshiftlw $7, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %esi
; AVX512-NEXT:    kshiftlw $6, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %ebx
; AVX512-NEXT:    kshiftlw $5, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %ebp
; AVX512-NEXT:    kshiftlw $4, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %edi
; AVX512-NEXT:    kshiftlw $3, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %eax
; AVX512-NEXT:    kshiftlw $2, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %ecx
; AVX512-NEXT:    kshiftlw $1, %k0, %k1
; AVX512-NEXT:    kshiftrw $15, %k1, %k1
; AVX512-NEXT:    kmovd %k1, %edx
; AVX512-NEXT:    kshiftrw $15, %k0, %k0
; AVX512-NEXT:    vmovd %r9d, %xmm0
; AVX512-NEXT:    kmovd %k0, %r9d
; AVX512-NEXT:    vpinsrb $1, %r8d, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $2, %r10d, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $4, %r14d, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $5, %r15d, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $6, %r12d, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $7, %r13d, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $9, %ebx, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $10, %ebp, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $11, %edi, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $14, %edx, %xmm0, %xmm0
; AVX512-NEXT:    vpinsrb $15, %r9d, %xmm0, %xmm0
; AVX512-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    popq %rbx
; AVX512-NEXT:    popq %r12
; AVX512-NEXT:    popq %r13
; AVX512-NEXT:    popq %r14
; AVX512-NEXT:    popq %r15
; AVX512-NEXT:    popq %rbp
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}


;
; 256-bit vectors
;

; Bitcast an i4 mask to <4 x i1> and zero-extend to <4 x i64> (0/1 per lane).
define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlq $63, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i64:
; AVX512:       # BB#0:
; AVX512-NEXT:    andb $15, %dil
; AVX512-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT:    kmovd %eax, %k1
; AVX512-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}

; Bitcast an i8 mask to <8 x i1> and zero-extend to <8 x i32> (0/1 per lane).
define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrld $31, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrld $31, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i32:
; AVX512:       # BB#0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}

; Bitcast an i16 mask to <16 x i1> and zero-extend to <16 x i16> (0/1 per lane).
define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $15, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i16:
; AVX512:       # BB#0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}

; Bitcast an i32 mask to <32 x i1> and zero-extend to <32 x i8> (0/1 per lane).
define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm1
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i32_32i8:
; AVX512:       # BB#0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = zext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}


;
; 512-bit vectors
;

; Bitcast an i8 mask to <8 x i1> and zero-extend to <8 x i64> (0/1 per lane).
define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlq $63, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlq $63, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlq $63, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsrlq $63, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i64:
; AVX512:       # BB#0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}

; Bitcast an i16 mask to <16 x i1> and zero-extend to <16 x i32> (0/1 per lane).
define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrld $31, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    psrld $31, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    psrld $31, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrld $31, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrld $31, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsrld $31, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i32:
; AVX512:       # BB#0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}

; ext_i32_32i16 -- bitcast an i32 bitmask to <32 x i1> and zero-extend each bit
; into an i16 lane (every result lane is 0 or 1).
; The assertions below are autogenerated (see the update_llc_test_checks.py note
; at the top of the file): do not hand-edit them, regenerate instead.
; Pre-AVX-512 targets have no mask registers, so the lowering broadcasts the
; scalar mask, ANDs it with per-lane power-of-two constants, compares the result
; back against those constants to isolate each bit, then shifts the all-ones
; compare result right by 15 to leave 0/1 per i16 lane.
767define <32 x i16> @ext_i32_32i16(i32 %a0) {
768; SSE2-SSSE3-LABEL: ext_i32_32i16:
769; SSE2-SSSE3: # BB#0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000770; SSE2-SSSE3-NEXT: movd %edi, %xmm2
771; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,0,0,4,5,6,7]
772; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
773; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000774; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000775; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
Simon Pilgrima705db92017-09-24 13:42:31 +0000776; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm0
777; SSE2-SSSE3-NEXT: psrlw $15, %xmm0
778; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
779; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
780; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm1
781; SSE2-SSSE3-NEXT: psrlw $15, %xmm1
782; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,1,1,4,5,6,7]
783; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000784; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000785; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
Simon Pilgrima705db92017-09-24 13:42:31 +0000786; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm2
787; SSE2-SSSE3-NEXT: psrlw $15, %xmm2
788; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
789; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm3
790; SSE2-SSSE3-NEXT: psrlw $15, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000791; SSE2-SSSE3-NEXT: retq
792;
; 128-bit-wide AVX splits the 32 x i16 result across four xmm halves and uses
; a compare-against-zero plus NOT (vpxor with all-ones) instead of comparing
; directly against the bit-pattern constants.
793; AVX1-LABEL: ext_i32_32i16:
794; AVX1: # BB#0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000795; AVX1-NEXT: vmovd %edi, %xmm1
796; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,0,0,4,5,6,7]
797; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
798; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
799; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000800; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
Simon Pilgrima705db92017-09-24 13:42:31 +0000801; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
802; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm4
803; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
804; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
805; AVX1-NEXT: vpsrlw $15, %xmm4, %xmm4
806; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
807; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
808; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
809; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
810; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
811; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
812; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
813; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000814; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
Simon Pilgrima705db92017-09-24 13:42:31 +0000815; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm2
816; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
817; AVX1-NEXT: vpsrlw $15, %xmm2, %xmm2
818; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
819; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
820; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
821; AVX1-NEXT: vpsrlw $15, %xmm1, %xmm1
822; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000823; AVX1-NEXT: retq
824;
; 256-bit integer ops let this target broadcast the low and high i16 halves of
; the mask separately (note the shrl $16 for the upper half) and test 16 bits
; per ymm register.
825; AVX2-LABEL: ext_i32_32i16:
826; AVX2: # BB#0:
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000827; AVX2-NEXT: vmovd %edi, %xmm0
Simon Pilgrima705db92017-09-24 13:42:31 +0000828; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
829; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
830; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
831; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
832; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm0
833; AVX2-NEXT: shrl $16, %edi
834; AVX2-NEXT: vmovd %edi, %xmm2
835; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
836; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm2
837; AVX2-NEXT: vpcmpeqw %ymm1, %ymm2, %ymm1
838; AVX2-NEXT: vpsrlw $15, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000839; AVX2-NEXT: retq
840;
; With AVX-512 BW the whole operation is a mask-register move plus one
; zero-masked load of the splat-1 constant.
841; AVX512-LABEL: ext_i32_32i16:
842; AVX512: # BB#0:
843; AVX512-NEXT: kmovd %edi, %k1
844; AVX512-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z}
845; AVX512-NEXT: retq
; IR under test: each of the 32 mask bits becomes one i16 element (0 or 1).
846 %1 = bitcast i32 %a0 to <32 x i1>
847 %2 = zext <32 x i1> %1 to <32 x i16>
848 ret <32 x i16> %2
849}
850
; ext_i64_64i8 -- bitcast an i64 bitmask to <64 x i1> and zero-extend each bit
; into an i8 lane (every result lane is 0 or 1).
; The assertions below are autogenerated (see the update_llc_test_checks.py note
; at the top of the file): do not hand-edit them, regenerate instead.
; For byte elements the lowering first replicates each mask byte across 8 lanes
; (punpcklbw + pshuflw/pshufhw/pshufd), then ANDs with the repeating
; [1,2,4,...,128] byte pattern, compares back to isolate each bit, shifts right
; by 7, and masks with splat-1 because there is no per-byte shift.
851define <64 x i8> @ext_i64_64i8(i64 %a0) {
852; SSE2-SSSE3-LABEL: ext_i64_64i8:
853; SSE2-SSSE3: # BB#0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000854; SSE2-SSSE3-NEXT: movq %rdi, %xmm3
855; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
856; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
857; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
858; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
859; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
860; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm0
861; SSE2-SSSE3-NEXT: psrlw $7, %xmm0
862; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
863; SSE2-SSSE3-NEXT: pand %xmm5, %xmm0
864; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
865; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
866; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
867; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm1
868; SSE2-SSSE3-NEXT: psrlw $7, %xmm1
869; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
870; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
871; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
872; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
873; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm2
874; SSE2-SSSE3-NEXT: psrlw $7, %xmm2
875; SSE2-SSSE3-NEXT: pand %xmm5, %xmm2
876; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
877; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
878; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
879; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm3
880; SSE2-SSSE3-NEXT: psrlw $7, %xmm3
881; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000882; SSE2-SSSE3-NEXT: retq
883;
; 128-bit-wide AVX builds each ymm half with float-domain insert/extract and
; uses compare-against-zero plus NOT (vpxor with all-ones) to isolate the bits.
884; AVX1-LABEL: ext_i64_64i8:
885; AVX1: # BB#0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000886; AVX1-NEXT: vmovq %rdi, %xmm0
887; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
888; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
889; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
890; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
891; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
892; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
893; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
894; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
895; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
896; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
897; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm3
898; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
899; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
900; AVX1-NEXT: vpsrlw $7, %xmm3, %xmm3
901; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
902; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
903; AVX1-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
904; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
905; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
906; AVX1-NEXT: vpand %xmm6, %xmm0, %xmm0
907; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
908; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
909; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
910; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
911; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
912; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
913; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
914; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
915; AVX1-NEXT: vpcmpeqb %xmm4, %xmm2, %xmm2
916; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
917; AVX1-NEXT: vpsrlw $7, %xmm2, %xmm2
918; AVX1-NEXT: vpand %xmm6, %xmm2, %xmm2
919; AVX1-NEXT: vpcmpeqb %xmm4, %xmm1, %xmm1
920; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
921; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
922; AVX1-NEXT: vpand %xmm6, %xmm1, %xmm1
923; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000924; AVX1-NEXT: retq
925;
; 256-bit integer ops broadcast the [1,2,4,...,128] byte pattern as a single
; splatted q-word (9241421688590303745 == 0x8040201008040201) and compare
; 32 bytes at a time.
926; AVX2-LABEL: ext_i64_64i8:
927; AVX2: # BB#0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000928; AVX2-NEXT: vmovq %rdi, %xmm0
929; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
930; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
931; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
932; AVX2-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
933; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
934; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
935; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
936; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
937; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
938; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm0
939; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
940; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0
941; AVX2-NEXT: vpshufhw {{.*#+}} xmm4 = xmm1[0,1,2,3,4,4,5,5]
942; AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3]
943; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
944; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
945; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm4, %ymm1
946; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
947; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
948; AVX2-NEXT: vpsrlw $7, %ymm1, %ymm1
949; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000950; AVX2-NEXT: retq
951;
; With AVX-512 BW the whole operation is a 64-bit mask-register move plus one
; zero-masked byte load of the splat-1 constant.
952; AVX512-LABEL: ext_i64_64i8:
953; AVX512: # BB#0:
954; AVX512-NEXT: kmovq %rdi, %k1
955; AVX512-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
956; AVX512-NEXT: retq
; IR under test: each of the 64 mask bits becomes one i8 element (0 or 1).
957 %1 = bitcast i64 %a0 to <64 x i1>
958 %2 = zext <64 x i1> %1 to <64 x i8>
959 ret <64 x i8> %2
960}