; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512VLBW

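; The tests below bitcast an iN integer mask to an <N x i1> vector and zero
; extend it to a wider element type, covering the SSE2/SSSE3, AVX1/AVX2,
; AVX512F and AVX512VL+BW lowerings checked by the prefixes above.
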
;
; 128-bit vectors
;

define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrlq $63, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1: # BB#0:
; AVX1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2: # BB#0:
; AVX2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i2_2i64:
; AVX512F: # BB#0:
; AVX512F-NEXT: andb $3, %dil
; AVX512F-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512F-NEXT: kmovw %eax, %k1
; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i2_2i64:
; AVX512VLBW: # BB#0:
; AVX512VLBW-NEXT: andb $3, %dil
; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512VLBW-NEXT: kmovd %eax, %k1
; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512VLBW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512VLBW-NEXT: vzeroupper
; AVX512VLBW-NEXT: retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = zext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrld $31, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2: # BB#0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i4_4i32:
; AVX512F: # BB#0:
; AVX512F-NEXT: andb $15, %dil
; AVX512F-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512F-NEXT: kmovw %eax, %k1
; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i4_4i32:
; AVX512VLBW: # BB#0:
; AVX512VLBW-NEXT: andb $15, %dil
; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512VLBW-NEXT: kmovd %eax, %k1
; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VLBW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512VLBW-NEXT: vzeroupper
; AVX512VLBW-NEXT: retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrlw $15, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2: # BB#0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i8_8i16:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i8_8i16:
; AVX512VLBW: # BB#0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vmovdqu16 {{.*}}(%rip), %xmm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2: # BB#0:
; SSE2-NEXT: movd %edi, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; SSE2-NEXT: psrlw $7, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3: # BB#0:
; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT: psrlw $7, %xmm0
; SSSE3-NEXT: pand {{.*}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2: # BB#0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $7, %xmm0, %xmm0
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i16_16i8:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i16_16i8:
; AVX512VLBW: # BB#0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vmovdqu8 {{.*}}(%rip), %xmm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;

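; On SSE2/SSSE3 and AVX1 the 256-bit results are assembled from two 128-bit
; halves, AVX2 operates on a single ymm register, and AVX512 broadcasts
; through a mask register.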
define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrlq $63, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: psrlq $63, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1: # BB#0:
; AVX1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlq $63, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2: # BB#0:
; AVX2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i4_4i64:
; AVX512F: # BB#0:
; AVX512F-NEXT: andb $15, %dil
; AVX512F-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512F-NEXT: kmovw %eax, %k1
; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i4_4i64:
; AVX512VLBW: # BB#0:
; AVX512VLBW-NEXT: andb $15, %dil
; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512VLBW-NEXT: kmovd %eax, %k1
; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512VLBW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512VLBW-NEXT: retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT: psrld $31, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT: psrld $31, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrld $31, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2: # BB#0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i8_8i32:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i8_8i32:
; AVX512VLBW: # BB#0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}

define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT: psrlw $15, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT: psrlw $15, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $15, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2: # BB#0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i16_16i16:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i16_16i16:
; AVX512VLBW: # BB#0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}

define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT: psrlw $7, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT: psrlw $7, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i32_32i8:
; AVX2: # BB#0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm0
; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i32_32i8:
; AVX512F: # BB#0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: .cfi_def_cfa_offset 16
; AVX512F-NEXT: .cfi_offset %rbp, -16
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: .cfi_def_cfa_register %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $32, %rsp
; AVX512F-NEXT: movl %edi, (%rsp)
; AVX512F-NEXT: movl {{.*}}(%rip), %eax
; AVX512F-NEXT: kmovw (%rsp), %k1
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i32_32i8:
; AVX512VLBW: # BB#0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = zext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;

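; The 512-bit results span four xmm registers on SSE2/SSSE3 and two ymm
; registers on AVX1/AVX2; AVX512 can return a single zmm register when the
; element type is legal for the subtarget.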
define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrlq $63, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: psrlq $63, %xmm1
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT: psrlq $63, %xmm2
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: psrlq $63, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1: # BB#0:
; AVX1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsrlq $63, %xmm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsrlq $63, %xmm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpsrlq $63, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2: # BB#0:
; AVX2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpsrlq $63, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i8_8i64:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i8_8i64:
; AVX512VLBW: # BB#0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrld $31, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT: psrld $31, %xmm1
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT: psrld $31, %xmm2
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT: psrld $31, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsrld $31, %xmm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpsrld $31, %xmm3, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2: # BB#0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpsrld $31, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i16_16i32:
; AVX512F: # BB#0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i16_16i32:
; AVX512VLBW: # BB#0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}

define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm2
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT: psrlw $15, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT: psrlw $15, %xmm1
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT: psrlw $15, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT: psrlw $15, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1: # BB#0:
; AVX1-NEXT: vmovd %edi, %xmm1
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vpsrlw $15, %xmm4, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm2
; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $15, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $15, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2: # BB#0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT: shrl $16, %edi
; AVX2-NEXT: vmovd %edi, %xmm2
; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpsrlw $15, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i32_32i16:
; AVX512F: # BB#0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: .cfi_def_cfa_offset 16
; AVX512F-NEXT: .cfi_offset %rbp, -16
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: .cfi_def_cfa_register %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $32, %rsp
; AVX512F-NEXT: movl %edi, (%rsp)
; AVX512F-NEXT: kmovw (%rsp), %k1
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i32_32i16:
; AVX512VLBW: # BB#0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = zext <32 x i1> %1 to <32 x i16>
  ret <32 x i16> %2
}

define <64 x i8> @ext_i64_64i8(i64 %a0) {
; SSE2-SSSE3-LABEL: ext_i64_64i8:
; SSE2-SSSE3: # BB#0:
; SSE2-SSSE3-NEXT: movq %rdi, %xmm3
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm0
; SSE2-SSSE3-NEXT: psrlw $7, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm1
; SSE2-SSSE3-NEXT: psrlw $7, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm2
; SSE2-SSSE3-NEXT: psrlw $7, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm2
; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm3
; SSE2-SSSE3-NEXT: psrlw $7, %xmm3
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i64_64i8:
; AVX1: # BB#0:
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpsrlw $7, %xmm3, %xmm3
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm6, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpcmpeqb %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $7, %xmm2, %xmm2
; AVX1-NEXT: vpand %xmm6, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqb %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
; AVX1-NEXT: vpand %xmm6, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i64_64i8:
; AVX2: # BB#0:
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0
; AVX2-NEXT: vpshufhw {{.*#+}} xmm4 = xmm1[0,1,2,3,4,4,5,5]
; AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3]
; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm4, %ymm1
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpsrlw $7, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i64_64i8:
; AVX512F: # BB#0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: .cfi_def_cfa_offset 16
; AVX512F-NEXT: .cfi_offset %rbp, -16
; AVX512F-NEXT: movq %rsp, %rbp
; AVX512F-NEXT: .cfi_def_cfa_register %rbp
; AVX512F-NEXT: andq $-32, %rsp
; AVX512F-NEXT: subq $64, %rsp
; AVX512F-NEXT: movl %edi, (%rsp)
; AVX512F-NEXT: shrq $32, %rdi
; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: movl {{.*}}(%rip), %eax
; AVX512F-NEXT: kmovw (%rsp), %k1
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vpbroadcastd %eax, %zmm2 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT: movq %rbp, %rsp
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i64_64i8:
; AVX512VLBW: # BB#0:
; AVX512VLBW-NEXT: kmovq %rdi, %k1
; AVX512VLBW-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
  %1 = bitcast i64 %a0 to <64 x i1>
  %2 = zext <64 x i1> %1 to <64 x i8>
  ret <64 x i8> %2
}