; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512VLBW

;
; 128-bit vectors
;

; Bitcast an i2 bit-mask to <2 x i1> and zero-extend it to <2 x i64>.
; (Scrape-garbled blame prefixes stripped; CHECK lines restored verbatim.)
define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i2_2i64:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    andb $3, %dil
; AVX512F-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512F-NEXT:    kmovw %eax, %k1
; AVX512F-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i2_2i64:
; AVX512VLBW:       # BB#0:
; AVX512VLBW-NEXT:    andb $3, %dil
; AVX512VLBW-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX512VLBW-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512VLBW-NEXT:    kmovd %eax, %k1
; AVX512VLBW-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512VLBW-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512VLBW-NEXT:    vzeroupper
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = zext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}
; Bitcast an i4 bit-mask to <4 x i1> and zero-extend it to <4 x i32>.
; (Scrape-garbled blame prefixes stripped; CHECK lines restored verbatim.)
define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i4_4i32:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    andb $15, %dil
; AVX512F-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512F-NEXT:    kmovw %eax, %k1
; AVX512F-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512F-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i4_4i32:
; AVX512VLBW:       # BB#0:
; AVX512VLBW-NEXT:    andb $15, %dil
; AVX512VLBW-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX512VLBW-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512VLBW-NEXT:    kmovd %eax, %k1
; AVX512VLBW-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VLBW-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512VLBW-NEXT:    vzeroupper
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}
; Bitcast an i8 bit-mask to <8 x i1> and zero-extend it to <8 x i16>.
; (Scrape-garbled blame prefixes stripped; CHECK lines restored verbatim.)
define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i8_8i16:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    kmovw %edi, %k5
; AVX512F-NEXT:    kshiftlw $8, %k5, %k0
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    kshiftlw $9, %k5, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kshiftlw $10, %k5, %k2
; AVX512F-NEXT:    kshiftrw $15, %k2, %k2
; AVX512F-NEXT:    kshiftlw $11, %k5, %k3
; AVX512F-NEXT:    kshiftrw $15, %k3, %k3
; AVX512F-NEXT:    kshiftlw $12, %k5, %k4
; AVX512F-NEXT:    kshiftrw $15, %k4, %k4
; AVX512F-NEXT:    kshiftlw $13, %k5, %k6
; AVX512F-NEXT:    kshiftrw $15, %k6, %k6
; AVX512F-NEXT:    kshiftlw $15, %k5, %k7
; AVX512F-NEXT:    kshiftrw $15, %k7, %k7
; AVX512F-NEXT:    kshiftlw $14, %k5, %k5
; AVX512F-NEXT:    kshiftrw $15, %k5, %k5
; AVX512F-NEXT:    kmovw %k5, %eax
; AVX512F-NEXT:    andl $1, %eax
; AVX512F-NEXT:    kmovw %k7, %ecx
; AVX512F-NEXT:    andl $1, %ecx
; AVX512F-NEXT:    vmovd %ecx, %xmm0
; AVX512F-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
; AVX512F-NEXT:    kmovw %k6, %eax
; AVX512F-NEXT:    andl $1, %eax
; AVX512F-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
; AVX512F-NEXT:    kmovw %k4, %eax
; AVX512F-NEXT:    andl $1, %eax
; AVX512F-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0
; AVX512F-NEXT:    kmovw %k3, %eax
; AVX512F-NEXT:    andl $1, %eax
; AVX512F-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
; AVX512F-NEXT:    kmovw %k2, %eax
; AVX512F-NEXT:    andl $1, %eax
; AVX512F-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    andl $1, %eax
; AVX512F-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
; AVX512F-NEXT:    kmovw %k0, %eax
; AVX512F-NEXT:    andl $1, %eax
; AVX512F-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i8_8i16:
; AVX512VLBW:       # BB#0:
; AVX512VLBW-NEXT:    kmovd %edi, %k5
; AVX512VLBW-NEXT:    kshiftlw $8, %k5, %k0
; AVX512VLBW-NEXT:    kshiftrw $15, %k0, %k0
; AVX512VLBW-NEXT:    kshiftlw $9, %k5, %k1
; AVX512VLBW-NEXT:    kshiftrw $15, %k1, %k1
; AVX512VLBW-NEXT:    kshiftlw $10, %k5, %k2
; AVX512VLBW-NEXT:    kshiftrw $15, %k2, %k2
; AVX512VLBW-NEXT:    kshiftlw $11, %k5, %k3
; AVX512VLBW-NEXT:    kshiftrw $15, %k3, %k3
; AVX512VLBW-NEXT:    kshiftlw $12, %k5, %k4
; AVX512VLBW-NEXT:    kshiftrw $15, %k4, %k4
; AVX512VLBW-NEXT:    kshiftlw $13, %k5, %k6
; AVX512VLBW-NEXT:    kshiftrw $15, %k6, %k6
; AVX512VLBW-NEXT:    kshiftlw $15, %k5, %k7
; AVX512VLBW-NEXT:    kshiftrw $15, %k7, %k7
; AVX512VLBW-NEXT:    kshiftlw $14, %k5, %k5
; AVX512VLBW-NEXT:    kshiftrw $15, %k5, %k5
; AVX512VLBW-NEXT:    kmovd %k5, %eax
; AVX512VLBW-NEXT:    andl $1, %eax
; AVX512VLBW-NEXT:    kmovd %k7, %ecx
; AVX512VLBW-NEXT:    andl $1, %ecx
; AVX512VLBW-NEXT:    vmovd %ecx, %xmm0
; AVX512VLBW-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0
; AVX512VLBW-NEXT:    kmovd %k6, %eax
; AVX512VLBW-NEXT:    andl $1, %eax
; AVX512VLBW-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0
; AVX512VLBW-NEXT:    kmovd %k4, %eax
; AVX512VLBW-NEXT:    andl $1, %eax
; AVX512VLBW-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0
; AVX512VLBW-NEXT:    kmovd %k3, %eax
; AVX512VLBW-NEXT:    andl $1, %eax
; AVX512VLBW-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0
; AVX512VLBW-NEXT:    kmovd %k2, %eax
; AVX512VLBW-NEXT:    andl $1, %eax
; AVX512VLBW-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0
; AVX512VLBW-NEXT:    kmovd %k1, %eax
; AVX512VLBW-NEXT:    andl $1, %eax
; AVX512VLBW-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0
; AVX512VLBW-NEXT:    kmovd %k0, %eax
; AVX512VLBW-NEXT:    andl $1, %eax
; AVX512VLBW-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}
; Bitcast an i16 bit-mask to <16 x i1> and zero-extend it to <16 x i8>.
; (Scrape-garbled blame prefixes stripped; CHECK lines restored verbatim.)
define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2:       # BB#0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    psrlw $7, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movd %edi, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT:    psrlw $7, %xmm0
; SSSE3-NEXT:    pand {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i16_16i8:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    pushq %rbp
; AVX512F-NEXT:    .cfi_def_cfa_offset 16
; AVX512F-NEXT:    pushq %r15
; AVX512F-NEXT:    .cfi_def_cfa_offset 24
; AVX512F-NEXT:    pushq %r14
; AVX512F-NEXT:    .cfi_def_cfa_offset 32
; AVX512F-NEXT:    pushq %r13
; AVX512F-NEXT:    .cfi_def_cfa_offset 40
; AVX512F-NEXT:    pushq %r12
; AVX512F-NEXT:    .cfi_def_cfa_offset 48
; AVX512F-NEXT:    pushq %rbx
; AVX512F-NEXT:    .cfi_def_cfa_offset 56
; AVX512F-NEXT:    .cfi_offset %rbx, -56
; AVX512F-NEXT:    .cfi_offset %r12, -48
; AVX512F-NEXT:    .cfi_offset %r13, -40
; AVX512F-NEXT:    .cfi_offset %r14, -32
; AVX512F-NEXT:    .cfi_offset %r15, -24
; AVX512F-NEXT:    .cfi_offset %rbp, -16
; AVX512F-NEXT:    kmovw %edi, %k0
; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %r8d
; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %r9d
; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %r10d
; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %r11d
; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %r14d
; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %r15d
; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %r12d
; AVX512F-NEXT:    kshiftlw $8, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %r13d
; AVX512F-NEXT:    kshiftlw $7, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %esi
; AVX512F-NEXT:    kshiftlw $6, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %ebx
; AVX512F-NEXT:    kshiftlw $5, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %ebp
; AVX512F-NEXT:    kshiftlw $4, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %edi
; AVX512F-NEXT:    kshiftlw $3, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %eax
; AVX512F-NEXT:    kshiftlw $2, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %ecx
; AVX512F-NEXT:    kshiftlw $1, %k0, %k1
; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
; AVX512F-NEXT:    kmovw %k1, %edx
; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
; AVX512F-NEXT:    vmovd %r9d, %xmm0
; AVX512F-NEXT:    kmovw %k0, %r9d
; AVX512F-NEXT:    vpinsrb $1, %r8d, %xmm0, %xmm0
; AVX512F-NEXT:    vpinsrb $2, %r10d, %xmm0, %xmm0
; AVX512F-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
; AVX512F-NEXT:    vpinsrb $4, %r14d, %xmm0, %xmm0
; AVX512F-NEXT:    vpinsrb $5, %r15d, %xmm0, %xmm0
; AVX512F-NEXT:    vpinsrb $6, %r12d, %xmm0, %xmm0
; AVX512F-NEXT:    vpinsrb $7, %r13d, %xmm0, %xmm0
; AVX512F-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0
; AVX512F-NEXT:    vpinsrb $9, %ebx, %xmm0, %xmm0
; AVX512F-NEXT:    vpinsrb $10, %ebp, %xmm0, %xmm0
; AVX512F-NEXT:    vpinsrb $11, %edi, %xmm0, %xmm0
; AVX512F-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
; AVX512F-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
; AVX512F-NEXT:    vpinsrb $14, %edx, %xmm0, %xmm0
; AVX512F-NEXT:    vpinsrb $15, %r9d, %xmm0, %xmm0
; AVX512F-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT:    popq %rbx
; AVX512F-NEXT:    popq %r12
; AVX512F-NEXT:    popq %r13
; AVX512F-NEXT:    popq %r14
; AVX512F-NEXT:    popq %r15
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i16_16i8:
; AVX512VLBW:       # BB#0:
; AVX512VLBW-NEXT:    pushq %rbp
; AVX512VLBW-NEXT:    .cfi_def_cfa_offset 16
; AVX512VLBW-NEXT:    pushq %r15
; AVX512VLBW-NEXT:    .cfi_def_cfa_offset 24
; AVX512VLBW-NEXT:    pushq %r14
; AVX512VLBW-NEXT:    .cfi_def_cfa_offset 32
; AVX512VLBW-NEXT:    pushq %r13
; AVX512VLBW-NEXT:    .cfi_def_cfa_offset 40
; AVX512VLBW-NEXT:    pushq %r12
; AVX512VLBW-NEXT:    .cfi_def_cfa_offset 48
; AVX512VLBW-NEXT:    pushq %rbx
; AVX512VLBW-NEXT:    .cfi_def_cfa_offset 56
; AVX512VLBW-NEXT:    .cfi_offset %rbx, -56
; AVX512VLBW-NEXT:    .cfi_offset %r12, -48
; AVX512VLBW-NEXT:    .cfi_offset %r13, -40
; AVX512VLBW-NEXT:    .cfi_offset %r14, -32
; AVX512VLBW-NEXT:    .cfi_offset %r15, -24
; AVX512VLBW-NEXT:    .cfi_offset %rbp, -16
; AVX512VLBW-NEXT:    kmovd %edi, %k0
; AVX512VLBW-NEXT:    kshiftlw $14, %k0, %k1
; AVX512VLBW-NEXT:    kshiftrw $15, %k1, %k1
; AVX512VLBW-NEXT:    kmovd %k1, %r8d
; AVX512VLBW-NEXT:    kshiftlw $15, %k0, %k1
; AVX512VLBW-NEXT:    kshiftrw $15, %k1, %k1
; AVX512VLBW-NEXT:    kmovd %k1, %r9d
; AVX512VLBW-NEXT:    kshiftlw $13, %k0, %k1
; AVX512VLBW-NEXT:    kshiftrw $15, %k1, %k1
; AVX512VLBW-NEXT:    kmovd %k1, %r10d
; AVX512VLBW-NEXT:    kshiftlw $12, %k0, %k1
; AVX512VLBW-NEXT:    kshiftrw $15, %k1, %k1
; AVX512VLBW-NEXT:    kmovd %k1, %r11d
; AVX512VLBW-NEXT:    kshiftlw $11, %k0, %k1
; AVX512VLBW-NEXT:    kshiftrw $15, %k1, %k1
; AVX512VLBW-NEXT:    kmovd %k1, %r14d
; AVX512VLBW-NEXT:    kshiftlw $10, %k0, %k1
; AVX512VLBW-NEXT:    kshiftrw $15, %k1, %k1
; AVX512VLBW-NEXT:    kmovd %k1, %r15d
; AVX512VLBW-NEXT:    kshiftlw $9, %k0, %k1
; AVX512VLBW-NEXT:    kshiftrw $15, %k1, %k1
; AVX512VLBW-NEXT:    kmovd %k1, %r12d
; AVX512VLBW-NEXT:    kshiftlw $8, %k0, %k1
; AVX512VLBW-NEXT:    kshiftrw $15, %k1, %k1
; AVX512VLBW-NEXT:    kmovd %k1, %r13d
; AVX512VLBW-NEXT:    kshiftlw $7, %k0, %k1
; AVX512VLBW-NEXT:    kshiftrw $15, %k1, %k1
; AVX512VLBW-NEXT:    kmovd %k1, %esi
; AVX512VLBW-NEXT:    kshiftlw $6, %k0, %k1
; AVX512VLBW-NEXT:    kshiftrw $15, %k1, %k1
; AVX512VLBW-NEXT:    kmovd %k1, %ebx
; AVX512VLBW-NEXT:    kshiftlw $5, %k0, %k1
; AVX512VLBW-NEXT:    kshiftrw $15, %k1, %k1
; AVX512VLBW-NEXT:    kmovd %k1, %ebp
; AVX512VLBW-NEXT:    kshiftlw $4, %k0, %k1
; AVX512VLBW-NEXT:    kshiftrw $15, %k1, %k1
; AVX512VLBW-NEXT:    kmovd %k1, %edi
; AVX512VLBW-NEXT:    kshiftlw $3, %k0, %k1
; AVX512VLBW-NEXT:    kshiftrw $15, %k1, %k1
; AVX512VLBW-NEXT:    kmovd %k1, %eax
; AVX512VLBW-NEXT:    kshiftlw $2, %k0, %k1
; AVX512VLBW-NEXT:    kshiftrw $15, %k1, %k1
; AVX512VLBW-NEXT:    kmovd %k1, %ecx
; AVX512VLBW-NEXT:    kshiftlw $1, %k0, %k1
; AVX512VLBW-NEXT:    kshiftrw $15, %k1, %k1
; AVX512VLBW-NEXT:    kmovd %k1, %edx
; AVX512VLBW-NEXT:    kshiftrw $15, %k0, %k0
; AVX512VLBW-NEXT:    vmovd %r9d, %xmm0
; AVX512VLBW-NEXT:    kmovd %k0, %r9d
; AVX512VLBW-NEXT:    vpinsrb $1, %r8d, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vpinsrb $2, %r10d, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vpinsrb $4, %r14d, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vpinsrb $5, %r15d, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vpinsrb $6, %r12d, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vpinsrb $7, %r13d, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vpinsrb $9, %ebx, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vpinsrb $10, %ebp, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vpinsrb $11, %edi, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vpinsrb $13, %ecx, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vpinsrb $14, %edx, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vpinsrb $15, %r9d, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX512VLBW-NEXT:    popq %rbx
; AVX512VLBW-NEXT:    popq %r12
; AVX512VLBW-NEXT:    popq %r13
; AVX512VLBW-NEXT:    popq %r14
; AVX512VLBW-NEXT:    popq %r15
; AVX512VLBW-NEXT:    popq %rbp
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;

; Bitcast an i4 bit-mask to <4 x i1> and zero-extend it to <4 x i64>.
; (Scrape-garbled blame prefixes stripped; CHECK lines restored verbatim.)
define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlq $63, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i4_4i64:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    andb $15, %dil
; AVX512F-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512F-NEXT:    kmovw %eax, %k1
; AVX512F-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i4_4i64:
; AVX512VLBW:       # BB#0:
; AVX512VLBW-NEXT:    andb $15, %dil
; AVX512VLBW-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX512VLBW-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512VLBW-NEXT:    kmovd %eax, %k1
; AVX512VLBW-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512VLBW-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}
; Bitcast an i8 bit-mask to <8 x i1> and zero-extend it to <8 x i32>.
; (Scrape-garbled blame prefixes stripped; CHECK lines restored verbatim.)
define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrld $31, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrld $31, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i8_8i32:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovqd %zmm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i8_8i32:
; AVX512VLBW:       # BB#0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}
; Bitcast an i16 bit-mask to <16 x i1> and zero-extend it to <16 x i16>.
; (Scrape-garbled blame prefixes stripped; CHECK lines restored verbatim.)
define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $15, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i16_16i16:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i16_16i16:
; AVX512VLBW:       # BB#0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}
700define <32 x i8> @ext_i32_32i8(i32 %a0) {
701; SSE2-SSSE3-LABEL: ext_i32_32i8:
702; SSE2-SSSE3: # BB#0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000703; SSE2-SSSE3-NEXT: movd %edi, %xmm1
704; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
705; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
706; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
707; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
708; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
709; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm0
710; SSE2-SSSE3-NEXT: psrlw $7, %xmm0
711; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
712; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
713; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
714; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
715; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
716; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm1
717; SSE2-SSSE3-NEXT: psrlw $7, %xmm1
718; SSE2-SSSE3-NEXT: pand %xmm3, %xmm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000719; SSE2-SSSE3-NEXT: retq
720;
721; AVX1-LABEL: ext_i32_32i8:
722; AVX1: # BB#0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000723; AVX1-NEXT: vmovd %edi, %xmm0
724; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
725; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
726; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
727; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
728; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000729; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
Simon Pilgrima705db92017-09-24 13:42:31 +0000730; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
731; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
732; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
733; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
734; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
735; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
736; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
737; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
738; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
739; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
740; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
741; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
742; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
743; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000744; AVX1-NEXT: retq
745;
746; AVX2-LABEL: ext_i32_32i8:
747; AVX2: # BB#0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000748; AVX2-NEXT: vmovd %edi, %xmm0
749; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
750; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
751; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
752; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
753; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000754; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
Simon Pilgrima705db92017-09-24 13:42:31 +0000755; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
756; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
757; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
758; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm0
759; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000760; AVX2-NEXT: retq
761;
Craig Topper5befc5b2017-11-28 01:36:31 +0000762; AVX512F-LABEL: ext_i32_32i8:
763; AVX512F: # BB#0:
764; AVX512F-NEXT: pushq %rbp
765; AVX512F-NEXT: .cfi_def_cfa_offset 16
766; AVX512F-NEXT: .cfi_offset %rbp, -16
767; AVX512F-NEXT: movq %rsp, %rbp
768; AVX512F-NEXT: .cfi_def_cfa_register %rbp
769; AVX512F-NEXT: andq $-32, %rsp
770; AVX512F-NEXT: subq $32, %rsp
771; AVX512F-NEXT: movl %edi, (%rsp)
772; AVX512F-NEXT: kmovw (%rsp), %k0
773; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
774; AVX512F-NEXT: kshiftlw $14, %k1, %k2
775; AVX512F-NEXT: kshiftrw $15, %k2, %k2
776; AVX512F-NEXT: kmovw %k2, %eax
777; AVX512F-NEXT: kshiftlw $15, %k1, %k2
778; AVX512F-NEXT: kshiftrw $15, %k2, %k2
779; AVX512F-NEXT: kmovw %k2, %ecx
780; AVX512F-NEXT: vmovd %ecx, %xmm0
781; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
782; AVX512F-NEXT: kshiftlw $13, %k1, %k2
783; AVX512F-NEXT: kshiftrw $15, %k2, %k2
784; AVX512F-NEXT: kmovw %k2, %eax
785; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
786; AVX512F-NEXT: kshiftlw $12, %k1, %k2
787; AVX512F-NEXT: kshiftrw $15, %k2, %k2
788; AVX512F-NEXT: kmovw %k2, %eax
789; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
790; AVX512F-NEXT: kshiftlw $11, %k1, %k2
791; AVX512F-NEXT: kshiftrw $15, %k2, %k2
792; AVX512F-NEXT: kmovw %k2, %eax
793; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
794; AVX512F-NEXT: kshiftlw $10, %k1, %k2
795; AVX512F-NEXT: kshiftrw $15, %k2, %k2
796; AVX512F-NEXT: kmovw %k2, %eax
797; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
798; AVX512F-NEXT: kshiftlw $9, %k1, %k2
799; AVX512F-NEXT: kshiftrw $15, %k2, %k2
800; AVX512F-NEXT: kmovw %k2, %eax
801; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
802; AVX512F-NEXT: kshiftlw $8, %k1, %k2
803; AVX512F-NEXT: kshiftrw $15, %k2, %k2
804; AVX512F-NEXT: kmovw %k2, %eax
805; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
806; AVX512F-NEXT: kshiftlw $7, %k1, %k2
807; AVX512F-NEXT: kshiftrw $15, %k2, %k2
808; AVX512F-NEXT: kmovw %k2, %eax
809; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
810; AVX512F-NEXT: kshiftlw $6, %k1, %k2
811; AVX512F-NEXT: kshiftrw $15, %k2, %k2
812; AVX512F-NEXT: kmovw %k2, %eax
813; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
814; AVX512F-NEXT: kshiftlw $5, %k1, %k2
815; AVX512F-NEXT: kshiftrw $15, %k2, %k2
816; AVX512F-NEXT: kmovw %k2, %eax
817; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
818; AVX512F-NEXT: kshiftlw $4, %k1, %k2
819; AVX512F-NEXT: kshiftrw $15, %k2, %k2
820; AVX512F-NEXT: kmovw %k2, %eax
821; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
822; AVX512F-NEXT: kshiftlw $3, %k1, %k2
823; AVX512F-NEXT: kshiftrw $15, %k2, %k2
824; AVX512F-NEXT: kmovw %k2, %eax
825; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
826; AVX512F-NEXT: kshiftlw $2, %k1, %k2
827; AVX512F-NEXT: kshiftrw $15, %k2, %k2
828; AVX512F-NEXT: kmovw %k2, %eax
829; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
830; AVX512F-NEXT: kshiftlw $1, %k1, %k2
831; AVX512F-NEXT: kshiftrw $15, %k2, %k2
832; AVX512F-NEXT: kmovw %k2, %eax
833; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
834; AVX512F-NEXT: kshiftrw $15, %k1, %k1
835; AVX512F-NEXT: kmovw %k1, %eax
836; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
837; AVX512F-NEXT: kshiftlw $14, %k0, %k1
838; AVX512F-NEXT: kshiftrw $15, %k1, %k1
839; AVX512F-NEXT: kmovw %k1, %eax
840; AVX512F-NEXT: kshiftlw $15, %k0, %k1
841; AVX512F-NEXT: kshiftrw $15, %k1, %k1
842; AVX512F-NEXT: kmovw %k1, %ecx
843; AVX512F-NEXT: vmovd %ecx, %xmm1
844; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
845; AVX512F-NEXT: kshiftlw $13, %k0, %k1
846; AVX512F-NEXT: kshiftrw $15, %k1, %k1
847; AVX512F-NEXT: kmovw %k1, %eax
848; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
849; AVX512F-NEXT: kshiftlw $12, %k0, %k1
850; AVX512F-NEXT: kshiftrw $15, %k1, %k1
851; AVX512F-NEXT: kmovw %k1, %eax
852; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
853; AVX512F-NEXT: kshiftlw $11, %k0, %k1
854; AVX512F-NEXT: kshiftrw $15, %k1, %k1
855; AVX512F-NEXT: kmovw %k1, %eax
856; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
857; AVX512F-NEXT: kshiftlw $10, %k0, %k1
858; AVX512F-NEXT: kshiftrw $15, %k1, %k1
859; AVX512F-NEXT: kmovw %k1, %eax
860; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
861; AVX512F-NEXT: kshiftlw $9, %k0, %k1
862; AVX512F-NEXT: kshiftrw $15, %k1, %k1
863; AVX512F-NEXT: kmovw %k1, %eax
864; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
865; AVX512F-NEXT: kshiftlw $8, %k0, %k1
866; AVX512F-NEXT: kshiftrw $15, %k1, %k1
867; AVX512F-NEXT: kmovw %k1, %eax
868; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
869; AVX512F-NEXT: kshiftlw $7, %k0, %k1
870; AVX512F-NEXT: kshiftrw $15, %k1, %k1
871; AVX512F-NEXT: kmovw %k1, %eax
872; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
873; AVX512F-NEXT: kshiftlw $6, %k0, %k1
874; AVX512F-NEXT: kshiftrw $15, %k1, %k1
875; AVX512F-NEXT: kmovw %k1, %eax
876; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
877; AVX512F-NEXT: kshiftlw $5, %k0, %k1
878; AVX512F-NEXT: kshiftrw $15, %k1, %k1
879; AVX512F-NEXT: kmovw %k1, %eax
880; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
881; AVX512F-NEXT: kshiftlw $4, %k0, %k1
882; AVX512F-NEXT: kshiftrw $15, %k1, %k1
883; AVX512F-NEXT: kmovw %k1, %eax
884; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
885; AVX512F-NEXT: kshiftlw $3, %k0, %k1
886; AVX512F-NEXT: kshiftrw $15, %k1, %k1
887; AVX512F-NEXT: kmovw %k1, %eax
888; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
889; AVX512F-NEXT: kshiftlw $2, %k0, %k1
890; AVX512F-NEXT: kshiftrw $15, %k1, %k1
891; AVX512F-NEXT: kmovw %k1, %eax
892; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
893; AVX512F-NEXT: kshiftlw $1, %k0, %k1
894; AVX512F-NEXT: kshiftrw $15, %k1, %k1
895; AVX512F-NEXT: kmovw %k1, %eax
896; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
897; AVX512F-NEXT: kshiftrw $15, %k0, %k0
898; AVX512F-NEXT: kmovw %k0, %eax
899; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
900; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
901; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
902; AVX512F-NEXT: movq %rbp, %rsp
903; AVX512F-NEXT: popq %rbp
904; AVX512F-NEXT: retq
905;
906; AVX512VLBW-LABEL: ext_i32_32i8:
907; AVX512VLBW: # BB#0:
908; AVX512VLBW-NEXT: kmovd %edi, %k1
909; AVX512VLBW-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
910; AVX512VLBW-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000911 %1 = bitcast i32 %a0 to <32 x i1>
912 %2 = zext <32 x i1> %1 to <32 x i8>
913 ret <32 x i8> %2
914}
915
916;
917; 512-bit vectors
918;
919
920define <8 x i64> @ext_i8_8i64(i8 %a0) {
921; SSE2-SSSE3-LABEL: ext_i8_8i64:
922; SSE2-SSSE3: # BB#0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000923; SSE2-SSSE3-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
924; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
925; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
926; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
927; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
928; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
929; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
930; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
931; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
932; SSE2-SSSE3-NEXT: psrlq $63, %xmm0
933; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
934; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm2
935; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
936; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
937; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
938; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
939; SSE2-SSSE3-NEXT: psrlq $63, %xmm1
940; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32]
941; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm3
942; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3
943; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm3
944; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
945; SSE2-SSSE3-NEXT: pand %xmm3, %xmm2
946; SSE2-SSSE3-NEXT: psrlq $63, %xmm2
947; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [64,128]
948; SSE2-SSSE3-NEXT: pand %xmm3, %xmm4
949; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm4
950; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000951; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
Simon Pilgrima705db92017-09-24 13:42:31 +0000952; SSE2-SSSE3-NEXT: psrlq $63, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000953; SSE2-SSSE3-NEXT: retq
954;
955; AVX1-LABEL: ext_i8_8i64:
956; AVX1: # BB#0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000957; AVX1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
958; AVX1-NEXT: vmovq %rdi, %xmm0
959; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
960; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
961; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
962; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
963; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm3
964; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
965; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
966; AVX1-NEXT: vpsrlq $63, %xmm3, %xmm3
967; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
968; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
969; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
970; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
971; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
972; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
973; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm3
974; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
975; AVX1-NEXT: vpsrlq $63, %xmm3, %xmm3
976; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
977; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
978; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
979; AVX1-NEXT: vpsrlq $63, %xmm1, %xmm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000980; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000981; AVX1-NEXT: retq
982;
983; AVX2-LABEL: ext_i8_8i64:
984; AVX2: # BB#0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000985; AVX2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
986; AVX2-NEXT: vmovq %rdi, %xmm0
987; AVX2-NEXT: vpbroadcastq %xmm0, %ymm1
988; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
989; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
990; AVX2-NEXT: vpcmpeqq %ymm0, %ymm2, %ymm0
991; AVX2-NEXT: vpsrlq $63, %ymm0, %ymm0
992; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000993; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
Simon Pilgrima705db92017-09-24 13:42:31 +0000994; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
995; AVX2-NEXT: vpsrlq $63, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000996; AVX2-NEXT: retq
997;
Craig Topper5befc5b2017-11-28 01:36:31 +0000998; AVX512F-LABEL: ext_i8_8i64:
999; AVX512F: # BB#0:
1000; AVX512F-NEXT: kmovw %edi, %k1
1001; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
1002; AVX512F-NEXT: retq
1003;
1004; AVX512VLBW-LABEL: ext_i8_8i64:
1005; AVX512VLBW: # BB#0:
1006; AVX512VLBW-NEXT: kmovd %edi, %k1
1007; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
1008; AVX512VLBW-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001009 %1 = bitcast i8 %a0 to <8 x i1>
1010 %2 = zext <8 x i1> %1 to <8 x i64>
1011 ret <8 x i64> %2
1012}
1013
1014define <16 x i32> @ext_i16_16i32(i16 %a0) {
1015; SSE2-SSSE3-LABEL: ext_i16_16i32:
1016; SSE2-SSSE3: # BB#0:
Simon Pilgrima705db92017-09-24 13:42:31 +00001017; SSE2-SSSE3-NEXT: movd %edi, %xmm0
1018; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
1019; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
1020; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm0
1021; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
1022; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
1023; SSE2-SSSE3-NEXT: psrld $31, %xmm0
1024; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001025; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm1
Simon Pilgrima705db92017-09-24 13:42:31 +00001026; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
1027; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
1028; SSE2-SSSE3-NEXT: psrld $31, %xmm1
1029; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001030; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001031; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
Simon Pilgrima705db92017-09-24 13:42:31 +00001032; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm2
1033; SSE2-SSSE3-NEXT: psrld $31, %xmm2
1034; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001035; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
Simon Pilgrima705db92017-09-24 13:42:31 +00001036; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm3
1037; SSE2-SSSE3-NEXT: psrld $31, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001038; SSE2-SSSE3-NEXT: retq
1039;
1040; AVX1-LABEL: ext_i16_16i32:
1041; AVX1: # BB#0:
Simon Pilgrima705db92017-09-24 13:42:31 +00001042; AVX1-NEXT: vmovd %edi, %xmm0
1043; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
1044; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
1045; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
1046; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
1047; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm3
1048; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
1049; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
1050; AVX1-NEXT: vpsrld $31, %xmm3, %xmm3
1051; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1052; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
1053; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
1054; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
1055; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
1056; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
1057; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm3
1058; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
1059; AVX1-NEXT: vpsrld $31, %xmm3, %xmm3
1060; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1061; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
1062; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
1063; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
1064; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001065; AVX1-NEXT: retq
1066;
1067; AVX2-LABEL: ext_i16_16i32:
1068; AVX2: # BB#0:
Simon Pilgrima705db92017-09-24 13:42:31 +00001069; AVX2-NEXT: vmovd %edi, %xmm0
1070; AVX2-NEXT: vpbroadcastd %xmm0, %ymm1
1071; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
1072; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
1073; AVX2-NEXT: vpcmpeqd %ymm0, %ymm2, %ymm0
1074; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0
1075; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001076; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
Simon Pilgrima705db92017-09-24 13:42:31 +00001077; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
1078; AVX2-NEXT: vpsrld $31, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001079; AVX2-NEXT: retq
1080;
Craig Topper5befc5b2017-11-28 01:36:31 +00001081; AVX512F-LABEL: ext_i16_16i32:
1082; AVX512F: # BB#0:
1083; AVX512F-NEXT: kmovw %edi, %k1
1084; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
1085; AVX512F-NEXT: retq
1086;
1087; AVX512VLBW-LABEL: ext_i16_16i32:
1088; AVX512VLBW: # BB#0:
1089; AVX512VLBW-NEXT: kmovd %edi, %k1
1090; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
1091; AVX512VLBW-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001092 %1 = bitcast i16 %a0 to <16 x i1>
1093 %2 = zext <16 x i1> %1 to <16 x i32>
1094 ret <16 x i32> %2
1095}
1096
1097define <32 x i16> @ext_i32_32i16(i32 %a0) {
1098; SSE2-SSSE3-LABEL: ext_i32_32i16:
1099; SSE2-SSSE3: # BB#0:
Simon Pilgrima705db92017-09-24 13:42:31 +00001100; SSE2-SSSE3-NEXT: movd %edi, %xmm2
1101; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,0,0,4,5,6,7]
1102; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
1103; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001104; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001105; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
Simon Pilgrima705db92017-09-24 13:42:31 +00001106; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm0
1107; SSE2-SSSE3-NEXT: psrlw $15, %xmm0
1108; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
1109; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
1110; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm1
1111; SSE2-SSSE3-NEXT: psrlw $15, %xmm1
1112; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,1,1,4,5,6,7]
1113; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001114; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001115; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
Simon Pilgrima705db92017-09-24 13:42:31 +00001116; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm2
1117; SSE2-SSSE3-NEXT: psrlw $15, %xmm2
1118; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
1119; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm3
1120; SSE2-SSSE3-NEXT: psrlw $15, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001121; SSE2-SSSE3-NEXT: retq
1122;
1123; AVX1-LABEL: ext_i32_32i16:
1124; AVX1: # BB#0:
Simon Pilgrima705db92017-09-24 13:42:31 +00001125; AVX1-NEXT: vmovd %edi, %xmm1
1126; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,0,0,4,5,6,7]
1127; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
1128; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
1129; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001130; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
Simon Pilgrima705db92017-09-24 13:42:31 +00001131; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1132; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm4
1133; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
1134; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
1135; AVX1-NEXT: vpsrlw $15, %xmm4, %xmm4
1136; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1137; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
1138; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
1139; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
1140; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
1141; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
1142; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1143; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001144; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
Simon Pilgrima705db92017-09-24 13:42:31 +00001145; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm2
1146; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
1147; AVX1-NEXT: vpsrlw $15, %xmm2, %xmm2
1148; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
1149; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
1150; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
1151; AVX1-NEXT: vpsrlw $15, %xmm1, %xmm1
1152; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001153; AVX1-NEXT: retq
1154;
1155; AVX2-LABEL: ext_i32_32i16:
1156; AVX2: # BB#0:
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001157; AVX2-NEXT: vmovd %edi, %xmm0
Simon Pilgrima705db92017-09-24 13:42:31 +00001158; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
1159; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
1160; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
1161; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
1162; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm0
1163; AVX2-NEXT: shrl $16, %edi
1164; AVX2-NEXT: vmovd %edi, %xmm2
1165; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
1166; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm2
1167; AVX2-NEXT: vpcmpeqw %ymm1, %ymm2, %ymm1
1168; AVX2-NEXT: vpsrlw $15, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001169; AVX2-NEXT: retq
1170;
Craig Topper5befc5b2017-11-28 01:36:31 +00001171; AVX512F-LABEL: ext_i32_32i16:
1172; AVX512F: # BB#0:
1173; AVX512F-NEXT: pushq %rbp
1174; AVX512F-NEXT: .cfi_def_cfa_offset 16
1175; AVX512F-NEXT: .cfi_offset %rbp, -16
1176; AVX512F-NEXT: movq %rsp, %rbp
1177; AVX512F-NEXT: .cfi_def_cfa_register %rbp
1178; AVX512F-NEXT: andq $-32, %rsp
1179; AVX512F-NEXT: subq $32, %rsp
1180; AVX512F-NEXT: movl %edi, (%rsp)
1181; AVX512F-NEXT: kmovw (%rsp), %k1
1182; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
1183; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
1184; AVX512F-NEXT: vpmovdb %zmm0, %xmm1
1185; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
1186; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
1187; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1188; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0
1189; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1190; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
1191; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
1192; AVX512F-NEXT: movq %rbp, %rsp
1193; AVX512F-NEXT: popq %rbp
1194; AVX512F-NEXT: retq
1195;
1196; AVX512VLBW-LABEL: ext_i32_32i16:
1197; AVX512VLBW: # BB#0:
1198; AVX512VLBW-NEXT: kmovd %edi, %k1
1199; AVX512VLBW-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z}
1200; AVX512VLBW-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001201 %1 = bitcast i32 %a0 to <32 x i1>
1202 %2 = zext <32 x i1> %1 to <32 x i16>
1203 ret <32 x i16> %2
1204}
1205
1206define <64 x i8> @ext_i64_64i8(i64 %a0) {
1207; SSE2-SSSE3-LABEL: ext_i64_64i8:
1208; SSE2-SSSE3: # BB#0:
Simon Pilgrima705db92017-09-24 13:42:31 +00001209; SSE2-SSSE3-NEXT: movq %rdi, %xmm3
1210; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1211; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
1212; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
1213; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
1214; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
1215; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm0
1216; SSE2-SSSE3-NEXT: psrlw $7, %xmm0
1217; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1218; SSE2-SSSE3-NEXT: pand %xmm5, %xmm0
1219; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
1220; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
1221; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
1222; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm1
1223; SSE2-SSSE3-NEXT: psrlw $7, %xmm1
1224; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
1225; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
1226; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
1227; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
1228; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm2
1229; SSE2-SSSE3-NEXT: psrlw $7, %xmm2
1230; SSE2-SSSE3-NEXT: pand %xmm5, %xmm2
1231; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
1232; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
1233; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
1234; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm3
1235; SSE2-SSSE3-NEXT: psrlw $7, %xmm3
1236; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001237; SSE2-SSSE3-NEXT: retq
1238;
1239; AVX1-LABEL: ext_i64_64i8:
1240; AVX1: # BB#0:
Simon Pilgrima705db92017-09-24 13:42:31 +00001241; AVX1-NEXT: vmovq %rdi, %xmm0
1242; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1243; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
1244; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
1245; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
1246; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
1247; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
1248; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
1249; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
1250; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1251; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
1252; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm3
1253; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
1254; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
1255; AVX1-NEXT: vpsrlw $7, %xmm3, %xmm3
1256; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1257; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
1258; AVX1-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
1259; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
1260; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
1261; AVX1-NEXT: vpand %xmm6, %xmm0, %xmm0
1262; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
1263; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
1264; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
1265; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
1266; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1267; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
1268; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
1269; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1270; AVX1-NEXT: vpcmpeqb %xmm4, %xmm2, %xmm2
1271; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
1272; AVX1-NEXT: vpsrlw $7, %xmm2, %xmm2
1273; AVX1-NEXT: vpand %xmm6, %xmm2, %xmm2
1274; AVX1-NEXT: vpcmpeqb %xmm4, %xmm1, %xmm1
1275; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
1276; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
1277; AVX1-NEXT: vpand %xmm6, %xmm1, %xmm1
1278; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001279; AVX1-NEXT: retq
1280;
1281; AVX2-LABEL: ext_i64_64i8:
1282; AVX2: # BB#0:
Simon Pilgrima705db92017-09-24 13:42:31 +00001283; AVX2-NEXT: vmovq %rdi, %xmm0
1284; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1285; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
1286; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
1287; AVX2-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
1288; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
1289; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
1290; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
1291; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
1292; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
1293; AVX2-NEXT: vpsrlw $7, %ymm0, %ymm0
1294; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1295; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0
1296; AVX2-NEXT: vpshufhw {{.*#+}} xmm4 = xmm1[0,1,2,3,4,4,5,5]
1297; AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3]
1298; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
1299; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1300; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm4, %ymm1
1301; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
1302; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
1303; AVX2-NEXT: vpsrlw $7, %ymm1, %ymm1
1304; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001305; AVX2-NEXT: retq
1306;
Craig Topper5befc5b2017-11-28 01:36:31 +00001307; AVX512F-LABEL: ext_i64_64i8:
1308; AVX512F: # BB#0:
1309; AVX512F-NEXT: pushq %rbp
1310; AVX512F-NEXT: .cfi_def_cfa_offset 16
1311; AVX512F-NEXT: .cfi_offset %rbp, -16
1312; AVX512F-NEXT: movq %rsp, %rbp
1313; AVX512F-NEXT: .cfi_def_cfa_register %rbp
1314; AVX512F-NEXT: andq $-32, %rsp
1315; AVX512F-NEXT: subq $64, %rsp
1316; AVX512F-NEXT: movl %edi, (%rsp)
1317; AVX512F-NEXT: shrq $32, %rdi
1318; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp)
1319; AVX512F-NEXT: kmovw (%rsp), %k0
1320; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
1321; AVX512F-NEXT: kshiftlw $14, %k1, %k2
1322; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1323; AVX512F-NEXT: kmovw %k2, %eax
1324; AVX512F-NEXT: kshiftlw $15, %k1, %k2
1325; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1326; AVX512F-NEXT: kmovw %k2, %ecx
1327; AVX512F-NEXT: vmovd %ecx, %xmm0
1328; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
1329; AVX512F-NEXT: kshiftlw $13, %k1, %k2
1330; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1331; AVX512F-NEXT: kmovw %k2, %eax
1332; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
1333; AVX512F-NEXT: kshiftlw $12, %k1, %k2
1334; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1335; AVX512F-NEXT: kmovw %k2, %eax
1336; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
1337; AVX512F-NEXT: kshiftlw $11, %k1, %k2
1338; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1339; AVX512F-NEXT: kmovw %k2, %eax
1340; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
1341; AVX512F-NEXT: kshiftlw $10, %k1, %k2
1342; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1343; AVX512F-NEXT: kmovw %k2, %eax
1344; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
1345; AVX512F-NEXT: kshiftlw $9, %k1, %k2
1346; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1347; AVX512F-NEXT: kmovw %k2, %eax
1348; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
1349; AVX512F-NEXT: kshiftlw $8, %k1, %k2
1350; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1351; AVX512F-NEXT: kmovw %k2, %eax
1352; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
1353; AVX512F-NEXT: kshiftlw $7, %k1, %k2
1354; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1355; AVX512F-NEXT: kmovw %k2, %eax
1356; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
1357; AVX512F-NEXT: kshiftlw $6, %k1, %k2
1358; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1359; AVX512F-NEXT: kmovw %k2, %eax
1360; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
1361; AVX512F-NEXT: kshiftlw $5, %k1, %k2
1362; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1363; AVX512F-NEXT: kmovw %k2, %eax
1364; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
1365; AVX512F-NEXT: kshiftlw $4, %k1, %k2
1366; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1367; AVX512F-NEXT: kmovw %k2, %eax
1368; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
1369; AVX512F-NEXT: kshiftlw $3, %k1, %k2
1370; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1371; AVX512F-NEXT: kmovw %k2, %eax
1372; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
1373; AVX512F-NEXT: kshiftlw $2, %k1, %k2
1374; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1375; AVX512F-NEXT: kmovw %k2, %eax
1376; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
1377; AVX512F-NEXT: kshiftlw $1, %k1, %k2
1378; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1379; AVX512F-NEXT: kmovw %k2, %eax
1380; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
1381; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1382; AVX512F-NEXT: kmovw %k1, %eax
1383; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
1384; AVX512F-NEXT: kshiftlw $14, %k0, %k1
1385; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1386; AVX512F-NEXT: kmovw %k1, %eax
1387; AVX512F-NEXT: kshiftlw $15, %k0, %k1
1388; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1389; AVX512F-NEXT: kmovw %k1, %ecx
1390; AVX512F-NEXT: vmovd %ecx, %xmm1
1391; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
1392; AVX512F-NEXT: kshiftlw $13, %k0, %k1
1393; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1394; AVX512F-NEXT: kmovw %k1, %eax
1395; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
1396; AVX512F-NEXT: kshiftlw $12, %k0, %k1
1397; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1398; AVX512F-NEXT: kmovw %k1, %eax
1399; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
1400; AVX512F-NEXT: kshiftlw $11, %k0, %k1
1401; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1402; AVX512F-NEXT: kmovw %k1, %eax
1403; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
1404; AVX512F-NEXT: kshiftlw $10, %k0, %k1
1405; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1406; AVX512F-NEXT: kmovw %k1, %eax
1407; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
1408; AVX512F-NEXT: kshiftlw $9, %k0, %k1
1409; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1410; AVX512F-NEXT: kmovw %k1, %eax
1411; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
1412; AVX512F-NEXT: kshiftlw $8, %k0, %k1
1413; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1414; AVX512F-NEXT: kmovw %k1, %eax
1415; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
1416; AVX512F-NEXT: kshiftlw $7, %k0, %k1
1417; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1418; AVX512F-NEXT: kmovw %k1, %eax
1419; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
1420; AVX512F-NEXT: kshiftlw $6, %k0, %k1
1421; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1422; AVX512F-NEXT: kmovw %k1, %eax
1423; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
1424; AVX512F-NEXT: kshiftlw $5, %k0, %k1
1425; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1426; AVX512F-NEXT: kmovw %k1, %eax
1427; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
1428; AVX512F-NEXT: kshiftlw $4, %k0, %k1
1429; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1430; AVX512F-NEXT: kmovw %k1, %eax
1431; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
1432; AVX512F-NEXT: kshiftlw $3, %k0, %k1
1433; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1434; AVX512F-NEXT: kmovw %k1, %eax
1435; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
1436; AVX512F-NEXT: kshiftlw $2, %k0, %k1
1437; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1438; AVX512F-NEXT: kmovw %k1, %eax
1439; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
1440; AVX512F-NEXT: kshiftlw $1, %k0, %k1
1441; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1442; AVX512F-NEXT: kmovw %k1, %eax
1443; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
1444; AVX512F-NEXT: kshiftrw $15, %k0, %k0
1445; AVX512F-NEXT: kmovw %k0, %eax
1446; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
1447; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
1448; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1449; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
1450; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0
1451; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
1452; AVX512F-NEXT: kshiftlw $14, %k1, %k2
1453; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1454; AVX512F-NEXT: kmovw %k2, %eax
1455; AVX512F-NEXT: kshiftlw $15, %k1, %k2
1456; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1457; AVX512F-NEXT: kmovw %k2, %ecx
1458; AVX512F-NEXT: vmovd %ecx, %xmm2
1459; AVX512F-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
1460; AVX512F-NEXT: kshiftlw $13, %k1, %k2
1461; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1462; AVX512F-NEXT: kmovw %k2, %eax
1463; AVX512F-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
1464; AVX512F-NEXT: kshiftlw $12, %k1, %k2
1465; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1466; AVX512F-NEXT: kmovw %k2, %eax
1467; AVX512F-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
1468; AVX512F-NEXT: kshiftlw $11, %k1, %k2
1469; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1470; AVX512F-NEXT: kmovw %k2, %eax
1471; AVX512F-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
1472; AVX512F-NEXT: kshiftlw $10, %k1, %k2
1473; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1474; AVX512F-NEXT: kmovw %k2, %eax
1475; AVX512F-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
1476; AVX512F-NEXT: kshiftlw $9, %k1, %k2
1477; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1478; AVX512F-NEXT: kmovw %k2, %eax
1479; AVX512F-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
1480; AVX512F-NEXT: kshiftlw $8, %k1, %k2
1481; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1482; AVX512F-NEXT: kmovw %k2, %eax
1483; AVX512F-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
1484; AVX512F-NEXT: kshiftlw $7, %k1, %k2
1485; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1486; AVX512F-NEXT: kmovw %k2, %eax
1487; AVX512F-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
1488; AVX512F-NEXT: kshiftlw $6, %k1, %k2
1489; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1490; AVX512F-NEXT: kmovw %k2, %eax
1491; AVX512F-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
1492; AVX512F-NEXT: kshiftlw $5, %k1, %k2
1493; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1494; AVX512F-NEXT: kmovw %k2, %eax
1495; AVX512F-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
1496; AVX512F-NEXT: kshiftlw $4, %k1, %k2
1497; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1498; AVX512F-NEXT: kmovw %k2, %eax
1499; AVX512F-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
1500; AVX512F-NEXT: kshiftlw $3, %k1, %k2
1501; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1502; AVX512F-NEXT: kmovw %k2, %eax
1503; AVX512F-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
1504; AVX512F-NEXT: kshiftlw $2, %k1, %k2
1505; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1506; AVX512F-NEXT: kmovw %k2, %eax
1507; AVX512F-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
1508; AVX512F-NEXT: kshiftlw $1, %k1, %k2
1509; AVX512F-NEXT: kshiftrw $15, %k2, %k2
1510; AVX512F-NEXT: kmovw %k2, %eax
1511; AVX512F-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
1512; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1513; AVX512F-NEXT: kmovw %k1, %eax
1514; AVX512F-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
1515; AVX512F-NEXT: kshiftlw $14, %k0, %k1
1516; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1517; AVX512F-NEXT: kmovw %k1, %eax
1518; AVX512F-NEXT: kshiftlw $15, %k0, %k1
1519; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1520; AVX512F-NEXT: kmovw %k1, %ecx
1521; AVX512F-NEXT: vmovd %ecx, %xmm3
1522; AVX512F-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
1523; AVX512F-NEXT: kshiftlw $13, %k0, %k1
1524; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1525; AVX512F-NEXT: kmovw %k1, %eax
1526; AVX512F-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
1527; AVX512F-NEXT: kshiftlw $12, %k0, %k1
1528; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1529; AVX512F-NEXT: kmovw %k1, %eax
1530; AVX512F-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
1531; AVX512F-NEXT: kshiftlw $11, %k0, %k1
1532; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1533; AVX512F-NEXT: kmovw %k1, %eax
1534; AVX512F-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
1535; AVX512F-NEXT: kshiftlw $10, %k0, %k1
1536; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1537; AVX512F-NEXT: kmovw %k1, %eax
1538; AVX512F-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
1539; AVX512F-NEXT: kshiftlw $9, %k0, %k1
1540; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1541; AVX512F-NEXT: kmovw %k1, %eax
1542; AVX512F-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
1543; AVX512F-NEXT: kshiftlw $8, %k0, %k1
1544; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1545; AVX512F-NEXT: kmovw %k1, %eax
1546; AVX512F-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
1547; AVX512F-NEXT: kshiftlw $7, %k0, %k1
1548; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1549; AVX512F-NEXT: kmovw %k1, %eax
1550; AVX512F-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
1551; AVX512F-NEXT: kshiftlw $6, %k0, %k1
1552; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1553; AVX512F-NEXT: kmovw %k1, %eax
1554; AVX512F-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
1555; AVX512F-NEXT: kshiftlw $5, %k0, %k1
1556; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1557; AVX512F-NEXT: kmovw %k1, %eax
1558; AVX512F-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
1559; AVX512F-NEXT: kshiftlw $4, %k0, %k1
1560; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1561; AVX512F-NEXT: kmovw %k1, %eax
1562; AVX512F-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
1563; AVX512F-NEXT: kshiftlw $3, %k0, %k1
1564; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1565; AVX512F-NEXT: kmovw %k1, %eax
1566; AVX512F-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
1567; AVX512F-NEXT: kshiftlw $2, %k0, %k1
1568; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1569; AVX512F-NEXT: kmovw %k1, %eax
1570; AVX512F-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
1571; AVX512F-NEXT: kshiftlw $1, %k0, %k1
1572; AVX512F-NEXT: kshiftrw $15, %k1, %k1
1573; AVX512F-NEXT: kmovw %k1, %eax
1574; AVX512F-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
1575; AVX512F-NEXT: kshiftrw $15, %k0, %k0
1576; AVX512F-NEXT: kmovw %k0, %eax
1577; AVX512F-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
1578; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
1579; AVX512F-NEXT: vpand %ymm1, %ymm2, %ymm1
1580; AVX512F-NEXT: movq %rbp, %rsp
1581; AVX512F-NEXT: popq %rbp
1582; AVX512F-NEXT: retq
1583;
1584; AVX512VLBW-LABEL: ext_i64_64i8:
1585; AVX512VLBW: # BB#0:
1586; AVX512VLBW-NEXT: kmovq %rdi, %k1
1587; AVX512VLBW-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
1588; AVX512VLBW-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001589 %1 = bitcast i64 %a0 to <64 x i1>
1590 %2 = zext <64 x i1> %1 to <64 x i8>
1591 ret <64 x i8> %2
1592}