; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512

;
; 128-bit vectors
;
define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i2_2i64:
; AVX512:       # BB#0:
; AVX512-NEXT:    andb $3, %dil
; AVX512-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT:    kmovd %eax, %k1
; AVX512-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
; Reinterpret the low 2 bits of %a0 as a per-lane mask and sign-extend to i64 lanes.
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i32:
; AVX512:       # BB#0:
; AVX512-NEXT:    andb $15, %dil
; AVX512-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT:    kmovd %eax, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
; Reinterpret the low 4 bits of %a0 as a per-lane mask and sign-extend to i32 lanes.
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}

define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i16:
; AVX512:       # BB#0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %xmm0
; AVX512-NEXT:    retq
; Reinterpret the 8 bits of %a0 as a per-lane mask and sign-extend to i16 lanes.
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}

define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2:       # BB#0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3:       # BB#0:
; SSSE3-NEXT:    movd %edi, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i8:
; AVX512:       # BB#0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %xmm0
; AVX512-NEXT:    retq
; Reinterpret the 16 bits of %a0 as a per-lane mask and sign-extend to i8 lanes.
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;

define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i64:
; AVX512:       # BB#0:
; AVX512-NEXT:    andb $15, %dil
; AVX512-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT:    kmovd %eax, %k1
; AVX512-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512-NEXT:    retq
; Reinterpret the low 4 bits of %a0 as a per-lane mask and sign-extend to i64 lanes.
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i32:
; AVX512:       # BB#0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
; Reinterpret the 8 bits of %a0 as a per-lane mask and sign-extend to i32 lanes.
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}

define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i16:
; AVX512:       # BB#0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %ymm0
; AVX512-NEXT:    retq
; Reinterpret the 16 bits of %a0 as a per-lane mask and sign-extend to i16 lanes.
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}

define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm1
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i32_32i8:
; AVX512:       # BB#0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %ymm0
; AVX512-NEXT:    retq
; Reinterpret the 32 bits of %a0 as a per-lane mask and sign-extend to i8 lanes.
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;

define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i64:
; AVX512:       # BB#0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
; Reinterpret the 8 bits of %a0 as a per-lane mask and sign-extend to i64 lanes.
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i32:
; AVX512:       # BB#0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
; Reinterpret the 16 bits of %a0 as a per-lane mask and sign-extend to i32 lanes.
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}

define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm2
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm2[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[1,1,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    shrl $16, %edi
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i32_32i16:
; AVX512:       # BB#0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %zmm0
; AVX512-NEXT:    retq
; Reinterpret the 32 bits of %a0 as a per-lane mask and sign-extend to i16 lanes.
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i16>
  ret <32 x i16> %2
}

define <64 x i8> @ext_i64_64i8(i64 %a0) {
; SSE2-SSSE3-LABEL: ext_i64_64i8:
; SSE2-SSSE3:       # BB#0:
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm3
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i64_64i8:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i64_64i8:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; AVX2-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm3, %ymm1
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i64_64i8:
; AVX512:       # BB#0:
; AVX512-NEXT:    kmovq %rdi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %zmm0
; AVX512-NEXT:    retq
; Reinterpret the 64 bits of %a0 as a per-lane mask and sign-extend to i8 lanes.
  %1 = bitcast i64 %a0 to <64 x i1>
  %2 = sext <64 x i1> %1 to <64 x i8>
  ret <64 x i8> %2
}