; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512

;
; 128-bit vectors
;

; ext_i2_2i64 - reinterpret an i2 as <2 x i1> and sign-extend it to <2 x i64>.
;
; Expected lowering per run line (see the check lines inside the function):
;  * SSE2/SSSE3, AVX1 and AVX2 runs splat the scalar across the vector, AND it
;    with the per-lane bit constant [1,2] and compare-equal against the same
;    constant. The SSE2/SSSE3 runs only have a 32-bit compare (pcmpeqd), so
;    they swap adjacent dwords and AND to form the 64-bit result.
;  * The AVX-512 run masks the input to 2 bits, moves it into %k1 and does a
;    zero-masked move of an all-ones vector.
define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def %edi killed %edi def %rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def %edi killed %edi def %rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def %edi killed %edi def %rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i2_2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    andb $3, %dil
; AVX512-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT:    kmovd %eax, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  ; One i1 per input bit, then widen each lane to all-zeros / all-ones.
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}
; ext_i4_4i32 - reinterpret an i4 as <4 x i1> and sign-extend it to <4 x i32>.
;
; Expected lowering per run line (see the check lines inside the function):
;  * SSE2/SSSE3, AVX1 and AVX2 runs splat the scalar to every dword, AND with
;    the per-lane bit constant [1,2,4,8] and compare-equal against it.
;  * The AVX-512 run masks the input to 4 bits, moves it into %k1 and does a
;    zero-masked move of an all-ones vector.
define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    andb $15, %dil
; AVX512-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT:    kmovd %eax, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  ; One i1 per input bit, then widen each lane to all-zeros / all-ones.
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}
; ext_i8_8i16 - reinterpret an i8 as <8 x i1> and sign-extend it to <8 x i16>.
;
; Expected lowering per run line (see the check lines inside the function):
;  * SSE2/SSSE3, AVX1 and AVX2 runs splat the low word to every lane, AND with
;    the per-lane bit constant [1,2,4,...,128] and compare-equal against it.
;  * The AVX-512 run moves the scalar straight into %k0 and expands it with
;    vpmovm2w.
define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %xmm0
; AVX512-NEXT:    retq
  ; One i1 per input bit, then widen each lane to all-zeros / all-ones.
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}
; ext_i16_16i8 - reinterpret an i16 as <16 x i1> and sign-extend it to
; <16 x i8>.
;
; Expected lowering per run line (see the check lines inside the function):
;  * The SSE2 run replicates input byte 0 into the low 8 lanes and byte 1 into
;    the high 8 lanes with punpcklbw/pshuflw/pshufd; the SSSE3, AVX1 and AVX2
;    runs do the same with a single (v)pshufb.
;  * All four then AND with the repeating byte pattern 1,2,4,...,128 and
;    compare-equal against it. The AVX2 run prints that pattern as the 64-bit
;    value 9241421688590303745 (0x8040201008040201).
;  * The AVX-512 run moves the scalar into %k0 and expands it with vpmovm2b.
define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movd %edi, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %xmm0
; AVX512-NEXT:    retq
  ; One i1 per input bit, then widen each lane to all-zeros / all-ones.
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;

; ext_i4_4i64 - reinterpret an i4 as <4 x i1> and sign-extend it to <4 x i64>.
;
; Expected lowering per run line (see the check lines inside the function):
;  * The SSE2/SSSE3 runs return the result in two xmm halves, testing bits
;    [1,2] and [4,8] separately; each half uses pcmpeqd followed by a dword
;    swap and AND to form the 64-bit compare.
;  * The AVX1 run ANDs a 256-bit splat with a constant-pool mask, then per
;    128-bit half compares against zero and inverts the result.
;  * The AVX2 run uses a single ymm AND and vpcmpeqq against [1,2,4,8].
;  * The AVX-512 run masks the input to 4 bits, moves it into %k1 and does a
;    zero-masked move of an all-ones vector.
define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def %edi killed %edi def %rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def %edi killed %edi def %rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def %edi killed %edi def %rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    andb $15, %dil
; AVX512-NEXT:    movb %dil, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT:    kmovd %eax, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  ; One i1 per input bit, then widen each lane to all-zeros / all-ones.
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}
; ext_i8_8i32 - reinterpret an i8 as <8 x i1> and sign-extend it to <8 x i32>.
;
; Expected lowering per run line (see the check lines inside the function):
;  * The SSE2/SSSE3 runs return the result in two xmm halves, testing bits
;    [1,2,4,8] and [16,32,64,128] with pand + pcmpeqd.
;  * The AVX1 run ANDs a 256-bit splat with a constant-pool mask, then per
;    128-bit half compares against zero and inverts the result.
;  * The AVX2 run uses a single ymm AND and vpcmpeqd against [1,2,...,128].
;  * The AVX-512 run moves the scalar into %k1 and does a zero-masked move of
;    an all-ones vector.
define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  ; One i1 per input bit, then widen each lane to all-zeros / all-ones.
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}
; ext_i16_16i16 - reinterpret an i16 as <16 x i1> and sign-extend it to
; <16 x i16>.
;
; Expected lowering per run line (see the check lines inside the function):
;  * The SSE2/SSSE3 runs return the result in two xmm halves, testing bits
;    [1..128] and [256..32768] with pand + pcmpeqw.
;  * The AVX1 run ANDs a 256-bit splat with a constant-pool mask, then per
;    128-bit half compares against zero and inverts the result.
;  * The AVX2 run uses a single ymm AND and vpcmpeqw against [1,2,...,32768].
;  * The AVX-512 run moves the scalar into %k0 and expands it with vpmovm2w.
define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %ymm0
; AVX512-NEXT:    retq
  ; One i1 per input bit, then widen each lane to all-zeros / all-ones.
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}
; ext_i32_32i8 - reinterpret an i32 as <32 x i1> and sign-extend it to
; <32 x i8>.
;
; Expected lowering per run line (see the check lines inside the function):
;  * SSE2/SSSE3, AVX1 and AVX2 runs first replicate each input byte across
;    eight byte lanes with (v)punpcklbw + (v)pshuflw + (v)pshufd.
;  * The SSE2/SSSE3 runs then test each xmm half against the repeating byte
;    pattern 1,2,4,...,128 with pand + pcmpeqb.
;  * The AVX1 run ANDs with a constant-pool mask, then per 128-bit half
;    compares against zero and inverts the result.
;  * The AVX2 run broadcasts the pattern as the 64-bit value
;    9241421688590303745 (0x8040201008040201) and uses vpand + vpcmpeqb.
;  * The AVX-512 run moves the scalar into %k0 and expands it with vpmovm2b.
define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm1
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX2-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i32_32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %ymm0
; AVX512-NEXT:    retq
  ; One i1 per input bit, then widen each lane to all-zeros / all-ones.
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;

; ext_i8_8i64 - reinterpret an i8 as <8 x i1> and sign-extend it to <8 x i64>.
;
; Expected lowering per run line (see the check lines inside the function):
;  * The SSE2/SSSE3 runs return the result in four xmm registers, testing the
;    bit pairs [1,2], [4,8], [16,32] and [64,128]; each uses pcmpeqd followed
;    by a dword swap and AND to form the 64-bit compare.
;  * The AVX1 run produces two ymm halves; each ANDs the splat with a
;    constant-pool mask, then per 128-bit half compares against zero and
;    inverts the result.
;  * The AVX2 run produces two ymm halves with vpand + vpcmpeqq against
;    [1,2,4,8] and [16,32,64,128].
;  * The AVX-512 run moves the scalar into %k1 and uses a zero-masked
;    vpternlogq $255 to produce the result in zmm0.
define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def %edi killed %edi def %rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def %edi killed %edi def %rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def %edi killed %edi def %rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  ; One i1 per input bit, then widen each lane to all-zeros / all-ones.
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}
; ext_i16_16i32 - reinterpret an i16 as <16 x i1> and sign-extend it to
; <16 x i32>.
;
; Expected lowering per run line (see the check lines inside the function):
;  * The SSE2/SSSE3 runs return the result in four xmm registers, testing four
;    consecutive bits each ([1..8], [16..128], [256..2048], [4096..32768])
;    with pand + pcmpeqd.
;  * The AVX1 run produces two ymm halves; each ANDs the splat with a
;    constant-pool mask, then per 128-bit half compares against zero and
;    inverts the result.
;  * The AVX2 run produces two ymm halves with vpand + vpcmpeqd against
;    [1..128] and [256..32768].
;  * The AVX-512 run moves the scalar into %k1 and uses a zero-masked
;    vpternlogd $255 to produce the result in zmm0.
define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  ; One i1 per input bit, then widen each lane to all-zeros / all-ones.
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}
; ext_i32_32i16 - reinterpret an i32 as <32 x i1> and sign-extend it to
; <32 x i16>.
;
; Expected lowering per run line (see the check lines inside the function):
;  * The SSE2/SSSE3 runs return the result in four xmm registers. The low
;    input word is splatted for the first two and the high word for the last
;    two; each is tested against [1..128] or [256..32768] with pand + pcmpeqw.
;  * The AVX1 run produces two ymm halves (low word, then high word); each is
;    ANDed with [1..32768], then per 128-bit half compared against zero and
;    inverted.
;  * The AVX2 run broadcasts the low word, then shifts %edi right by 16 and
;    broadcasts the high word; both use vpand + vpcmpeqw against [1..32768].
;  * The AVX-512 run moves the scalar into %k0 and expands it with vpmovm2w
;    into zmm0.
define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm2
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm2[0,0,0,0,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[1,1,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[1,1,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    shrl $16, %edi
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i32_32i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %zmm0
; AVX512-NEXT:    retq
  ; One i1 per input bit, then widen each lane to all-zeros / all-ones.
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i16>
  ret <32 x i16> %2
}
637
; ext_i64_64i8 - expand a 64-bit scalar bitmask into a 64 x i8 vector mask.
; The i64 argument is bitcast to <64 x i1> and every i1 lane is sign-extended
; to i8, so a lane is all-ones when its source bit is set and zero otherwise.
; The assertion blocks that follow (one per RUN-line prefix group) are
; autogenerated by utils/update_llc_test_checks.py; regenerate them with that
; script rather than editing them by hand.
638define <64 x i8> @ext_i64_64i8(i64 %a0) {
639; SSE2-SSSE3-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000640; SSE2-SSSE3: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000641; SSE2-SSSE3-NEXT: movq %rdi, %xmm3
642; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
643; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
644; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
645; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
646; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
647; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm0
648; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
649; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
650; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
651; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm1
652; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
653; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
654; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
655; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm2
656; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
657; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
658; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
659; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000660; SSE2-SSSE3-NEXT: retq
661;
662; AVX1-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000663; AVX1: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000664; AVX1-NEXT: vmovq %rdi, %xmm0
665; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
666; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
667; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
668; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
669; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
670; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
671; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
672; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
673; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
674; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
675; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm3
676; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
677; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
678; AVX1-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
679; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
680; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
681; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
682; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
683; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
684; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
685; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
686; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
687; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
688; AVX1-NEXT: vpcmpeqb %xmm4, %xmm2, %xmm2
689; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
690; AVX1-NEXT: vpcmpeqb %xmm4, %xmm1, %xmm1
691; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000692; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000693; AVX1-NEXT: retq
694;
695; AVX2-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000696; AVX2: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000697; AVX2-NEXT: vmovq %rdi, %xmm0
698; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
699; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
700; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
701; AVX2-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
702; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
703; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
704; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
705; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
706; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
707; AVX2-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
708; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
709; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
710; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
711; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm3, %ymm1
712; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
713; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000714; AVX2-NEXT: retq
715;
716; AVX512-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000717; AVX512: # %bb.0:
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000718; AVX512-NEXT: kmovq %rdi, %k0
719; AVX512-NEXT: vpmovm2b %k0, %zmm0
720; AVX512-NEXT: retq
; Reinterpret the 64 scalar bits as 64 one-bit lanes (no value conversion).
721 %1 = bitcast i64 %a0 to <64 x i1>
; Sign-extend each i1 lane to i8, so a true lane becomes -1 (0xFF) and a false lane 0.
722 %2 = sext <64 x i1> %1 to <64 x i8>
723 ret <64 x i8> %2
724}