; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VLBW

;
; 128-bit vectors
;

; Reinterpret the i2 argument as a <2 x i1> mask and zero-extend every lane to
; i64, so each result element is either 0 or 1.
define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: # kill: def %edi killed %edi def %rdi
; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrlq $63, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: # kill: def %edi killed %edi def %rdi
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: # kill: def %edi killed %edi def %rdi
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i2_2i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i2_2i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = zext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}
; Reinterpret the i4 argument as a <4 x i1> mask and zero-extend every lane to
; i32, so each result element is either 0 or 1.
define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrld $31, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i4_4i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i4_4i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}
; Reinterpret the i8 argument as an <8 x i1> mask and zero-extend every lane to
; i16, so each result element is either 0 or 1.
define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrlw $15, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i8_8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i8_8i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vmovdqu16 {{.*}}(%rip), %xmm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}
; Reinterpret the i16 argument as a <16 x i1> mask and zero-extend every lane
; to i8, so each result element is either 0 or 1.
define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movd %edi, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; SSE2-NEXT: psrlw $7, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT: psrlw $7, %xmm0
; SSSE3-NEXT: pand {{.*}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $7, %xmm0, %xmm0
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i16_16i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i16_16i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vmovdqu8 {{.*}}(%rip), %xmm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;

; Reinterpret the i4 argument as a <4 x i1> mask and zero-extend every lane to
; i64, so each result element is either 0 or 1.
define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: # kill: def %edi killed %edi def %rdi
; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrlq $63, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: psrlq $63, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: # kill: def %edi killed %edi def %rdi
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlq $63, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: # kill: def %edi killed %edi def %rdi
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i4_4i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i4_4i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}
; Reinterpret the i8 argument as an <8 x i1> mask and zero-extend every lane to
; i32, so each result element is either 0 or 1.
define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT: psrld $31, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT: psrld $31, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrld $31, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i8_8i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT: # kill: def %ymm0 killed %ymm0 killed %zmm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i8_8i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}
; Reinterpret the i16 argument as a <16 x i1> mask and zero-extend every lane
; to i16, so each result element is either 0 or 1.
define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT: psrlw $15, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT: psrlw $15, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $15, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i16_16i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i16_16i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}
; Reinterpret the i32 argument as a <32 x i1> mask and zero-extend every lane
; to i8, so each result element is either 0 or 1.
define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT: psrlw $7, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT: psrlw $7, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: ext_i32_32i8:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vmovd %edi, %xmm0
; AVX2-SLOW-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-SLOW-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT: vpsrlw $7, %ymm0, %ymm0
; AVX2-SLOW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: ext_i32_32i8:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovd %edi, %xmm0
; AVX2-FAST-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
; AVX2-FAST-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-FAST-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT: vpsrlw $7, %ymm0, %ymm0
; AVX2-FAST-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-FAST-NEXT: retq
;
; AVX512F-LABEL: ext_i32_32i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: shrl $16, %edi
; AVX512F-NEXT: kmovw %edi, %k2
; AVX512F-NEXT: movl {{.*}}(%rip), %eax
; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i32_32i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = zext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;

533define <8 x i64> @ext_i8_8i64(i8 %a0) {
534; SSE2-SSSE3-LABEL: ext_i8_8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000535; SSE2-SSSE3: # %bb.0:
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000536; SSE2-SSSE3-NEXT: # kill: def %edi killed %edi def %rdi
Simon Pilgrima705db92017-09-24 13:42:31 +0000537; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
538; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
539; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
540; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
541; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
542; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
543; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
544; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
545; SSE2-SSSE3-NEXT: psrlq $63, %xmm0
546; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
547; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm2
548; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
549; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
550; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
551; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
552; SSE2-SSSE3-NEXT: psrlq $63, %xmm1
553; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32]
554; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm3
555; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3
556; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm3
557; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
558; SSE2-SSSE3-NEXT: pand %xmm3, %xmm2
559; SSE2-SSSE3-NEXT: psrlq $63, %xmm2
560; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [64,128]
561; SSE2-SSSE3-NEXT: pand %xmm3, %xmm4
562; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm4
563; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000564; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
Simon Pilgrima705db92017-09-24 13:42:31 +0000565; SSE2-SSSE3-NEXT: psrlq $63, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000566; SSE2-SSSE3-NEXT: retq
567;
568; AVX1-LABEL: ext_i8_8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000569; AVX1: # %bb.0:
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000570; AVX1-NEXT: # kill: def %edi killed %edi def %rdi
Simon Pilgrima705db92017-09-24 13:42:31 +0000571; AVX1-NEXT: vmovq %rdi, %xmm0
572; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
573; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
574; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
575; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
576; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm3
577; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
578; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
579; AVX1-NEXT: vpsrlq $63, %xmm3, %xmm3
580; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
581; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
582; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
583; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
584; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
585; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
586; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm3
587; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
588; AVX1-NEXT: vpsrlq $63, %xmm3, %xmm3
589; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
590; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
591; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
592; AVX1-NEXT: vpsrlq $63, %xmm1, %xmm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000593; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000594; AVX1-NEXT: retq
595;
596; AVX2-LABEL: ext_i8_8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000597; AVX2: # %bb.0:
Francis Visoiu Mistriha8a83d12017-12-07 10:40:31 +0000598; AVX2-NEXT: # kill: def %edi killed %edi def %rdi
Simon Pilgrima705db92017-09-24 13:42:31 +0000599; AVX2-NEXT: vmovq %rdi, %xmm0
600; AVX2-NEXT: vpbroadcastq %xmm0, %ymm1
601; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
602; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
603; AVX2-NEXT: vpcmpeqq %ymm0, %ymm2, %ymm0
604; AVX2-NEXT: vpsrlq $63, %ymm0, %ymm0
605; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000606; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
Simon Pilgrima705db92017-09-24 13:42:31 +0000607; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
608; AVX2-NEXT: vpsrlq $63, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000609; AVX2-NEXT: retq
610;
Craig Topper5befc5b2017-11-28 01:36:31 +0000611; AVX512F-LABEL: ext_i8_8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000612; AVX512F: # %bb.0:
Craig Topper5befc5b2017-11-28 01:36:31 +0000613; AVX512F-NEXT: kmovw %edi, %k1
614; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
615; AVX512F-NEXT: retq
616;
617; AVX512VLBW-LABEL: ext_i8_8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000618; AVX512VLBW: # %bb.0:
Craig Topper5befc5b2017-11-28 01:36:31 +0000619; AVX512VLBW-NEXT: kmovd %edi, %k1
620; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
621; AVX512VLBW-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000622 %1 = bitcast i8 %a0 to <8 x i1>
623 %2 = zext <8 x i1> %1 to <8 x i64>
624 ret <8 x i64> %2
625}
626
; ext_i16_16i32 - reinterpret the 16 bits of the i16 argument as a <16 x i1>
; mask and zero-extend every lane to i32, producing a <16 x i32> (512-bit)
; value whose lanes are each 0 or 1.
;
; The check lines below are autogenerated (see the NOTE at the top of the file)
; and record the llc output expected for each RUN configuration; regenerate
; them with utils/update_llc_test_checks.py rather than editing them by hand.
; As recorded here, the SSE and AVX1/AVX2 sequences broadcast the scalar and
; test per-lane bit constants, while both AVX512 configurations move the
; scalar into a mask register and do a single zero-masked broadcast.
627define <16 x i32> @ext_i16_16i32(i16 %a0) {
628; SSE2-SSSE3-LABEL: ext_i16_16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000629; SSE2-SSSE3: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000630; SSE2-SSSE3-NEXT: movd %edi, %xmm0
631; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
632; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
633; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm0
634; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
635; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
636; SSE2-SSSE3-NEXT: psrld $31, %xmm0
637; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000638; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm1
Simon Pilgrima705db92017-09-24 13:42:31 +0000639; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
640; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
641; SSE2-SSSE3-NEXT: psrld $31, %xmm1
642; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000643; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000644; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
Simon Pilgrima705db92017-09-24 13:42:31 +0000645; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm2
646; SSE2-SSSE3-NEXT: psrld $31, %xmm2
647; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000648; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
Simon Pilgrima705db92017-09-24 13:42:31 +0000649; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm3
650; SSE2-SSSE3-NEXT: psrld $31, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000651; SSE2-SSSE3-NEXT: retq
652;
653; AVX1-LABEL: ext_i16_16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000654; AVX1: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000655; AVX1-NEXT: vmovd %edi, %xmm0
656; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
657; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
658; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
659; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
660; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm3
661; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
662; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
663; AVX1-NEXT: vpsrld $31, %xmm3, %xmm3
664; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
665; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
666; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
667; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
668; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
669; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
670; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm3
671; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
672; AVX1-NEXT: vpsrld $31, %xmm3, %xmm3
673; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
674; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
675; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
676; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
677; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000678; AVX1-NEXT: retq
679;
680; AVX2-LABEL: ext_i16_16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000681; AVX2: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000682; AVX2-NEXT: vmovd %edi, %xmm0
683; AVX2-NEXT: vpbroadcastd %xmm0, %ymm1
684; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
685; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
686; AVX2-NEXT: vpcmpeqd %ymm0, %ymm2, %ymm0
687; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0
688; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000689; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
Simon Pilgrima705db92017-09-24 13:42:31 +0000690; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
691; AVX2-NEXT: vpsrld $31, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000692; AVX2-NEXT: retq
693;
Craig Topper5befc5b2017-11-28 01:36:31 +0000694; AVX512F-LABEL: ext_i16_16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000695; AVX512F: # %bb.0:
Craig Topper5befc5b2017-11-28 01:36:31 +0000696; AVX512F-NEXT: kmovw %edi, %k1
697; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
698; AVX512F-NEXT: retq
699;
700; AVX512VLBW-LABEL: ext_i16_16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000701; AVX512VLBW: # %bb.0:
Craig Topper5befc5b2017-11-28 01:36:31 +0000702; AVX512VLBW-NEXT: kmovd %edi, %k1
703; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
704; AVX512VLBW-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000705 %1 = bitcast i16 %a0 to <16 x i1>
706 %2 = zext <16 x i1> %1 to <16 x i32>
707 ret <16 x i32> %2
708}
709
; ext_i32_32i16 - reinterpret the 32 bits of the i32 argument as a <32 x i1>
; mask and zero-extend every lane to i16, producing a <32 x i16> (512-bit)
; value whose lanes are each 0 or 1.
;
; The check lines below are autogenerated (see the NOTE at the top of the file)
; and record the llc output expected for each RUN configuration; regenerate
; them with utils/update_llc_test_checks.py rather than editing them by hand.
; As recorded here, the avx512f-only configuration handles the input as two
; 16-bit masks (kmovw, shrl $16, kmovw) and truncates dword results with
; vpmovdw, whereas the configuration that adds avx512bw uses one 32-bit mask
; (kmovd) with a zero-masked vmovdqu16.
710define <32 x i16> @ext_i32_32i16(i32 %a0) {
711; SSE2-SSSE3-LABEL: ext_i32_32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000712; SSE2-SSSE3: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000713; SSE2-SSSE3-NEXT: movd %edi, %xmm2
Simon Pilgrimc7015962017-12-29 14:41:50 +0000714; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7]
715; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
Simon Pilgrima705db92017-09-24 13:42:31 +0000716; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000717; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000718; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
Simon Pilgrima705db92017-09-24 13:42:31 +0000719; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm0
720; SSE2-SSSE3-NEXT: psrlw $15, %xmm0
721; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
722; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
723; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm1
724; SSE2-SSSE3-NEXT: psrlw $15, %xmm1
Simon Pilgrimc7015962017-12-29 14:41:50 +0000725; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,2,3,4,5,6,7]
726; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000727; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000728; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
Simon Pilgrima705db92017-09-24 13:42:31 +0000729; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm2
730; SSE2-SSSE3-NEXT: psrlw $15, %xmm2
731; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
732; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm3
733; SSE2-SSSE3-NEXT: psrlw $15, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000734; SSE2-SSSE3-NEXT: retq
735;
736; AVX1-LABEL: ext_i32_32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000737; AVX1: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000738; AVX1-NEXT: vmovd %edi, %xmm1
Simon Pilgrimc7015962017-12-29 14:41:50 +0000739; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
740; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
Simon Pilgrima705db92017-09-24 13:42:31 +0000741; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
742; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000743; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
Simon Pilgrima705db92017-09-24 13:42:31 +0000744; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
745; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm4
746; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
747; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
748; AVX1-NEXT: vpsrlw $15, %xmm4, %xmm4
749; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
750; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
751; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
752; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
753; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
Simon Pilgrimc7015962017-12-29 14:41:50 +0000754; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,2,3,4,5,6,7]
755; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
Simon Pilgrima705db92017-09-24 13:42:31 +0000756; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000757; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
Simon Pilgrima705db92017-09-24 13:42:31 +0000758; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm2
759; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
760; AVX1-NEXT: vpsrlw $15, %xmm2, %xmm2
761; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
762; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
763; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
764; AVX1-NEXT: vpsrlw $15, %xmm1, %xmm1
765; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000766; AVX1-NEXT: retq
767;
768; AVX2-LABEL: ext_i32_32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000769; AVX2: # %bb.0:
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000770; AVX2-NEXT: vmovd %edi, %xmm0
Simon Pilgrima705db92017-09-24 13:42:31 +0000771; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
772; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
773; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
774; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
775; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm0
776; AVX2-NEXT: shrl $16, %edi
777; AVX2-NEXT: vmovd %edi, %xmm2
778; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
779; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm2
780; AVX2-NEXT: vpcmpeqw %ymm1, %ymm2, %ymm1
781; AVX2-NEXT: vpsrlw $15, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000782; AVX2-NEXT: retq
783;
Craig Topper5befc5b2017-11-28 01:36:31 +0000784; AVX512F-LABEL: ext_i32_32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000785; AVX512F: # %bb.0:
Craig Topper76adcc82018-01-23 14:25:39 +0000786; AVX512F-NEXT: kmovw %edi, %k1
787; AVX512F-NEXT: shrl $16, %edi
788; AVX512F-NEXT: kmovw %edi, %k2
789; AVX512F-NEXT: movl {{.*}}(%rip), %eax
790; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
791; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
792; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k2} {z}
793; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
Craig Topper5befc5b2017-11-28 01:36:31 +0000794; AVX512F-NEXT: retq
795;
796; AVX512VLBW-LABEL: ext_i32_32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000797; AVX512VLBW: # %bb.0:
Craig Topper5befc5b2017-11-28 01:36:31 +0000798; AVX512VLBW-NEXT: kmovd %edi, %k1
799; AVX512VLBW-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z}
800; AVX512VLBW-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000801 %1 = bitcast i32 %a0 to <32 x i1>
802 %2 = zext <32 x i1> %1 to <32 x i16>
803 ret <32 x i16> %2
804}
805
; ext_i64_64i8 - reinterpret the 64 bits of the i64 argument as a <64 x i1>
; mask and zero-extend every lane to i8, producing a <64 x i8> (512-bit)
; value whose lanes are each 0 or 1.
;
; The check lines below are autogenerated (see the NOTE at the top of the file)
; and record the llc output expected for each RUN configuration; regenerate
; them with utils/update_llc_test_checks.py rather than editing them by hand.
; This is the only function in this part of the file whose AVX2 output is
; checked separately for the two avx2 RUN lines: the +fast-variable-shuffle
; variant replicates the source bytes with vpshufb, while the plain avx2
; variant uses vpshuflw/vpshufhw followed by vpshufd. The avx512f-only
; configuration splits the input into four 16-bit masks and truncates with
; vpmovdb; the configuration that adds avx512bw uses a single kmovq plus a
; zero-masked vmovdqu8.
806define <64 x i8> @ext_i64_64i8(i64 %a0) {
807; SSE2-SSSE3-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000808; SSE2-SSSE3: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000809; SSE2-SSSE3-NEXT: movq %rdi, %xmm3
810; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
811; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
812; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
813; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
814; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
815; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm0
816; SSE2-SSSE3-NEXT: psrlw $7, %xmm0
817; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
818; SSE2-SSSE3-NEXT: pand %xmm5, %xmm0
819; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
820; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
821; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
822; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm1
823; SSE2-SSSE3-NEXT: psrlw $7, %xmm1
824; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
825; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
826; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
827; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
828; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm2
829; SSE2-SSSE3-NEXT: psrlw $7, %xmm2
830; SSE2-SSSE3-NEXT: pand %xmm5, %xmm2
831; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
832; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
833; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
834; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm3
835; SSE2-SSSE3-NEXT: psrlw $7, %xmm3
836; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000837; SSE2-SSSE3-NEXT: retq
838;
839; AVX1-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000840; AVX1: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000841; AVX1-NEXT: vmovq %rdi, %xmm0
842; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
843; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
844; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
845; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
846; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
847; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
848; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
849; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
850; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
851; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
852; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm3
853; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
854; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
855; AVX1-NEXT: vpsrlw $7, %xmm3, %xmm3
856; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
857; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
858; AVX1-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
859; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
860; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
861; AVX1-NEXT: vpand %xmm6, %xmm0, %xmm0
862; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
863; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
864; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
865; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
866; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
867; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
868; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
869; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
870; AVX1-NEXT: vpcmpeqb %xmm4, %xmm2, %xmm2
871; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
872; AVX1-NEXT: vpsrlw $7, %xmm2, %xmm2
873; AVX1-NEXT: vpand %xmm6, %xmm2, %xmm2
874; AVX1-NEXT: vpcmpeqb %xmm4, %xmm1, %xmm1
875; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
876; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
877; AVX1-NEXT: vpand %xmm6, %xmm1, %xmm1
878; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000879; AVX1-NEXT: retq
880;
Zvi Rackover72b0bb12018-01-09 16:26:06 +0000881; AVX2-SLOW-LABEL: ext_i64_64i8:
882; AVX2-SLOW: # %bb.0:
883; AVX2-SLOW-NEXT: vmovq %rdi, %xmm0
884; AVX2-SLOW-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
885; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
886; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
887; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
888; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
889; AVX2-SLOW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
890; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
891; AVX2-SLOW-NEXT: vpand %ymm2, %ymm0, %ymm0
892; AVX2-SLOW-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
893; AVX2-SLOW-NEXT: vpsrlw $7, %ymm0, %ymm0
894; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
895; AVX2-SLOW-NEXT: vpand %ymm3, %ymm0, %ymm0
896; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm4 = xmm1[0,1,2,3,4,4,5,5]
897; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3]
898; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
899; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
900; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm4, %ymm1
901; AVX2-SLOW-NEXT: vpand %ymm2, %ymm1, %ymm1
902; AVX2-SLOW-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
903; AVX2-SLOW-NEXT: vpsrlw $7, %ymm1, %ymm1
904; AVX2-SLOW-NEXT: vpand %ymm3, %ymm1, %ymm1
905; AVX2-SLOW-NEXT: retq
906;
907; AVX2-FAST-LABEL: ext_i64_64i8:
908; AVX2-FAST: # %bb.0:
909; AVX2-FAST-NEXT: vmovq %rdi, %xmm0
910; AVX2-FAST-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
911; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm1[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
912; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm2 = xmm1[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
913; AVX2-FAST-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
914; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
915; AVX2-FAST-NEXT: vpand %ymm2, %ymm0, %ymm0
916; AVX2-FAST-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
917; AVX2-FAST-NEXT: vpsrlw $7, %ymm0, %ymm0
918; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
919; AVX2-FAST-NEXT: vpand %ymm3, %ymm0, %ymm0
920; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm4 = xmm1[8,9,8,9,8,9,8,9,10,11,10,11,10,11,10,11]
921; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,12,13,12,13,12,13,14,15,14,15,14,15,14,15]
922; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm4, %ymm1
923; AVX2-FAST-NEXT: vpand %ymm2, %ymm1, %ymm1
924; AVX2-FAST-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
925; AVX2-FAST-NEXT: vpsrlw $7, %ymm1, %ymm1
926; AVX2-FAST-NEXT: vpand %ymm3, %ymm1, %ymm1
927; AVX2-FAST-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000928;
Craig Topper5befc5b2017-11-28 01:36:31 +0000929; AVX512F-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000930; AVX512F: # %bb.0:
Craig Topper76adcc82018-01-23 14:25:39 +0000931; AVX512F-NEXT: movq %rdi, %rax
932; AVX512F-NEXT: movq %rdi, %rcx
933; AVX512F-NEXT: kmovw %edi, %k1
934; AVX512F-NEXT: movl %edi, %edx
935; AVX512F-NEXT: shrl $16, %edx
936; AVX512F-NEXT: shrq $32, %rax
937; AVX512F-NEXT: shrq $48, %rcx
938; AVX512F-NEXT: kmovw %ecx, %k2
939; AVX512F-NEXT: kmovw %eax, %k3
940; AVX512F-NEXT: kmovw %edx, %k4
Craig Topperddbc3402017-11-28 01:36:33 +0000941; AVX512F-NEXT: movl {{.*}}(%rip), %eax
Craig Topperddbc3402017-11-28 01:36:33 +0000942; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
943; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
Craig Topper76adcc82018-01-23 14:25:39 +0000944; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k4} {z}
Craig Topperddbc3402017-11-28 01:36:33 +0000945; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
946; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
Craig Topper76adcc82018-01-23 14:25:39 +0000947; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k3} {z}
Craig Topperddbc3402017-11-28 01:36:33 +0000948; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
949; AVX512F-NEXT: vpbroadcastd %eax, %zmm2 {%k2} {z}
950; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
951; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
Craig Topper5befc5b2017-11-28 01:36:31 +0000952; AVX512F-NEXT: retq
953;
954; AVX512VLBW-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000955; AVX512VLBW: # %bb.0:
Craig Topper5befc5b2017-11-28 01:36:31 +0000956; AVX512VLBW-NEXT: kmovq %rdi, %k1
957; AVX512VLBW-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
958; AVX512VLBW-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000959 %1 = bitcast i64 %a0 to <64 x i1>
960 %2 = zext <64 x i1> %1 to <64 x i8>
961 ret <64 x i8> %2
962}