; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VLBW

;
; 128-bit vectors
;

; Reinterpret the 2-bit scalar mask %a0 as <2 x i1> (one bit per lane) and
; zero-extend every lane to i64, so each result element is 0 or 1.
define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrlq $63, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: # kill: def $edi killed $edi def $rdi
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i2_2i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i2_2i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VLBW-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VLBW-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = zext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}

; Reinterpret the 4-bit scalar mask %a0 as <4 x i1> (one bit per lane) and
; zero-extend every lane to i32, so each result element is 0 or 1.
define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrld $31, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i4_4i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i4_4i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VLBW-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VLBW-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}

; Reinterpret the 8-bit scalar mask %a0 as <8 x i1> (one bit per lane) and
; zero-extend every lane to i16, so each result element is 0 or 1.
define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrlw $15, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i8_8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i8_8i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k0
; AVX512VLBW-NEXT: vpmovm2w %k0, %xmm0
; AVX512VLBW-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX512VLBW-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}

; Reinterpret the 16-bit scalar mask %a0 as <16 x i1> (one bit per lane) and
; zero-extend every lane to i8, so each result element is 0 or 1.
define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movd %edi, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; SSE2-NEXT: psrlw $7, %xmm0
; SSE2-NEXT: pand {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT: psrlw $7, %xmm0
; SSSE3-NEXT: pand {{.*}}(%rip), %xmm0
; SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpsrlw $7, %xmm0, %xmm0
; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i16_16i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i16_16i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vmovdqu8 {{.*}}(%rip), %xmm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;

; Reinterpret the 4-bit scalar mask %a0 as <4 x i1> (one bit per lane) and
; zero-extend every lane to i64, so each result element is 0 or 1.
define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: psrlq $63, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: psrlq $63, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlq $63, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: # kill: def $edi killed $edi def $rdi
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i4_4i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpsrlq $63, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i4_4i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VLBW-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512VLBW-NEXT: vpsrlq $63, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}

; Reinterpret the 8-bit scalar mask %a0 as <8 x i1> (one bit per lane) and
; zero-extend every lane to i32, so each result element is 0 or 1.
define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT: psrld $31, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT: psrld $31, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrld $31, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i8_8i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i8_8i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VLBW-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VLBW-NEXT: vpsrld $31, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}

; Reinterpret the 16-bit scalar mask %a0 as <16 x i1> (one bit per lane) and
; zero-extend every lane to i16, so each result element is 0 or 1.
define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT: psrlw $15, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT: psrlw $15, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpsrlw $15, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ext_i16_16i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i16_16i16:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k0
; AVX512VLBW-NEXT: vpmovm2w %k0, %ymm0
; AVX512VLBW-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX512VLBW-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}

; Reinterpret the 32-bit scalar mask %a0 as <32 x i1> (one bit per lane) and
; zero-extend every lane to i8, so each result element is 0 or 1.
define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT: psrlw $7, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT: psrlw $7, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vpand %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: ext_i32_32i8:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vmovd %edi, %xmm0
; AVX2-SLOW-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-SLOW-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT: vpsrlw $7, %ymm0, %ymm0
; AVX2-SLOW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: ext_i32_32i8:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovd %edi, %xmm0
; AVX2-FAST-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
; AVX2-FAST-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-FAST-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT: vpsrlw $7, %ymm0, %ymm0
; AVX2-FAST-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-FAST-NEXT: retq
;
; AVX512F-LABEL: ext_i32_32i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: shrl $16, %edi
; AVX512F-NEXT: kmovw %edi, %k2
; AVX512F-NEXT: movl {{.*}}(%rip), %eax
; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i32_32i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = zext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;

544define <8 x i64> @ext_i8_8i64(i8 %a0) {
545; SSE2-SSSE3-LABEL: ext_i8_8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000546; SSE2-SSSE3: # %bb.0:
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000547; SSE2-SSSE3-NEXT: # kill: def $edi killed $edi def $rdi
Simon Pilgrima705db92017-09-24 13:42:31 +0000548; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
549; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
550; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
551; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
552; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
553; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
554; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
555; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
556; SSE2-SSSE3-NEXT: psrlq $63, %xmm0
557; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
558; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm2
559; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
560; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
561; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
562; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
563; SSE2-SSSE3-NEXT: psrlq $63, %xmm1
564; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32]
565; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm3
566; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3
567; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm3
568; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
569; SSE2-SSSE3-NEXT: pand %xmm3, %xmm2
570; SSE2-SSSE3-NEXT: psrlq $63, %xmm2
571; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [64,128]
572; SSE2-SSSE3-NEXT: pand %xmm3, %xmm4
573; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm4
574; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000575; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
Simon Pilgrima705db92017-09-24 13:42:31 +0000576; SSE2-SSSE3-NEXT: psrlq $63, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000577; SSE2-SSSE3-NEXT: retq
578;
579; AVX1-LABEL: ext_i8_8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000580; AVX1: # %bb.0:
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000581; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
Simon Pilgrima705db92017-09-24 13:42:31 +0000582; AVX1-NEXT: vmovq %rdi, %xmm0
583; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
584; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
585; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
586; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
587; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm3
588; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
589; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
590; AVX1-NEXT: vpsrlq $63, %xmm3, %xmm3
591; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
592; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
593; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
594; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
595; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
596; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
597; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm3
598; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
599; AVX1-NEXT: vpsrlq $63, %xmm3, %xmm3
600; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
601; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
602; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
603; AVX1-NEXT: vpsrlq $63, %xmm1, %xmm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000604; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000605; AVX1-NEXT: retq
606;
607; AVX2-LABEL: ext_i8_8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000608; AVX2: # %bb.0:
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000609; AVX2-NEXT: # kill: def $edi killed $edi def $rdi
Simon Pilgrima705db92017-09-24 13:42:31 +0000610; AVX2-NEXT: vmovq %rdi, %xmm0
611; AVX2-NEXT: vpbroadcastq %xmm0, %ymm1
612; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
613; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
614; AVX2-NEXT: vpcmpeqq %ymm0, %ymm2, %ymm0
615; AVX2-NEXT: vpsrlq $63, %ymm0, %ymm0
616; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000617; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
Simon Pilgrima705db92017-09-24 13:42:31 +0000618; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
619; AVX2-NEXT: vpsrlq $63, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000620; AVX2-NEXT: retq
621;
Craig Topper5befc5b2017-11-28 01:36:31 +0000622; AVX512F-LABEL: ext_i8_8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000623; AVX512F: # %bb.0:
Craig Topper5befc5b2017-11-28 01:36:31 +0000624; AVX512F-NEXT: kmovw %edi, %k1
Craig Topperc3aab4b2018-02-10 08:06:52 +0000625; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
626; AVX512F-NEXT: vpsrlq $63, %zmm0, %zmm0
Craig Topper5befc5b2017-11-28 01:36:31 +0000627; AVX512F-NEXT: retq
628;
629; AVX512VLBW-LABEL: ext_i8_8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000630; AVX512VLBW: # %bb.0:
Craig Topper5befc5b2017-11-28 01:36:31 +0000631; AVX512VLBW-NEXT: kmovd %edi, %k1
Craig Topperc3aab4b2018-02-10 08:06:52 +0000632; AVX512VLBW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
633; AVX512VLBW-NEXT: vpsrlq $63, %zmm0, %zmm0
Craig Topper5befc5b2017-11-28 01:36:31 +0000634; AVX512VLBW-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000635 %1 = bitcast i8 %a0 to <8 x i1>
636 %2 = zext <8 x i1> %1 to <8 x i64>
637 ret <8 x i64> %2
638}
639
; Test ext_i16_16i32 - bitcasts the i16 argument to <16 x i1> and zero-extends
; every lane to i32, returning <16 x i32>. The prefixed assertion lines inside
; the body record the expected llc output for each configuration listed at the
; top of the file. Per the NOTE on the first line of the file they are produced
; by utils/update_llc_test_checks.py, so regenerate them with that script
; instead of editing them by hand.
640define <16 x i32> @ext_i16_16i32(i16 %a0) {
641; SSE2-SSSE3-LABEL: ext_i16_16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000642; SSE2-SSSE3: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000643; SSE2-SSSE3-NEXT: movd %edi, %xmm0
644; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
645; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
646; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm0
647; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
648; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
649; SSE2-SSSE3-NEXT: psrld $31, %xmm0
650; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000651; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm1
Simon Pilgrima705db92017-09-24 13:42:31 +0000652; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
653; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
654; SSE2-SSSE3-NEXT: psrld $31, %xmm1
655; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000656; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000657; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
Simon Pilgrima705db92017-09-24 13:42:31 +0000658; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm2
659; SSE2-SSSE3-NEXT: psrld $31, %xmm2
660; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000661; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
Simon Pilgrima705db92017-09-24 13:42:31 +0000662; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm3
663; SSE2-SSSE3-NEXT: psrld $31, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000664; SSE2-SSSE3-NEXT: retq
665;
666; AVX1-LABEL: ext_i16_16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000667; AVX1: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000668; AVX1-NEXT: vmovd %edi, %xmm0
669; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
670; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
671; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
672; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
673; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm3
674; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
675; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
676; AVX1-NEXT: vpsrld $31, %xmm3, %xmm3
677; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
678; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
679; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
680; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
681; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
682; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
683; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm3
684; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
685; AVX1-NEXT: vpsrld $31, %xmm3, %xmm3
686; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
687; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
688; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
689; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
690; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000691; AVX1-NEXT: retq
692;
693; AVX2-LABEL: ext_i16_16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000694; AVX2: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000695; AVX2-NEXT: vmovd %edi, %xmm0
696; AVX2-NEXT: vpbroadcastd %xmm0, %ymm1
697; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
698; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
699; AVX2-NEXT: vpcmpeqd %ymm0, %ymm2, %ymm0
700; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0
701; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000702; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
Simon Pilgrima705db92017-09-24 13:42:31 +0000703; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
704; AVX2-NEXT: vpsrld $31, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000705; AVX2-NEXT: retq
706;
Craig Topper5befc5b2017-11-28 01:36:31 +0000707; AVX512F-LABEL: ext_i16_16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000708; AVX512F: # %bb.0:
Craig Topper5befc5b2017-11-28 01:36:31 +0000709; AVX512F-NEXT: kmovw %edi, %k1
Craig Topperc3aab4b2018-02-10 08:06:52 +0000710; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
711; AVX512F-NEXT: vpsrld $31, %zmm0, %zmm0
Craig Topper5befc5b2017-11-28 01:36:31 +0000712; AVX512F-NEXT: retq
713;
714; AVX512VLBW-LABEL: ext_i16_16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000715; AVX512VLBW: # %bb.0:
Craig Topper5befc5b2017-11-28 01:36:31 +0000716; AVX512VLBW-NEXT: kmovd %edi, %k1
Craig Topperc3aab4b2018-02-10 08:06:52 +0000717; AVX512VLBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
718; AVX512VLBW-NEXT: vpsrld $31, %zmm0, %zmm0
Craig Topper5befc5b2017-11-28 01:36:31 +0000719; AVX512VLBW-NEXT: retq
; IR under test - reinterpret the 16 scalar bits as sixteen i1 lanes, then
; widen with zext so that a set lane becomes the value 1 (not all-ones).
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000720 %1 = bitcast i16 %a0 to <16 x i1>
721 %2 = zext <16 x i1> %1 to <16 x i32>
722 ret <16 x i32> %2
723}
724
; Test ext_i32_32i16 - bitcasts the i32 argument to <32 x i1> and zero-extends
; every lane to i16, returning <32 x i16>. The prefixed assertion lines inside
; the body record the expected llc output for each configuration listed at the
; top of the file. Per the NOTE on the first line of the file they are produced
; by utils/update_llc_test_checks.py, so regenerate them with that script
; instead of editing them by hand.
725define <32 x i16> @ext_i32_32i16(i32 %a0) {
726; SSE2-SSSE3-LABEL: ext_i32_32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000727; SSE2-SSSE3: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000728; SSE2-SSSE3-NEXT: movd %edi, %xmm2
Simon Pilgrimc7015962017-12-29 14:41:50 +0000729; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7]
730; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
Simon Pilgrima705db92017-09-24 13:42:31 +0000731; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000732; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000733; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
Simon Pilgrima705db92017-09-24 13:42:31 +0000734; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm0
735; SSE2-SSSE3-NEXT: psrlw $15, %xmm0
736; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
737; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
738; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm1
739; SSE2-SSSE3-NEXT: psrlw $15, %xmm1
Simon Pilgrimc7015962017-12-29 14:41:50 +0000740; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,2,3,4,5,6,7]
741; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000742; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000743; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
Simon Pilgrima705db92017-09-24 13:42:31 +0000744; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm2
745; SSE2-SSSE3-NEXT: psrlw $15, %xmm2
746; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
747; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm3
748; SSE2-SSSE3-NEXT: psrlw $15, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000749; SSE2-SSSE3-NEXT: retq
750;
751; AVX1-LABEL: ext_i32_32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000752; AVX1: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000753; AVX1-NEXT: vmovd %edi, %xmm1
Simon Pilgrimc7015962017-12-29 14:41:50 +0000754; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
755; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
Simon Pilgrima705db92017-09-24 13:42:31 +0000756; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
757; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000758; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
Simon Pilgrima705db92017-09-24 13:42:31 +0000759; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
760; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm4
761; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
762; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
763; AVX1-NEXT: vpsrlw $15, %xmm4, %xmm4
764; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
765; AVX1-NEXT: vpcmpeqw %xmm3, %xmm0, %xmm0
766; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
767; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0
768; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
Simon Pilgrimc7015962017-12-29 14:41:50 +0000769; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,2,3,4,5,6,7]
770; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
Simon Pilgrima705db92017-09-24 13:42:31 +0000771; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000772; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
Simon Pilgrima705db92017-09-24 13:42:31 +0000773; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm2
774; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
775; AVX1-NEXT: vpsrlw $15, %xmm2, %xmm2
776; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
777; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
778; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
779; AVX1-NEXT: vpsrlw $15, %xmm1, %xmm1
780; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000781; AVX1-NEXT: retq
782;
783; AVX2-LABEL: ext_i32_32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000784; AVX2: # %bb.0:
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000785; AVX2-NEXT: vmovd %edi, %xmm0
Simon Pilgrima705db92017-09-24 13:42:31 +0000786; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
787; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
788; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
789; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
790; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm0
791; AVX2-NEXT: shrl $16, %edi
792; AVX2-NEXT: vmovd %edi, %xmm2
793; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
794; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm2
795; AVX2-NEXT: vpcmpeqw %ymm1, %ymm2, %ymm1
796; AVX2-NEXT: vpsrlw $15, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000797; AVX2-NEXT: retq
798;
Craig Topper5befc5b2017-11-28 01:36:31 +0000799; AVX512F-LABEL: ext_i32_32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000800; AVX512F: # %bb.0:
Craig Topper76adcc82018-01-23 14:25:39 +0000801; AVX512F-NEXT: kmovw %edi, %k1
802; AVX512F-NEXT: shrl $16, %edi
803; AVX512F-NEXT: kmovw %edi, %k2
Craig Topperc3aab4b2018-02-10 08:06:52 +0000804; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
Craig Topper76adcc82018-01-23 14:25:39 +0000805; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
Craig Topperc3aab4b2018-02-10 08:06:52 +0000806; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm0
807; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
Craig Topper76adcc82018-01-23 14:25:39 +0000808; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
Craig Topperc3aab4b2018-02-10 08:06:52 +0000809; AVX512F-NEXT: vpsrlw $15, %ymm1, %ymm1
Craig Topper5befc5b2017-11-28 01:36:31 +0000810; AVX512F-NEXT: retq
811;
812; AVX512VLBW-LABEL: ext_i32_32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000813; AVX512VLBW: # %bb.0:
Craig Topperc3aab4b2018-02-10 08:06:52 +0000814; AVX512VLBW-NEXT: kmovd %edi, %k0
815; AVX512VLBW-NEXT: vpmovm2w %k0, %zmm0
816; AVX512VLBW-NEXT: vpsrlw $15, %zmm0, %zmm0
Craig Topper5befc5b2017-11-28 01:36:31 +0000817; AVX512VLBW-NEXT: retq
; IR under test - reinterpret the 32 scalar bits as thirty-two i1 lanes, then
; widen with zext so that a set lane becomes the value 1 (not all-ones).
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000818 %1 = bitcast i32 %a0 to <32 x i1>
819 %2 = zext <32 x i1> %1 to <32 x i16>
820 ret <32 x i16> %2
821}
822
; Test ext_i64_64i8 - bitcasts the i64 argument to <64 x i1> and zero-extends
; every lane to i8, returning <64 x i8>. The prefixed assertion lines inside
; the body record the expected llc output for each configuration listed at the
; top of the file; this function has separate expectations for the two AVX2
; configurations (the -SLOW and -FAST prefixes). Per the NOTE on the first line
; of the file the assertions are produced by utils/update_llc_test_checks.py,
; so regenerate them with that script instead of editing them by hand.
823define <64 x i8> @ext_i64_64i8(i64 %a0) {
824; SSE2-SSSE3-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000825; SSE2-SSSE3: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000826; SSE2-SSSE3-NEXT: movq %rdi, %xmm3
827; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
828; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
829; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
830; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
831; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
832; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm0
833; SSE2-SSSE3-NEXT: psrlw $7, %xmm0
834; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
835; SSE2-SSSE3-NEXT: pand %xmm5, %xmm0
836; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
837; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
838; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
839; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm1
840; SSE2-SSSE3-NEXT: psrlw $7, %xmm1
841; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
842; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
843; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
844; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
845; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm2
846; SSE2-SSSE3-NEXT: psrlw $7, %xmm2
847; SSE2-SSSE3-NEXT: pand %xmm5, %xmm2
848; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
849; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
850; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
851; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm3
852; SSE2-SSSE3-NEXT: psrlw $7, %xmm3
853; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000854; SSE2-SSSE3-NEXT: retq
855;
856; AVX1-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000857; AVX1: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000858; AVX1-NEXT: vmovq %rdi, %xmm0
859; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
860; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
861; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
862; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
863; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
864; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
865; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
866; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
867; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
868; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
869; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm3
870; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
871; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
872; AVX1-NEXT: vpsrlw $7, %xmm3, %xmm3
873; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
874; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3
875; AVX1-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
876; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
877; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0
878; AVX1-NEXT: vpand %xmm6, %xmm0, %xmm0
879; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
880; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
881; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
882; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
883; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
884; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
885; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
886; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
887; AVX1-NEXT: vpcmpeqb %xmm4, %xmm2, %xmm2
888; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
889; AVX1-NEXT: vpsrlw $7, %xmm2, %xmm2
890; AVX1-NEXT: vpand %xmm6, %xmm2, %xmm2
891; AVX1-NEXT: vpcmpeqb %xmm4, %xmm1, %xmm1
892; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
893; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1
894; AVX1-NEXT: vpand %xmm6, %xmm1, %xmm1
895; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000896; AVX1-NEXT: retq
897;
Zvi Rackover72b0bb12018-01-09 16:26:06 +0000898; AVX2-SLOW-LABEL: ext_i64_64i8:
899; AVX2-SLOW: # %bb.0:
900; AVX2-SLOW-NEXT: vmovq %rdi, %xmm0
901; AVX2-SLOW-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
902; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
903; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
904; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
905; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
906; AVX2-SLOW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
907; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
908; AVX2-SLOW-NEXT: vpand %ymm2, %ymm0, %ymm0
909; AVX2-SLOW-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
910; AVX2-SLOW-NEXT: vpsrlw $7, %ymm0, %ymm0
911; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
912; AVX2-SLOW-NEXT: vpand %ymm3, %ymm0, %ymm0
913; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm4 = xmm1[0,1,2,3,4,4,5,5]
914; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,2,3,3]
915; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
916; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
917; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm4, %ymm1
918; AVX2-SLOW-NEXT: vpand %ymm2, %ymm1, %ymm1
919; AVX2-SLOW-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
920; AVX2-SLOW-NEXT: vpsrlw $7, %ymm1, %ymm1
921; AVX2-SLOW-NEXT: vpand %ymm3, %ymm1, %ymm1
922; AVX2-SLOW-NEXT: retq
923;
924; AVX2-FAST-LABEL: ext_i64_64i8:
925; AVX2-FAST: # %bb.0:
926; AVX2-FAST-NEXT: vmovq %rdi, %xmm0
927; AVX2-FAST-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
928; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm1[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
929; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm2 = xmm1[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
930; AVX2-FAST-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
931; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
932; AVX2-FAST-NEXT: vpand %ymm2, %ymm0, %ymm0
933; AVX2-FAST-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
934; AVX2-FAST-NEXT: vpsrlw $7, %ymm0, %ymm0
935; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
936; AVX2-FAST-NEXT: vpand %ymm3, %ymm0, %ymm0
937; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm4 = xmm1[8,9,8,9,8,9,8,9,10,11,10,11,10,11,10,11]
938; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,12,13,12,13,12,13,14,15,14,15,14,15,14,15]
939; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm4, %ymm1
940; AVX2-FAST-NEXT: vpand %ymm2, %ymm1, %ymm1
941; AVX2-FAST-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
942; AVX2-FAST-NEXT: vpsrlw $7, %ymm1, %ymm1
943; AVX2-FAST-NEXT: vpand %ymm3, %ymm1, %ymm1
944; AVX2-FAST-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000945;
Craig Topper5befc5b2017-11-28 01:36:31 +0000946; AVX512F-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000947; AVX512F: # %bb.0:
Craig Topper76adcc82018-01-23 14:25:39 +0000948; AVX512F-NEXT: movq %rdi, %rax
949; AVX512F-NEXT: movq %rdi, %rcx
950; AVX512F-NEXT: kmovw %edi, %k1
951; AVX512F-NEXT: movl %edi, %edx
952; AVX512F-NEXT: shrl $16, %edx
953; AVX512F-NEXT: shrq $32, %rax
954; AVX512F-NEXT: shrq $48, %rcx
955; AVX512F-NEXT: kmovw %ecx, %k2
956; AVX512F-NEXT: kmovw %eax, %k3
957; AVX512F-NEXT: kmovw %edx, %k4
Craig Topperddbc3402017-11-28 01:36:33 +0000958; AVX512F-NEXT: movl {{.*}}(%rip), %eax
Craig Topperddbc3402017-11-28 01:36:33 +0000959; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
960; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
Craig Topper76adcc82018-01-23 14:25:39 +0000961; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k4} {z}
Craig Topperddbc3402017-11-28 01:36:33 +0000962; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
963; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
Craig Topper76adcc82018-01-23 14:25:39 +0000964; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k3} {z}
Craig Topperddbc3402017-11-28 01:36:33 +0000965; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
966; AVX512F-NEXT: vpbroadcastd %eax, %zmm2 {%k2} {z}
967; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
968; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
Craig Topper5befc5b2017-11-28 01:36:31 +0000969; AVX512F-NEXT: retq
970;
971; AVX512VLBW-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000972; AVX512VLBW: # %bb.0:
Craig Topper5befc5b2017-11-28 01:36:31 +0000973; AVX512VLBW-NEXT: kmovq %rdi, %k1
974; AVX512VLBW-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
975; AVX512VLBW-NEXT: retq
; IR under test - reinterpret the 64 scalar bits as sixty-four i1 lanes, then
; widen with zext so that a set lane becomes the value 1 (not all-ones).
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000976 %1 = bitcast i64 %a0 to <64 x i1>
977 %2 = zext <64 x i1> %1 to <64 x i8>
978 ret <64 x i8> %2
979}