; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VLBW

;
; 128-bit vectors
;

; Reinterpret the i2 argument as a <2 x i1> mask and zero-extend every lane
; to i64. The per-target assertions inside the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i2_2i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i2_2i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = zext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}

; Reinterpret the i4 argument as a <4 x i1> mask and zero-extend every lane
; to i32. The per-target assertions inside the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i4_4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i4_4i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VLBW-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}

; Reinterpret the i8 argument as an <8 x i1> mask and zero-extend every lane
; to i16. The per-target assertions inside the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i8_8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i8_8i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k0
; AVX512VLBW-NEXT:    vpmovm2w %k0, %xmm0
; AVX512VLBW-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}

; Reinterpret the i16 argument as a <16 x i1> mask and zero-extend every lane
; to i8. The per-target assertions inside the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    psrlw $7, %xmm0
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movd %edi, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT:    psrlw $7, %xmm0
; SSSE3-NEXT:    pand {{.*}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [-1.7939930131212661E-307,-1.7939930131212661E-307]
; AVX1-NEXT:    # xmm1 = mem[0,0]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i16_16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i16_16i8:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vmovdqu8 {{.*}}(%rip), %xmm0 {%k1} {z}
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;

; Reinterpret the i4 argument as a <4 x i1> mask and zero-extend every lane
; to i64. The per-target assertions inside the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlq $63, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlq $63, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlq $63, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i4_4i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i4_4i64:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VLBW-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrlq $63, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = zext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}

; Reinterpret the i8 argument as an <8 x i1> mask and zero-extend every lane
; to i32. The per-target assertions inside the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrld $31, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrld $31, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i8_8i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i8_8i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VLBW-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = zext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}

; Reinterpret the i16 argument as a <16 x i1> mask and zero-extend every lane
; to i16. The per-target assertions inside the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $15, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i16_16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i16_16i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k0
; AVX512VLBW-NEXT:    vpmovm2w %k0, %ymm0
; AVX512VLBW-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}

; Reinterpret the i32 argument as a <32 x i1> mask and zero-extend every lane
; to i8. The per-target assertions inside the body are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm1
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i32_32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    shrl $16, %edi
; AVX512F-NEXT:    kmovw %edi, %k2
; AVX512F-NEXT:    movl {{.*}}(%rip), %eax
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm1 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i32_32i8:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = zext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;

527define <8 x i64> @ext_i8_8i64(i8 %a0) {
528; SSE2-SSSE3-LABEL: ext_i8_8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000529; SSE2-SSSE3: # %bb.0:
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000530; SSE2-SSSE3-NEXT: # kill: def $edi killed $edi def $rdi
Simon Pilgrima705db92017-09-24 13:42:31 +0000531; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
532; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
533; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
534; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
535; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
536; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
537; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
538; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
539; SSE2-SSSE3-NEXT: psrlq $63, %xmm0
540; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
541; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm2
542; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
543; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
544; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
545; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
546; SSE2-SSSE3-NEXT: psrlq $63, %xmm1
547; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32]
548; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm3
549; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3
550; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm3
551; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
552; SSE2-SSSE3-NEXT: pand %xmm3, %xmm2
553; SSE2-SSSE3-NEXT: psrlq $63, %xmm2
554; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [64,128]
555; SSE2-SSSE3-NEXT: pand %xmm3, %xmm4
556; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm4
557; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000558; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
Simon Pilgrima705db92017-09-24 13:42:31 +0000559; SSE2-SSSE3-NEXT: psrlq $63, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000560; SSE2-SSSE3-NEXT: retq
561;
562; AVX1-LABEL: ext_i8_8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000563; AVX1: # %bb.0:
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000564; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
Simon Pilgrima705db92017-09-24 13:42:31 +0000565; AVX1-NEXT: vmovq %rdi, %xmm0
566; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
567; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
568; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
569; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
570; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm3
571; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
572; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
573; AVX1-NEXT: vpsrlq $63, %xmm3, %xmm3
574; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
575; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
576; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
577; AVX1-NEXT: vpsrlq $63, %xmm0, %xmm0
578; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
579; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
580; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm3
581; AVX1-NEXT: vpxor %xmm4, %xmm3, %xmm3
582; AVX1-NEXT: vpsrlq $63, %xmm3, %xmm3
583; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
584; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
585; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
586; AVX1-NEXT: vpsrlq $63, %xmm1, %xmm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000587; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000588; AVX1-NEXT: retq
589;
590; AVX2-LABEL: ext_i8_8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000591; AVX2: # %bb.0:
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000592; AVX2-NEXT: # kill: def $edi killed $edi def $rdi
Simon Pilgrima705db92017-09-24 13:42:31 +0000593; AVX2-NEXT: vmovq %rdi, %xmm0
594; AVX2-NEXT: vpbroadcastq %xmm0, %ymm1
595; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
596; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
597; AVX2-NEXT: vpcmpeqq %ymm0, %ymm2, %ymm0
598; AVX2-NEXT: vpsrlq $63, %ymm0, %ymm0
599; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000600; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
Simon Pilgrima705db92017-09-24 13:42:31 +0000601; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
602; AVX2-NEXT: vpsrlq $63, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000603; AVX2-NEXT: retq
604;
Craig Topper5befc5b2017-11-28 01:36:31 +0000605; AVX512F-LABEL: ext_i8_8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000606; AVX512F: # %bb.0:
Craig Topper5befc5b2017-11-28 01:36:31 +0000607; AVX512F-NEXT: kmovw %edi, %k1
Craig Topperc3aab4b2018-02-10 08:06:52 +0000608; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
609; AVX512F-NEXT: vpsrlq $63, %zmm0, %zmm0
Craig Topper5befc5b2017-11-28 01:36:31 +0000610; AVX512F-NEXT: retq
611;
612; AVX512VLBW-LABEL: ext_i8_8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000613; AVX512VLBW: # %bb.0:
Craig Topper5befc5b2017-11-28 01:36:31 +0000614; AVX512VLBW-NEXT: kmovd %edi, %k1
Craig Topperc3aab4b2018-02-10 08:06:52 +0000615; AVX512VLBW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
616; AVX512VLBW-NEXT: vpsrlq $63, %zmm0, %zmm0
Craig Topper5befc5b2017-11-28 01:36:31 +0000617; AVX512VLBW-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000618 %1 = bitcast i8 %a0 to <8 x i1>
619 %2 = zext <8 x i1> %1 to <8 x i64>
620 ret <8 x i64> %2
621}
622
; Bitcasts the i16 argument to <16 x i1> and zero-extends every lane to i32.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    psrld $31, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    psrld $31, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    psrld $31, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    psrld $31, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrld $31, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpsrld $31, %xmm3, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpsrld $31, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpsrld $31, %ymm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsrld $31, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i16_16i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpsrld $31, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i16_16i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k1
; AVX512VLBW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VLBW-NEXT:    vpsrld $31, %zmm0, %zmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = zext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}
707
; Bitcasts the i32 argument to <32 x i1> and zero-extends every lane to i16.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm2
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm1
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[1,1,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    psrlw $15, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm4
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vpsrlw $15, %xmm4, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm4, %ymm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[1,1,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $15, %xmm2, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $15, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX2-NEXT:    shrl $16, %edi
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpsrlw $15, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i32_32i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    shrl $16, %edi
; AVX512F-NEXT:    kmovw %edi, %k2
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512F-NEXT:    vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; AVX512F-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512F-NEXT:    vpsrlw $15, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i32_32i16:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovd %edi, %k0
; AVX512VLBW-NEXT:    vpmovm2w %k0, %zmm0
; AVX512VLBW-NEXT:    vpsrlw $15, %zmm0, %zmm0
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = zext <32 x i1> %1 to <32 x i16>
  ret <32 x i16> %2
}
805
; Bitcasts the i64 argument to <64 x i1> and zero-extends every lane to i8.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
define <64 x i8> @ext_i64_64i8(i64 %a0) {
; SSE2-SSSE3-LABEL: ext_i64_64i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm3
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm2
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    psrlw $7, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i64_64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpsrlw $7, %xmm3, %xmm3
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vpand %xmm6, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm6, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpsrlw $7, %xmm2, %xmm2
; AVX1-NEXT:    vpand %xmm6, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm1
; AVX1-NEXT:    vpand %xmm6, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i64_64i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,1,0,1]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpsrlw $7, %ymm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpand %ymm3, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpsrlw $7, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ext_i64_64i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    movq %rdi, %rax
; AVX512F-NEXT:    movq %rdi, %rcx
; AVX512F-NEXT:    kmovw %edi, %k1
; AVX512F-NEXT:    movl %edi, %edx
; AVX512F-NEXT:    shrl $16, %edx
; AVX512F-NEXT:    shrq $32, %rax
; AVX512F-NEXT:    shrq $48, %rcx
; AVX512F-NEXT:    kmovw %ecx, %k2
; AVX512F-NEXT:    kmovw %eax, %k3
; AVX512F-NEXT:    kmovw %edx, %k4
; AVX512F-NEXT:    movl {{.*}}(%rip), %eax
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm0 {%k1} {z}
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm1 {%k4} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm1 {%k3} {z}
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vpbroadcastd %eax, %zmm2 {%k2} {z}
; AVX512F-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512F-NEXT:    retq
;
; AVX512VLBW-LABEL: ext_i64_64i8:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    kmovq %rdi, %k1
; AVX512VLBW-NEXT:    vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512VLBW-NEXT:    retq
  %1 = bitcast i64 %a0 to <64 x i1>
  %2 = zext <64 x i1> %1 to <64 x i8>
  ret <64 x i8> %2
}