blob: c022d7908a105cb101c29e3c3b7fca89dfbd0a0e [file] [log] [blame]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00001; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
Zvi Rackover72b0bb12018-01-09 16:26:06 +00005; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-SLOW
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-FAST
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=AVX512
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00008
9;
10; 128-bit vectors
11;
12
13define <2 x i64> @ext_i2_2i64(i2 %a0) {
14; SSE2-SSSE3-LABEL: ext_i2_2i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000015; SSE2-SSSE3: # %bb.0:
Puyan Lotfi43e94b12018-01-31 22:04:26 +000016; SSE2-SSSE3-NEXT: # kill: def $edi killed $edi def $rdi
Simon Pilgrima705db92017-09-24 13:42:31 +000017; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
18; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
19; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
20; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
21; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
22; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
23; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
Simon Pilgrima80cb1d2017-07-06 19:33:10 +000024; SSE2-SSSE3-NEXT: retq
25;
Simon Pilgrima705db92017-09-24 13:42:31 +000026; AVX1-LABEL: ext_i2_2i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000027; AVX1: # %bb.0:
Puyan Lotfi43e94b12018-01-31 22:04:26 +000028; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
Simon Pilgrima705db92017-09-24 13:42:31 +000029; AVX1-NEXT: vmovq %rdi, %xmm0
30; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
31; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
32; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
33; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
34; AVX1-NEXT: retq
35;
36; AVX2-LABEL: ext_i2_2i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000037; AVX2: # %bb.0:
Puyan Lotfi43e94b12018-01-31 22:04:26 +000038; AVX2-NEXT: # kill: def $edi killed $edi def $rdi
Simon Pilgrima705db92017-09-24 13:42:31 +000039; AVX2-NEXT: vmovq %rdi, %xmm0
40; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
41; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
42; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
43; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
44; AVX2-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +000045;
46; AVX512-LABEL: ext_i2_2i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000047; AVX512: # %bb.0:
Craig Topper876ec0b2017-12-31 07:38:41 +000048; AVX512-NEXT: kmovd %edi, %k1
Craig Topper55cf8802017-12-28 19:46:11 +000049; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
50; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
Simon Pilgrima80cb1d2017-07-06 19:33:10 +000051; AVX512-NEXT: retq
52 %1 = bitcast i2 %a0 to <2 x i1>
53 %2 = sext <2 x i1> %1 to <2 x i64>
54 ret <2 x i64> %2
55}
56
57define <4 x i32> @ext_i4_4i32(i4 %a0) {
58; SSE2-SSSE3-LABEL: ext_i4_4i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000059; SSE2-SSSE3: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +000060; SSE2-SSSE3-NEXT: movd %edi, %xmm0
61; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
62; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
63; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
64; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
Simon Pilgrima80cb1d2017-07-06 19:33:10 +000065; SSE2-SSSE3-NEXT: retq
66;
Simon Pilgrima705db92017-09-24 13:42:31 +000067; AVX1-LABEL: ext_i4_4i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000068; AVX1: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +000069; AVX1-NEXT: vmovd %edi, %xmm0
70; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
71; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
72; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
73; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
74; AVX1-NEXT: retq
75;
76; AVX2-LABEL: ext_i4_4i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000077; AVX2: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +000078; AVX2-NEXT: vmovd %edi, %xmm0
79; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
80; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
81; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
82; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
83; AVX2-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +000084;
85; AVX512-LABEL: ext_i4_4i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000086; AVX512: # %bb.0:
Craig Topper876ec0b2017-12-31 07:38:41 +000087; AVX512-NEXT: kmovd %edi, %k1
Craig Topper55cf8802017-12-28 19:46:11 +000088; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
89; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
Simon Pilgrima80cb1d2017-07-06 19:33:10 +000090; AVX512-NEXT: retq
91 %1 = bitcast i4 %a0 to <4 x i1>
92 %2 = sext <4 x i1> %1 to <4 x i32>
93 ret <4 x i32> %2
94}
95
96define <8 x i16> @ext_i8_8i16(i8 %a0) {
97; SSE2-SSSE3-LABEL: ext_i8_8i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +000098; SSE2-SSSE3: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +000099; SSE2-SSSE3-NEXT: movd %edi, %xmm0
Simon Pilgrimc7015962017-12-29 14:41:50 +0000100; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
101; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
Simon Pilgrima705db92017-09-24 13:42:31 +0000102; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
103; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
104; SSE2-SSSE3-NEXT: pcmpeqw %xmm1, %xmm0
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000105; SSE2-SSSE3-NEXT: retq
106;
Simon Pilgrima705db92017-09-24 13:42:31 +0000107; AVX1-LABEL: ext_i8_8i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000108; AVX1: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000109; AVX1-NEXT: vmovd %edi, %xmm0
Simon Pilgrimc7015962017-12-29 14:41:50 +0000110; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
111; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
Simon Pilgrima705db92017-09-24 13:42:31 +0000112; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
113; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
114; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
115; AVX1-NEXT: retq
116;
117; AVX2-LABEL: ext_i8_8i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000118; AVX2: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000119; AVX2-NEXT: vmovd %edi, %xmm0
120; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
121; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
122; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
123; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
124; AVX2-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000125;
126; AVX512-LABEL: ext_i8_8i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000127; AVX512: # %bb.0:
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000128; AVX512-NEXT: kmovd %edi, %k0
129; AVX512-NEXT: vpmovm2w %k0, %xmm0
130; AVX512-NEXT: retq
131 %1 = bitcast i8 %a0 to <8 x i1>
132 %2 = sext <8 x i1> %1 to <8 x i16>
133 ret <8 x i16> %2
134}
135
136define <16 x i8> @ext_i16_16i8(i16 %a0) {
Simon Pilgrima705db92017-09-24 13:42:31 +0000137; SSE2-LABEL: ext_i16_16i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000138; SSE2: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000139; SSE2-NEXT: movd %edi, %xmm0
140; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
141; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
142; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
143; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
144; SSE2-NEXT: pand %xmm1, %xmm0
145; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
146; SSE2-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000147;
Simon Pilgrima705db92017-09-24 13:42:31 +0000148; SSSE3-LABEL: ext_i16_16i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000149; SSSE3: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000150; SSSE3-NEXT: movd %edi, %xmm0
151; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
152; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
153; SSSE3-NEXT: pand %xmm1, %xmm0
154; SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
155; SSSE3-NEXT: retq
156;
157; AVX1-LABEL: ext_i16_16i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000158; AVX1: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000159; AVX1-NEXT: vmovd %edi, %xmm0
160; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
Craig Topper67177942018-10-15 01:51:53 +0000161; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [-1.7939930131212661E-307,-1.7939930131212661E-307]
162; AVX1-NEXT: # xmm1 = mem[0,0]
Simon Pilgrima705db92017-09-24 13:42:31 +0000163; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
164; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
165; AVX1-NEXT: retq
166;
167; AVX2-LABEL: ext_i16_16i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000168; AVX2: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000169; AVX2-NEXT: vmovd %edi, %xmm0
170; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
171; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
172; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
173; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
174; AVX2-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000175;
176; AVX512-LABEL: ext_i16_16i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000177; AVX512: # %bb.0:
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000178; AVX512-NEXT: kmovd %edi, %k0
179; AVX512-NEXT: vpmovm2b %k0, %xmm0
180; AVX512-NEXT: retq
181 %1 = bitcast i16 %a0 to <16 x i1>
182 %2 = sext <16 x i1> %1 to <16 x i8>
183 ret <16 x i8> %2
184}
185
186;
187; 256-bit vectors
188;
189
190define <4 x i64> @ext_i4_4i64(i4 %a0) {
191; SSE2-SSSE3-LABEL: ext_i4_4i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000192; SSE2-SSSE3: # %bb.0:
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000193; SSE2-SSSE3-NEXT: # kill: def $edi killed $edi def $rdi
Simon Pilgrima705db92017-09-24 13:42:31 +0000194; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
195; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
196; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
197; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm1
198; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
199; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
200; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
201; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
202; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
203; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
204; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
205; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
206; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000207; SSE2-SSSE3-NEXT: retq
208;
209; AVX1-LABEL: ext_i4_4i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000210; AVX1: # %bb.0:
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000211; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
Simon Pilgrima705db92017-09-24 13:42:31 +0000212; AVX1-NEXT: vmovq %rdi, %xmm0
213; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
214; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
215; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
216; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
217; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
218; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
219; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
220; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
221; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
222; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
223; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000224; AVX1-NEXT: retq
225;
226; AVX2-LABEL: ext_i4_4i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000227; AVX2: # %bb.0:
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000228; AVX2-NEXT: # kill: def $edi killed $edi def $rdi
Simon Pilgrima705db92017-09-24 13:42:31 +0000229; AVX2-NEXT: vmovq %rdi, %xmm0
230; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
231; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
232; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
233; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000234; AVX2-NEXT: retq
235;
236; AVX512-LABEL: ext_i4_4i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000237; AVX512: # %bb.0:
Craig Topper876ec0b2017-12-31 07:38:41 +0000238; AVX512-NEXT: kmovd %edi, %k1
Craig Topper55cf8802017-12-28 19:46:11 +0000239; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
240; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000241; AVX512-NEXT: retq
242 %1 = bitcast i4 %a0 to <4 x i1>
243 %2 = sext <4 x i1> %1 to <4 x i64>
244 ret <4 x i64> %2
245}
246
247define <8 x i32> @ext_i8_8i32(i8 %a0) {
248; SSE2-SSSE3-LABEL: ext_i8_8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000249; SSE2-SSSE3: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000250; SSE2-SSSE3-NEXT: movd %edi, %xmm0
251; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
252; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000253; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrima705db92017-09-24 13:42:31 +0000254; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
255; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
256; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
257; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
258; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000259; SSE2-SSSE3-NEXT: retq
260;
261; AVX1-LABEL: ext_i8_8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000262; AVX1: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000263; AVX1-NEXT: vmovd %edi, %xmm0
264; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
265; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
266; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
267; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
268; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
269; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
270; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
271; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
272; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
273; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
274; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000275; AVX1-NEXT: retq
276;
277; AVX2-LABEL: ext_i8_8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000278; AVX2: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000279; AVX2-NEXT: vmovd %edi, %xmm0
280; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
281; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
282; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
283; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000284; AVX2-NEXT: retq
285;
286; AVX512-LABEL: ext_i8_8i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000287; AVX512: # %bb.0:
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000288; AVX512-NEXT: kmovd %edi, %k1
289; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
290; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
291; AVX512-NEXT: retq
292 %1 = bitcast i8 %a0 to <8 x i1>
293 %2 = sext <8 x i1> %1 to <8 x i32>
294 ret <8 x i32> %2
295}
296
297define <16 x i16> @ext_i16_16i16(i16 %a0) {
298; SSE2-SSSE3-LABEL: ext_i16_16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000299; SSE2-SSSE3: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000300; SSE2-SSSE3-NEXT: movd %edi, %xmm0
Simon Pilgrimc7015962017-12-29 14:41:50 +0000301; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
302; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
Simon Pilgrima705db92017-09-24 13:42:31 +0000303; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000304; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrima705db92017-09-24 13:42:31 +0000305; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
306; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm0
307; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
308; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
309; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000310; SSE2-SSSE3-NEXT: retq
311;
312; AVX1-LABEL: ext_i16_16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000313; AVX1: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000314; AVX1-NEXT: vmovd %edi, %xmm0
Simon Pilgrimc7015962017-12-29 14:41:50 +0000315; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
316; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
Simon Pilgrima705db92017-09-24 13:42:31 +0000317; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
318; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
319; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
320; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
321; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
322; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
323; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
324; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
325; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
326; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000327; AVX1-NEXT: retq
328;
329; AVX2-LABEL: ext_i16_16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000330; AVX2: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000331; AVX2-NEXT: vmovd %edi, %xmm0
332; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
333; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
334; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
335; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000336; AVX2-NEXT: retq
337;
338; AVX512-LABEL: ext_i16_16i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000339; AVX512: # %bb.0:
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000340; AVX512-NEXT: kmovd %edi, %k0
341; AVX512-NEXT: vpmovm2w %k0, %ymm0
342; AVX512-NEXT: retq
343 %1 = bitcast i16 %a0 to <16 x i1>
344 %2 = sext <16 x i1> %1 to <16 x i16>
345 ret <16 x i16> %2
346}
347
348define <32 x i8> @ext_i32_32i8(i32 %a0) {
349; SSE2-SSSE3-LABEL: ext_i32_32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000350; SSE2-SSSE3: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000351; SSE2-SSSE3-NEXT: movd %edi, %xmm1
352; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
353; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
354; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
355; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
356; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
357; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm0
358; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
359; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
360; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
361; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000362; SSE2-SSSE3-NEXT: retq
363;
364; AVX1-LABEL: ext_i32_32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000365; AVX1: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000366; AVX1-NEXT: vmovd %edi, %xmm0
367; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
368; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
369; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
370; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
371; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000372; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
Simon Pilgrima705db92017-09-24 13:42:31 +0000373; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
374; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
375; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
376; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
377; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
378; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
379; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
380; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
381; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000382; AVX1-NEXT: retq
383;
Zvi Rackover72b0bb12018-01-09 16:26:06 +0000384; AVX2-SLOW-LABEL: ext_i32_32i8:
385; AVX2-SLOW: # %bb.0:
386; AVX2-SLOW-NEXT: vmovd %edi, %xmm0
387; AVX2-SLOW-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
388; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
389; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
390; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
391; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
392; AVX2-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
393; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
394; AVX2-SLOW-NEXT: vpand %ymm1, %ymm0, %ymm0
395; AVX2-SLOW-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
396; AVX2-SLOW-NEXT: retq
397;
398; AVX2-FAST-LABEL: ext_i32_32i8:
399; AVX2-FAST: # %bb.0:
400; AVX2-FAST-NEXT: vmovd %edi, %xmm0
401; AVX2-FAST-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
402; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
403; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
404; AVX2-FAST-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
405; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
406; AVX2-FAST-NEXT: vpand %ymm1, %ymm0, %ymm0
407; AVX2-FAST-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
408; AVX2-FAST-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000409;
410; AVX512-LABEL: ext_i32_32i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000411; AVX512: # %bb.0:
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000412; AVX512-NEXT: kmovd %edi, %k0
413; AVX512-NEXT: vpmovm2b %k0, %ymm0
414; AVX512-NEXT: retq
415 %1 = bitcast i32 %a0 to <32 x i1>
416 %2 = sext <32 x i1> %1 to <32 x i8>
417 ret <32 x i8> %2
418}
419
420;
421; 512-bit vectors
422;
423
424define <8 x i64> @ext_i8_8i64(i8 %a0) {
425; SSE2-SSSE3-LABEL: ext_i8_8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000426; SSE2-SSSE3: # %bb.0:
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000427; SSE2-SSSE3-NEXT: # kill: def $edi killed $edi def $rdi
Simon Pilgrima705db92017-09-24 13:42:31 +0000428; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
429; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
430; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
431; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
432; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
433; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
434; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
435; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
436; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
437; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm2
438; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
439; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
440; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
441; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
442; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32]
443; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm3
444; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3
445; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm3
446; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
447; SSE2-SSSE3-NEXT: pand %xmm3, %xmm2
448; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [64,128]
449; SSE2-SSSE3-NEXT: pand %xmm3, %xmm4
450; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm4
451; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
452; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000453; SSE2-SSSE3-NEXT: retq
454;
455; AVX1-LABEL: ext_i8_8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000456; AVX1: # %bb.0:
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000457; AVX1-NEXT: # kill: def $edi killed $edi def $rdi
Simon Pilgrima705db92017-09-24 13:42:31 +0000458; AVX1-NEXT: vmovq %rdi, %xmm0
459; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
460; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
461; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
462; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
463; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
464; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
465; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
466; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
467; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
468; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
469; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
470; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
471; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
472; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
473; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
474; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
475; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
476; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000477; AVX1-NEXT: retq
478;
479; AVX2-LABEL: ext_i8_8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000480; AVX2: # %bb.0:
Puyan Lotfi43e94b12018-01-31 22:04:26 +0000481; AVX2-NEXT: # kill: def $edi killed $edi def $rdi
Simon Pilgrima705db92017-09-24 13:42:31 +0000482; AVX2-NEXT: vmovq %rdi, %xmm0
483; AVX2-NEXT: vpbroadcastq %xmm0, %ymm1
484; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
485; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
486; AVX2-NEXT: vpcmpeqq %ymm0, %ymm2, %ymm0
487; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
488; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
489; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000490; AVX2-NEXT: retq
491;
492; AVX512-LABEL: ext_i8_8i64:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000493; AVX512: # %bb.0:
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000494; AVX512-NEXT: kmovd %edi, %k1
495; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
496; AVX512-NEXT: retq
497 %1 = bitcast i8 %a0 to <8 x i1>
498 %2 = sext <8 x i1> %1 to <8 x i64>
499 ret <8 x i64> %2
500}
501
502define <16 x i32> @ext_i16_16i32(i16 %a0) {
503; SSE2-SSSE3-LABEL: ext_i16_16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000504; SSE2-SSSE3: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000505; SSE2-SSSE3-NEXT: movd %edi, %xmm0
506; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
507; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
508; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm0
509; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
510; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
511; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000512; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm1
Simon Pilgrima705db92017-09-24 13:42:31 +0000513; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
514; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
515; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000516; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
Simon Pilgrima705db92017-09-24 13:42:31 +0000517; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
518; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm2
519; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
520; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
521; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000522; SSE2-SSSE3-NEXT: retq
523;
524; AVX1-LABEL: ext_i16_16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000525; AVX1: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000526; AVX1-NEXT: vmovd %edi, %xmm0
527; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
528; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
529; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
530; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
531; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
532; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
533; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
534; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
535; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
536; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
537; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
538; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
539; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
540; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
541; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
542; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
543; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
544; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000545; AVX1-NEXT: retq
546;
547; AVX2-LABEL: ext_i16_16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000548; AVX2: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000549; AVX2-NEXT: vmovd %edi, %xmm0
550; AVX2-NEXT: vpbroadcastd %xmm0, %ymm1
551; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
552; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
553; AVX2-NEXT: vpcmpeqd %ymm0, %ymm2, %ymm0
554; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
555; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
556; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000557; AVX2-NEXT: retq
558;
559; AVX512-LABEL: ext_i16_16i32:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000560; AVX512: # %bb.0:
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000561; AVX512-NEXT: kmovd %edi, %k1
562; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
563; AVX512-NEXT: retq
564 %1 = bitcast i16 %a0 to <16 x i1>
565 %2 = sext <16 x i1> %1 to <16 x i32>
566 ret <16 x i32> %2
567}
568
569define <32 x i16> @ext_i32_32i16(i32 %a0) {
570; SSE2-SSSE3-LABEL: ext_i32_32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000571; SSE2-SSSE3: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000572; SSE2-SSSE3-NEXT: movd %edi, %xmm2
Simon Pilgrimc7015962017-12-29 14:41:50 +0000573; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7]
574; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
Simon Pilgrima705db92017-09-24 13:42:31 +0000575; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000576; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
Simon Pilgrima705db92017-09-24 13:42:31 +0000577; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
578; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm0
579; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
580; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
581; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm1
Simon Pilgrimc7015962017-12-29 14:41:50 +0000582; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,2,3,4,5,6,7]
583; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000584; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
Simon Pilgrima705db92017-09-24 13:42:31 +0000585; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
586; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm2
587; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
588; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000589; SSE2-SSSE3-NEXT: retq
590;
591; AVX1-LABEL: ext_i32_32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000592; AVX1: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000593; AVX1-NEXT: vmovd %edi, %xmm1
Simon Pilgrimc7015962017-12-29 14:41:50 +0000594; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
595; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
Simon Pilgrima705db92017-09-24 13:42:31 +0000596; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
597; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
598; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
599; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
600; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
601; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
602; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
603; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
604; AVX1-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
605; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
606; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
Simon Pilgrimc7015962017-12-29 14:41:50 +0000607; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,2,3,4,5,6,7]
608; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
Simon Pilgrima705db92017-09-24 13:42:31 +0000609; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
610; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
611; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
612; AVX1-NEXT: vpcmpeqw %xmm4, %xmm2, %xmm2
613; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
614; AVX1-NEXT: vpcmpeqw %xmm4, %xmm1, %xmm1
615; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
616; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000617; AVX1-NEXT: retq
618;
619; AVX2-LABEL: ext_i32_32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000620; AVX2: # %bb.0:
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000621; AVX2-NEXT: vmovd %edi, %xmm0
Simon Pilgrima705db92017-09-24 13:42:31 +0000622; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
623; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
624; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
625; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
626; AVX2-NEXT: shrl $16, %edi
627; AVX2-NEXT: vmovd %edi, %xmm2
628; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
629; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm2
630; AVX2-NEXT: vpcmpeqw %ymm1, %ymm2, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000631; AVX2-NEXT: retq
632;
633; AVX512-LABEL: ext_i32_32i16:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000634; AVX512: # %bb.0:
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000635; AVX512-NEXT: kmovd %edi, %k0
636; AVX512-NEXT: vpmovm2w %k0, %zmm0
637; AVX512-NEXT: retq
638 %1 = bitcast i32 %a0 to <32 x i1>
639 %2 = sext <32 x i1> %1 to <32 x i16>
640 ret <32 x i16> %2
641}
642
; ext_i64_64i8: expand a 64-bit scalar mask into a <64 x i8> vector — each set
; bit of %a0 becomes an all-ones (-1) byte lane, each clear bit a zero lane.
; The prefixed lines below (SSE2-SSSE3 / AVX1 / AVX2-* / AVX512) are FileCheck
; assertions autogenerated by utils/update_llc_test_checks.py (see the file
; header); regenerate with that script instead of editing them by hand.
; NOTE(review): the AVX2 broadcast constant 9241421688590303745 is
; 0x8040201008040201, i.e. bytes [1,2,4,8,16,32,64,128] repeated — one bit
; selector per byte lane, matching the SSE/AVX1 vector constants above it.
643define <64 x i8> @ext_i64_64i8(i64 %a0) {
644; SSE2-SSSE3-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000645; SSE2-SSSE3: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000646; SSE2-SSSE3-NEXT: movq %rdi, %xmm3
647; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
648; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
649; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
650; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
651; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
652; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm0
653; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
654; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
655; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
656; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm1
657; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
658; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
659; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
660; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm2
661; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
662; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
663; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
664; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000665; SSE2-SSSE3-NEXT: retq
666;
667; AVX1-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000668; AVX1: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000669; AVX1-NEXT: vmovq %rdi, %xmm0
670; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
671; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
672; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
673; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
674; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
675; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
676; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
677; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
678; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
679; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
680; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm3
681; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
682; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
683; AVX1-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
684; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
685; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
686; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
687; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
688; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
689; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
690; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
691; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
692; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
693; AVX1-NEXT: vpcmpeqb %xmm4, %xmm2, %xmm2
694; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
695; AVX1-NEXT: vpcmpeqb %xmm4, %xmm1, %xmm1
696; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000697; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000698; AVX1-NEXT: retq
699;
Zvi Rackover72b0bb12018-01-09 16:26:06 +0000700; AVX2-SLOW-LABEL: ext_i64_64i8:
701; AVX2-SLOW: # %bb.0:
702; AVX2-SLOW-NEXT: vmovq %rdi, %xmm0
703; AVX2-SLOW-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
704; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
705; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
706; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
707; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
708; AVX2-SLOW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
709; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
710; AVX2-SLOW-NEXT: vpand %ymm2, %ymm0, %ymm0
711; AVX2-SLOW-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
712; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
713; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
714; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
715; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
716; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm3, %ymm1
717; AVX2-SLOW-NEXT: vpand %ymm2, %ymm1, %ymm1
718; AVX2-SLOW-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
719; AVX2-SLOW-NEXT: retq
720;
721; AVX2-FAST-LABEL: ext_i64_64i8:
722; AVX2-FAST: # %bb.0:
723; AVX2-FAST-NEXT: vmovq %rdi, %xmm0
724; AVX2-FAST-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
725; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm1[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
726; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm2 = xmm1[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
727; AVX2-FAST-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
728; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
729; AVX2-FAST-NEXT: vpand %ymm2, %ymm0, %ymm0
730; AVX2-FAST-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
731; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm3 = xmm1[8,9,8,9,8,9,8,9,10,11,10,11,10,11,10,11]
732; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,12,13,12,13,12,13,14,15,14,15,14,15,14,15]
733; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm3, %ymm1
734; AVX2-FAST-NEXT: vpand %ymm2, %ymm1, %ymm1
735; AVX2-FAST-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
736; AVX2-FAST-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000737;
738; AVX512-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000739; AVX512: # %bb.0:
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000740; AVX512-NEXT: kmovq %rdi, %k0
741; AVX512-NEXT: vpmovm2b %k0, %zmm0
742; AVX512-NEXT: retq
; IR body under test: reinterpret the 64 mask bits as a <64 x i1> vector, then
; sign-extend so each 1-bit lane becomes 0xFF and each 0-bit lane becomes 0x00.
743 %1 = bitcast i64 %a0 to <64 x i1>
744 %2 = sext <64 x i1> %1 to <64 x i8>
745 ret <64 x i8> %2
746}