; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=AVX512
Simon Pilgrima80cb1d2017-07-06 19:33:10 +00008
;
; 128-bit vectors
;
12
; Expands the 2-bit mask %a0 into a <2 x i64> vector: the i2 is bitcast to
; <2 x i1> and sign-extended, so each lane becomes all-ones or all-zeros.
define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i2_2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}
56
; Expands the 4-bit mask %a0 into a <4 x i32> vector: the i4 is bitcast to
; <4 x i1> and sign-extended, so each lane becomes all-ones or all-zeros.
define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}
95
; Expands the 8-bit mask %a0 into a <8 x i16> vector: the i8 is bitcast to
; <8 x i1> and sign-extended, so each lane becomes all-ones or all-zeros.
define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %xmm0
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}
135
; Expands the 16-bit mask %a0 into a <16 x i8> vector: the i16 is bitcast to
; <16 x i1> and sign-extended, so each lane becomes all-ones or all-zeros.
; This is the one 128-bit case where the sse2 and ssse3 runs have separate
; check groups.
define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movd %edi, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [-1.7939930131212661E-307,-1.7939930131212661E-307]
; AVX1-NEXT:    # xmm1 = mem[0,0]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %xmm0
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}
185
;
; 256-bit vectors
;
189
; Expands the 4-bit mask %a0 into a 256-bit <4 x i64> vector: the i4 is
; bitcast to <4 x i1> and sign-extended, so each lane becomes all-ones or
; all-zeros.
define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}
246
; Expands the 8-bit mask %a0 into a 256-bit <8 x i32> vector: the i8 is
; bitcast to <8 x i1> and sign-extended, so each lane becomes all-ones or
; all-zeros.
define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}
296
; Expands the 16-bit mask %a0 into a 256-bit <16 x i16> vector: the i16 is
; bitcast to <16 x i1> and sign-extended, so each lane becomes all-ones or
; all-zeros.
define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %ymm0
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}
347
; Expands the 32-bit mask %a0 into a 256-bit <32 x i8> vector: the i32 is
; bitcast to <32 x i1> and sign-extended, so each lane becomes all-ones or
; all-zeros.
define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm1
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i32_32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %ymm0
; AVX512-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}
403
;
; 512-bit vectors
;
407
; Expands the 8-bit mask %a0 into a 512-bit <8 x i64> vector: the i8 is
; bitcast to <8 x i1> and sign-extended, so each lane becomes all-ones or
; all-zeros.
define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}
485
; Expands the 16-bit mask %a0 into a 512-bit <16 x i32> vector: the i16 is
; bitcast to <16 x i1> and sign-extended, so each lane becomes all-ones or
; all-zeros.
define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}
552
; Expands the 32-bit mask %a0 into a 512-bit <32 x i16> vector: the i32 is
; bitcast to <32 x i1> and sign-extended, so each lane becomes all-ones or
; all-zeros.
define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm2
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[1,1,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[1,1,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    shrl $16, %edi
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i32_32i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i16>
  ret <32 x i16> %2
}
626
; ext_i64_64i8 - reinterpret the 64 bits of the scalar argument %a0 as a
; <64 x i1> mask and sign-extend every mask bit to a full i8 lane, so each
; result byte is 0 when its bit is clear and -1 (all ones) when it is set.
;
; The expected code below differs per RUN configuration:
;  * SSE2 / SSSE3 - the scalar is moved into a vector register, its bytes are
;    replicated with punpcklbw + pshuflw/pshufhw + pshufd, and each 128-bit
;    quarter is ANDed with and compared (pcmpeqb) against the per-byte bit
;    mask [1,2,4,...,128]; the result comes back in xmm0..xmm3.
;  * AVX1 - same byte replication, but the AND is done on 256-bit registers
;    and the compare on 128-bit halves; lanes are compared against zero and
;    then inverted with an all-ones register. The broadcast constant is
;    printed as a double (presumably the same per-byte bit mask viewed as
;    floating point). Result in ymm0/ymm1.
;  * AVX2 - vpshufb replicates the source bytes, then vpand / vpcmpeqb against
;    a broadcast i64 constant (9241421688590303745 == 0x8040201008040201,
;    i.e. one distinct bit per byte). Result in ymm0/ymm1.
;  * AVX512 (with BW) - the scalar is moved straight into a mask register and
;    expanded with vpmovm2b into zmm0.
627define <64 x i8> @ext_i64_64i8(i64 %a0) {
628; SSE2-SSSE3-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000629; SSE2-SSSE3: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000630; SSE2-SSSE3-NEXT: movq %rdi, %xmm3
631; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
632; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
633; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
634; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
635; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
636; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm0
637; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
638; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
639; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
640; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm1
641; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
642; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
643; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
644; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm2
645; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
646; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
647; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
648; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000649; SSE2-SSSE3-NEXT: retq
650;
651; AVX1-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000652; AVX1: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000653; AVX1-NEXT: vmovq %rdi, %xmm0
654; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
655; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
656; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
657; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
658; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
659; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
Craig Topper8315d992018-10-26 17:21:26 +0000660; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [-1.7939930131212661E-307,-1.7939930131212661E-307,-1.7939930131212661E-307,-1.7939930131212661E-307]
Simon Pilgrima705db92017-09-24 13:42:31 +0000661; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
662; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
663; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
664; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm3
665; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
666; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
667; AVX1-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
668; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
669; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
670; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
671; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
672; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
673; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
674; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
675; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
676; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
677; AVX1-NEXT: vpcmpeqb %xmm4, %xmm2, %xmm2
678; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
679; AVX1-NEXT: vpcmpeqb %xmm4, %xmm1, %xmm1
680; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000681; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000682; AVX1-NEXT: retq
683;
Simon Pilgrimeb806d52018-10-21 17:07:50 +0000684; AVX2-LABEL: ext_i64_64i8:
685; AVX2: # %bb.0:
686; AVX2-NEXT: vmovq %rdi, %xmm0
687; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,1,0,1]
688; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
689; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
690; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
691; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
692; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
693; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
694; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
695; AVX2-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000696;
697; AVX512-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000698; AVX512: # %bb.0:
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000699; AVX512-NEXT: kmovq %rdi, %k0
700; AVX512-NEXT: vpmovm2b %k0, %zmm0
701; AVX512-NEXT: retq
; View the 64-bit integer as 64 individual i1 mask bits (no value change).
702 %1 = bitcast i64 %a0 to <64 x i1>
; Widen each mask bit to a byte lane; a set bit becomes -1, a clear bit 0.
703 %2 = sext <64 x i1> %1 to <64 x i8>
704 ret <64 x i8> %2
705}