; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=AVX512

;
; 128-bit vectors
;

; Expands a 2-bit scalar mask into a <2 x i64> vector. The i2 argument is
; reinterpreted as <2 x i1> and sign-extended, so each set bit yields an
; all-ones lane and each clear bit yields a zero lane.
; The assertions are autogenerated, regenerate them instead of hand-editing.
define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: # kill: def %edi killed %edi def %rdi
; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: # kill: def %edi killed %edi def %rdi
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: # kill: def %edi killed %edi def %rdi
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i2_2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}
; Expands a 4-bit scalar mask into a <4 x i32> vector. The i4 argument is
; reinterpreted as <4 x i1> and sign-extended, so each set bit yields an
; all-ones lane and each clear bit yields a zero lane.
; The assertions are autogenerated, regenerate them instead of hand-editing.
define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i4_4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}
; Expands an 8-bit scalar mask into an <8 x i16> vector. The i8 argument is
; reinterpreted as <8 x i1> and sign-extended, so each set bit yields an
; all-ones lane and each clear bit yields a zero lane.
; The assertions are autogenerated, regenerate them instead of hand-editing.
define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i8_8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2w %k0, %xmm0
; AVX512-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}
; Expands a 16-bit scalar mask into a <16 x i8> vector. The i16 argument is
; reinterpreted as <16 x i1> and sign-extended, so each set bit yields an
; all-ones lane and each clear bit yields a zero lane.
; The assertions are autogenerated, regenerate them instead of hand-editing.
define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movd %edi, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i16_16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2b %k0, %xmm0
; AVX512-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;

; Expands a 4-bit scalar mask into a <4 x i64> vector. The i4 argument is
; reinterpreted as <4 x i1> and sign-extended, so each set bit yields an
; all-ones lane and each clear bit yields a zero lane.
; The assertions are autogenerated, regenerate them instead of hand-editing.
define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: # kill: def %edi killed %edi def %rdi
; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: # kill: def %edi killed %edi def %rdi
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: # kill: def %edi killed %edi def %rdi
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i4_4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}
; Expands an 8-bit scalar mask into an <8 x i32> vector. The i8 argument is
; reinterpreted as <8 x i1> and sign-extended, so each set bit yields an
; all-ones lane and each clear bit yields a zero lane.
; The assertions are autogenerated, regenerate them instead of hand-editing.
define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i8_8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}
; Expands a 16-bit scalar mask into a <16 x i16> vector. The i16 argument is
; reinterpreted as <16 x i1> and sign-extended, so each set bit yields an
; all-ones lane and each clear bit yields a zero lane.
; The assertions are autogenerated, regenerate them instead of hand-editing.
define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i16_16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2w %k0, %ymm0
; AVX512-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}
; Expands a 32-bit scalar mask into a <32 x i8> vector. The i32 argument is
; reinterpreted as <32 x i1> and sign-extended, so each set bit yields an
; all-ones lane and each clear bit yields a zero lane.
; The assertions are autogenerated, regenerate them instead of hand-editing.
define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-SLOW-LABEL: ext_i32_32i8:
; AVX2-SLOW: # %bb.0:
; AVX2-SLOW-NEXT: vmovd %edi, %xmm0
; AVX2-SLOW-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-SLOW-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-SLOW-NEXT: retq
;
; AVX2-FAST-LABEL: ext_i32_32i8:
; AVX2-FAST: # %bb.0:
; AVX2-FAST-NEXT: vmovd %edi, %xmm0
; AVX2-FAST-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
; AVX2-FAST-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-FAST-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-FAST-NEXT: retq
;
; AVX512-LABEL: ext_i32_32i8:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2b %k0, %ymm0
; AVX512-NEXT: retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;

; Expands an 8-bit scalar mask into an <8 x i64> vector. The i8 argument is
; reinterpreted as <8 x i1> and sign-extended, so each set bit yields an
; all-ones lane and each clear bit yields a zero lane.
; The assertions are autogenerated, regenerate them instead of hand-editing.
define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: # kill: def %edi killed %edi def %rdi
; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1: # %bb.0:
; AVX1-NEXT: # kill: def %edi killed %edi def %rdi
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2: # %bb.0:
; AVX2-NEXT: # kill: def %edi killed %edi def %rdi
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i8_8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}
; Expands a 16-bit scalar mask into a <16 x i32> vector. The i16 argument is
; reinterpreted as <16 x i1> and sign-extended, so each set bit yields an
; all-ones lane and each clear bit yields a zero lane.
; The assertions are autogenerated, regenerate them instead of hand-editing.
define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i16_16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}
; Expands a 32-bit scalar mask into a <32 x i16> vector. The i32 argument is
; reinterpreted as <32 x i1> and sign-extended, so each set bit yields an
; all-ones lane and each clear bit yields a zero lane.
; The assertions are autogenerated, regenerate them instead of hand-editing.
define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm2
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm1
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: shrl $16, %edi
; AVX2-NEXT: vmovd %edi, %xmm2
; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i32_32i16:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2w %k0, %zmm0
; AVX512-NEXT: retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i16>
  ret <32 x i16> %2
}
641
; ext_i64_64i8 - bitcast the i64 argument to <64 x i1> and sign-extend every
; lane to i8, so each result byte is -1 (0xFF) where the corresponding bit of
; %a0 is set and 0 where it is clear.
;
; The per-configuration expectations below are produced by
; utils/update_llc_test_checks.py (see the NOTE at the top of this file);
; regenerate them with that script rather than editing them by hand.
;  * The SSE2/SSSE3 and AVX2 runs replicate each source byte across eight
;    lanes, AND with the per-lane bit pattern 1,2,4,...,128 and compare the
;    result for equality with that same pattern. The AVX2 vpbroadcastq
;    constant 9241421688590303745 is 0x8040201008040201, i.e. that byte
;    pattern packed into one qword.
;  * The AVX1 run uses the same replicate-and-mask step, but then compares
;    against zero on each 128-bit half and inverts the result with an
;    all-ones xor.
;  * The AVX-512 run moves the scalar into a mask register (kmovq) and
;    expands it with vpmovm2b.
642define <64 x i8> @ext_i64_64i8(i64 %a0) {
643; SSE2-SSSE3-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000644; SSE2-SSSE3: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000645; SSE2-SSSE3-NEXT: movq %rdi, %xmm3
646; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
647; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
648; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
649; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
650; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
651; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm0
652; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
653; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
654; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
655; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm1
656; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
657; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
658; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
659; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm2
660; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
661; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
662; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
663; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000664; SSE2-SSSE3-NEXT: retq
665;
666; AVX1-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000667; AVX1: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000668; AVX1-NEXT: vmovq %rdi, %xmm0
669; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
670; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
671; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
672; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
673; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
674; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
675; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
676; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
677; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
678; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
679; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm3
680; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
681; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
682; AVX1-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
683; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
684; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
685; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
686; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
687; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
688; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
689; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
690; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
691; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
692; AVX1-NEXT: vpcmpeqb %xmm4, %xmm2, %xmm2
693; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
694; AVX1-NEXT: vpcmpeqb %xmm4, %xmm1, %xmm1
695; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000696; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000697; AVX1-NEXT: retq
698;
Zvi Rackover72b0bb12018-01-09 16:26:06 +0000699; AVX2-SLOW-LABEL: ext_i64_64i8:
700; AVX2-SLOW: # %bb.0:
701; AVX2-SLOW-NEXT: vmovq %rdi, %xmm0
702; AVX2-SLOW-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
703; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
704; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
705; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
706; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
707; AVX2-SLOW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
708; AVX2-SLOW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
709; AVX2-SLOW-NEXT: vpand %ymm2, %ymm0, %ymm0
710; AVX2-SLOW-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
711; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
712; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
713; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
714; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
715; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm3, %ymm1
716; AVX2-SLOW-NEXT: vpand %ymm2, %ymm1, %ymm1
717; AVX2-SLOW-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
718; AVX2-SLOW-NEXT: retq
719;
720; AVX2-FAST-LABEL: ext_i64_64i8:
721; AVX2-FAST: # %bb.0:
722; AVX2-FAST-NEXT: vmovq %rdi, %xmm0
723; AVX2-FAST-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
724; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm1[0,1,0,1,0,1,0,1,2,3,2,3,2,3,2,3]
725; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm2 = xmm1[4,5,4,5,4,5,4,5,6,7,6,7,6,7,6,7]
726; AVX2-FAST-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
727; AVX2-FAST-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
728; AVX2-FAST-NEXT: vpand %ymm2, %ymm0, %ymm0
729; AVX2-FAST-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
730; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm3 = xmm1[8,9,8,9,8,9,8,9,10,11,10,11,10,11,10,11]
731; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[12,13,12,13,12,13,12,13,14,15,14,15,14,15,14,15]
732; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm3, %ymm1
733; AVX2-FAST-NEXT: vpand %ymm2, %ymm1, %ymm1
734; AVX2-FAST-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
735; AVX2-FAST-NEXT: retq
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000736;
737; AVX512-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000738; AVX512: # %bb.0:
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000739; AVX512-NEXT: kmovq %rdi, %k0
740; AVX512-NEXT: vpmovm2b %k0, %zmm0
741; AVX512-NEXT: retq
742 %1 = bitcast i64 %a0 to <64 x i1>
743 %2 = sext <64 x i1> %1 to <64 x i8>
744 ret <64 x i8> %2
745}