; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512

;
; 128-bit vectors
;

; Bitcasts the i2 argument to <2 x i1> and sign-extends each lane to i64.
define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: # kill: def %edi killed %edi def %rdi
; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1: # %bb.0:
; AVX1-NEXT: # kill: def %edi killed %edi def %rdi
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2: # %bb.0:
; AVX2-NEXT: # kill: def %edi killed %edi def %rdi
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i2_2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: andb $3, %dil
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}
; Bitcasts the i4 argument to <4 x i1> and sign-extends each lane to i32.
define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i4_4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: andb $15, %dil
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}
; Bitcasts the i8 argument to <8 x i1> and sign-extends each lane to i16.
define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i8_8i16:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2w %k0, %xmm0
; AVX512-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}
; Bitcasts the i16 argument to <16 x i1> and sign-extends each lane to i8.
define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2: # %bb.0:
; SSE2-NEXT: movd %edi, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: pcmpeqb %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movd %edi, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT: pand %xmm1, %xmm0
; SSSE3-NEXT: pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i16_16i8:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2b %k0, %xmm0
; AVX512-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;

; Bitcasts the i4 argument to <4 x i1> and sign-extends each lane to i64.
define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: # kill: def %edi killed %edi def %rdi
; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: # kill: def %edi killed %edi def %rdi
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: # kill: def %edi killed %edi def %rdi
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i4_4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: andb $15, %dil
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}
; Bitcasts the i8 argument to <8 x i1> and sign-extends each lane to i32.
define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i8_8i32:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}
; Bitcasts the i16 argument to <16 x i1> and sign-extends each lane to i16.
define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i16_16i16:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2w %k0, %ymm0
; AVX512-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}
; Bitcasts the i32 argument to <32 x i1> and sign-extends each lane to i8.
define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm1
; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i32_32i8:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i32_32i8:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2b %k0, %ymm0
; AVX512-NEXT: retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;

; Bitcasts the i8 argument to <8 x i1> and sign-extends each lane to i64.
define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: # kill: def %edi killed %edi def %rdi
; SSE2-SSSE3-NEXT: movq %rdi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT: pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT: pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1: # %bb.0:
; AVX1-NEXT: # kill: def %edi killed %edi def %rdi
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqq %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2: # %bb.0:
; AVX2-NEXT: # kill: def %edi killed %edi def %rdi
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i8_8i64:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}
; Bitcasts the i16 argument to <16 x i1> and sign-extends each lane to i32.
define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm0
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm1
; AVX2-NEXT: vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT: vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i16_16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT: retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}
; Bitcasts the i32 argument to <32 x i1> and sign-extends each lane to i16.
define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3: # %bb.0:
; SSE2-SSSE3-NEXT: movd %edi, %xmm2
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[1,1,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT: pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT: pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT: retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %edi, %xmm1
; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm0, %xmm0
; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[1,1,2,3,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm2, %xmm2
; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %edi, %xmm0
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: shrl $16, %edi
; AVX2-NEXT: vmovd %edi, %xmm2
; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT: vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: ext_i32_32i16:
; AVX512: # %bb.0:
; AVX512-NEXT: kmovd %edi, %k0
; AVX512-NEXT: vpmovm2w %k0, %zmm0
; AVX512-NEXT: retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i16>
  ret <32 x i16> %2
}
631
; ext_i64_64i8: expand a 64-bit scalar bitmask into a <64 x i8> vector.
; The i64 argument is bitcast to <64 x i1> and each i1 lane is sign-extended
; to i8, so a set bit yields an all-ones byte (-1) and a clear bit yields 0.
; The per-target assertion lines inside this function are autogenerated (see
; the NOTE at the top of the file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
632define <64 x i8> @ext_i64_64i8(i64 %a0) {
633; SSE2-SSSE3-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000634; SSE2-SSSE3: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000635; SSE2-SSSE3-NEXT: movq %rdi, %xmm3
636; SSE2-SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
637; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
638; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
639; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
640; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0
641; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm0
642; SSE2-SSSE3-NEXT: pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
643; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
644; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1
645; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm1
646; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
647; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
648; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2
649; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm2
650; SSE2-SSSE3-NEXT: pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
651; SSE2-SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
652; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3
653; SSE2-SSSE3-NEXT: pcmpeqb %xmm4, %xmm3
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000654; SSE2-SSSE3-NEXT: retq
655;
656; AVX1-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000657; AVX1: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000658; AVX1-NEXT: vmovq %rdi, %xmm0
659; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
660; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
661; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
662; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
663; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
664; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
665; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
666; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
667; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
668; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4
669; AVX1-NEXT: vpcmpeqb %xmm4, %xmm3, %xmm3
670; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
671; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
672; AVX1-NEXT: vpcmpeqb %xmm4, %xmm0, %xmm0
673; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
674; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
675; AVX1-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
676; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
677; AVX1-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
678; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
679; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
680; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
681; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
682; AVX1-NEXT: vpcmpeqb %xmm4, %xmm2, %xmm2
683; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
684; AVX1-NEXT: vpcmpeqb %xmm4, %xmm1, %xmm1
685; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000686; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000687; AVX1-NEXT: retq
688;
689; AVX2-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000690; AVX2: # %bb.0:
Simon Pilgrima705db92017-09-24 13:42:31 +0000691; AVX2-NEXT: vmovq %rdi, %xmm0
692; AVX2-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
693; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
694; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
695; AVX2-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
696; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,1,1]
697; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
698; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
699; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
700; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
701; AVX2-NEXT: vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
702; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
703; AVX2-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
704; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
705; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm3, %ymm1
706; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
707; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000708; AVX2-NEXT: retq
709;
710; AVX512-LABEL: ext_i64_64i8:
Francis Visoiu Mistrih25528d62017-12-04 17:18:51 +0000711; AVX512: # %bb.0:
Simon Pilgrima80cb1d2017-07-06 19:33:10 +0000712; AVX512-NEXT: kmovq %rdi, %k0
713; AVX512-NEXT: vpmovm2b %k0, %zmm0
714; AVX512-NEXT: retq
; IR under test: reinterpret the scalar as 64 one-bit lanes, then widen each
; lane to a byte with sign extension.
715 %1 = bitcast i64 %a0 to <64 x i1>
716 %2 = sext <64 x i1> %1 to <64 x i8>
717 ret <64 x i8> %2
718}