; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512

; All-of reduction over <2 x double>: each lane of (%a0 ogt %a1) is
; sign-extended to i64 (0 or -1), lane 1 is ANDed into lane 0 through a
; shuffle, and lane 0 is returned. The result is -1 only when both lanes
; compared true, otherwise 0.
define i64 @test_v2f64_sext(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_v2f64_sext:
; SSE:       # BB#0:
; SSE-NEXT:    cmpltpd %xmm0, %xmm1
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64_sext:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpltpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vandpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2f64_sext:
; AVX512:       # BB#0:
; AVX512-NEXT:    vcmpltpd %xmm0, %xmm1, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    retq
  %c = fcmp ogt <2 x double> %a0, %a1
  %s = sext <2 x i1> %c to <2 x i64>
  %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  %2 = and <2 x i64> %s, %1
  %3 = extractelement <2 x i64> %2, i32 0
  ret i64 %3
}

; All-of reduction over <4 x double>: each lane of (%a0 ogt %a1) is
; sign-extended to i64 (0 or -1). Two shuffle+and steps fold the upper half
; into the lower half and then lane 1 into lane 0; lane 0 is returned, so the
; result is -1 only when all four lanes compared true, otherwise 0.
define i64 @test_v4f64_sext(<4 x double> %a0, <4 x double> %a1) {
; SSE-LABEL: test_v4f64_sext:
; SSE:       # BB#0:
; SSE-NEXT:    cmpltpd %xmm1, %xmm3
; SSE-NEXT:    cmpltpd %xmm0, %xmm2
; SSE-NEXT:    andpd %xmm3, %xmm2
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
; SSE-NEXT:    pand %xmm2, %xmm0
; SSE-NEXT:    movd %xmm0, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64_sext:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    vmovmskpd %ymm0, %eax
; AVX-NEXT:    xorl %ecx, %ecx
; AVX-NEXT:    cmpl $15, %eax
; AVX-NEXT:    movq $-1, %rax
; AVX-NEXT:    cmovneq %rcx, %rax
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f64_sext:
; AVX512:       # BB#0:
; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    retq
  %c = fcmp ogt <4 x double> %a0, %a1
  %s = sext <4 x i1> %c to <4 x i64>
  %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = and <4 x i64> %s, %1
  %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %4 = and <4 x i64> %2, %3
  %5 = extractelement <4 x i64> %4, i64 0
  ret i64 %5
}

; All-of reduction where the compare type and the mask type differ: the
; <4 x double> compare (%a0 ogt %a1) is sign-extended to <4 x i32> rather than
; <4 x i64>. The mask is AND-reduced to lane 0 with two shuffle+and steps, and
; the scalar i32 is then sign-extended to the i64 return value (-1 when all
; four lanes compared true, otherwise 0).
define i64 @test_v4f64_legal_sext(<4 x double> %a0, <4 x double> %a1) {
; SSE-LABEL: test_v4f64_legal_sext:
; SSE:       # BB#0:
; SSE-NEXT:    cmpltpd %xmm1, %xmm3
; SSE-NEXT:    cmpltpd %xmm0, %xmm2
; SSE-NEXT:    packsswb %xmm3, %xmm2
; SSE-NEXT:    movmskps %xmm2, %eax
; SSE-NEXT:    xorl %ecx, %ecx
; SSE-NEXT:    cmpl $15, %eax
; SSE-NEXT:    movl $-1, %eax
; SSE-NEXT:    cmovnel %ecx, %eax
; SSE-NEXT:    cltq
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64_legal_sext:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpltpd %ymm0, %ymm1, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    xorl %ecx, %ecx
; AVX-NEXT:    cmpl $15, %eax
; AVX-NEXT:    movl $-1, %eax
; AVX-NEXT:    cmovnel %ecx, %eax
; AVX-NEXT:    cltq
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f64_legal_sext:
; AVX512:       # BB#0:
; AVX512-NEXT:    vcmpltpd %ymm0, %ymm1, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    cltq
; AVX512-NEXT:    retq
  %c = fcmp ogt <4 x double> %a0, %a1
  %s = sext <4 x i1> %c to <4 x i32>
  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = and <4 x i32> %s, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %4 = and <4 x i32> %2, %3
  %5 = extractelement <4 x i32> %4, i64 0
  %6 = sext i32 %5 to i64
  ret i64 %6
}

; All-of reduction over <4 x float>: each lane of (%a0 ogt %a1) is
; sign-extended to i32 (0 or -1). Two shuffle+and steps fold lanes 2-3 into
; lanes 0-1 and then lane 1 into lane 0; lane 0 is returned, so the result is
; -1 only when all four lanes compared true, otherwise 0.
define i32 @test_v4f32_sext(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_v4f32_sext:
; SSE:       # BB#0:
; SSE-NEXT:    cmpltps %xmm0, %xmm1
; SSE-NEXT:    movmskps %xmm1, %eax
; SSE-NEXT:    xorl %ecx, %ecx
; SSE-NEXT:    cmpl $15, %eax
; SSE-NEXT:    movl $-1, %eax
; SSE-NEXT:    cmovnel %ecx, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32_sext:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpltps %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    xorl %ecx, %ecx
; AVX-NEXT:    cmpl $15, %eax
; AVX-NEXT:    movl $-1, %eax
; AVX-NEXT:    cmovnel %ecx, %eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f32_sext:
; AVX512:       # BB#0:
; AVX512-NEXT:    vcmpltps %xmm0, %xmm1, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    retq
  %c = fcmp ogt <4 x float> %a0, %a1
  %s = sext <4 x i1> %c to <4 x i32>
  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = and <4 x i32> %s, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %4 = and <4 x i32> %2, %3
  %5 = extractelement <4 x i32> %4, i32 0
  ret i32 %5
}

; All-of reduction over <8 x float>: each lane of (%a0 ogt %a1) is
; sign-extended to i32 (0 or -1). Three shuffle+and steps halve the live lanes
; each time (4-7 into 0-3, 2-3 into 0-1, 1 into 0); lane 0 is returned, so the
; result is -1 only when all eight lanes compared true, otherwise 0.
define i32 @test_v8f32_sext(<8 x float> %a0, <8 x float> %a1) {
; SSE-LABEL: test_v8f32_sext:
; SSE:       # BB#0:
; SSE-NEXT:    cmpltps %xmm1, %xmm3
; SSE-NEXT:    cmpltps %xmm0, %xmm2
; SSE-NEXT:    andps %xmm3, %xmm2
; SSE-NEXT:    movmskps %xmm2, %eax
; SSE-NEXT:    xorl %ecx, %ecx
; SSE-NEXT:    cmpl $15, %eax
; SSE-NEXT:    movl $-1, %eax
; SSE-NEXT:    cmovnel %ecx, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f32_sext:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    vmovmskps %ymm0, %eax
; AVX-NEXT:    xorl %ecx, %ecx
; AVX-NEXT:    cmpl $255, %eax
; AVX-NEXT:    movl $-1, %eax
; AVX-NEXT:    cmovnel %ecx, %eax
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8f32_sext:
; AVX512:       # BB#0:
; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    retq
  %c = fcmp ogt <8 x float> %a0, %a1
  %s = sext <8 x i1> %c to <8 x i32>
  %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <8 x i32> %s, %1
  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <8 x i32> %2, %3
  %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <8 x i32> %4, %5
  %7 = extractelement <8 x i32> %6, i32 0
  ret i32 %7
}

; All-of reduction where the compare type and the mask type differ: the
; <8 x float> compare (%a0 ogt %a1) is sign-extended to <8 x i16> rather than
; <8 x i32>. The mask is AND-reduced to lane 0 with three shuffle+and steps,
; and the scalar i16 is then sign-extended to the i32 return value (-1 when
; all eight lanes compared true, otherwise 0).
define i32 @test_v8f32_legal_sext(<8 x float> %a0, <8 x float> %a1) {
; SSE-LABEL: test_v8f32_legal_sext:
; SSE:       # BB#0:
; SSE-NEXT:    cmpltps %xmm1, %xmm3
; SSE-NEXT:    cmpltps %xmm0, %xmm2
; SSE-NEXT:    packsswb %xmm3, %xmm2
; SSE-NEXT:    pmovmskb %xmm2, %eax
; SSE-NEXT:    xorl %ecx, %ecx
; SSE-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT:    movl $-1, %eax
; SSE-NEXT:    cmovnel %ecx, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f32_legal_sext:
; AVX:       # BB#0:
; AVX-NEXT:    vcmpltps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    xorl %ecx, %ecx
; AVX-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; AVX-NEXT:    movl $-1, %eax
; AVX-NEXT:    cmovnel %ecx, %eax
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8f32_legal_sext:
; AVX512:       # BB#0:
; AVX512-NEXT:    vcmpltps %ymm0, %ymm1, %k0
; AVX512-NEXT:    vpmovm2w %k0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    cwtl
; AVX512-NEXT:    retq
  %c = fcmp ogt <8 x float> %a0, %a1
  %s = sext <8 x i1> %c to <8 x i16>
  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <8 x i16> %s, %1
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <8 x i16> %2, %3
  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <8 x i16> %4, %5
  %7 = extractelement <8 x i16> %6, i32 0
  %8 = sext i16 %7 to i32
  ret i32 %8
}

; All-of reduction over <2 x i64>: each lane of (%a0 sgt %a1) is sign-extended
; to i64 (0 or -1), lane 1 is ANDed into lane 0 through a shuffle, and lane 0
; is returned. The result is -1 only when both lanes compared true,
; otherwise 0.
define i64 @test_v2i64_sext(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_v2i64_sext:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpgtq %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i64_sext:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovq %xmm0, %rax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2i64_sext:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpcmpgtq %xmm1, %xmm0, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    retq
  %c = icmp sgt <2 x i64> %a0, %a1
  %s = sext <2 x i1> %c to <2 x i64>
  %1 = shufflevector <2 x i64> %s, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  %2 = and <2 x i64> %s, %1
  %3 = extractelement <2 x i64> %2, i32 0
  ret i64 %3
}

; All-of reduction over <4 x i64>: each lane of (%a0 sgt %a1) is sign-extended
; to i64 (0 or -1). Two shuffle+and steps fold the upper half into the lower
; half and then lane 1 into lane 0; lane 0 is returned, so the result is -1
; only when all four lanes compared true, otherwise 0.
define i64 @test_v4i64_sext(<4 x i64> %a0, <4 x i64> %a1) {
; SSE-LABEL: test_v4i64_sext:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpgtq %xmm3, %xmm1
; SSE-NEXT:    pcmpgtq %xmm2, %xmm0
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT:    pand %xmm0, %xmm1
; SSE-NEXT:    movd %xmm1, %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v4i64_sext:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskpd %ymm0, %eax
; AVX1-NEXT:    xorl %ecx, %ecx
; AVX1-NEXT:    cmpl $15, %eax
; AVX1-NEXT:    movq $-1, %rax
; AVX1-NEXT:    cmovneq %rcx, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v4i64_sext:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovmskpd %ymm0, %eax
; AVX2-NEXT:    xorl %ecx, %ecx
; AVX2-NEXT:    cmpl $15, %eax
; AVX2-NEXT:    movq $-1, %rax
; AVX2-NEXT:    cmovneq %rcx, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v4i64_sext:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovq %xmm0, %rax
; AVX512-NEXT:    retq
  %c = icmp sgt <4 x i64> %a0, %a1
  %s = sext <4 x i1> %c to <4 x i64>
  %1 = shufflevector <4 x i64> %s, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = and <4 x i64> %s, %1
  %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %4 = and <4 x i64> %2, %3
  %5 = extractelement <4 x i64> %4, i64 0
  ret i64 %5
}

; All-of reduction where the compare type and the mask type differ: the
; <4 x i64> compare (%a0 sgt %a1) is sign-extended to <4 x i32> rather than
; <4 x i64>. The mask is AND-reduced to lane 0 with two shuffle+and steps, and
; the scalar i32 is then sign-extended to the i64 return value (-1 when all
; four lanes compared true, otherwise 0).
define i64 @test_v4i64_legal_sext(<4 x i64> %a0, <4 x i64> %a1) {
; SSE-LABEL: test_v4i64_legal_sext:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpgtq %xmm3, %xmm1
; SSE-NEXT:    pcmpgtq %xmm2, %xmm0
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    movmskps %xmm0, %eax
; SSE-NEXT:    xorl %ecx, %ecx
; SSE-NEXT:    cmpl $15, %eax
; SSE-NEXT:    movl $-1, %eax
; SSE-NEXT:    cmovnel %ecx, %eax
; SSE-NEXT:    cltq
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v4i64_legal_sext:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vmovmskps %xmm0, %eax
; AVX1-NEXT:    xorl %ecx, %ecx
; AVX1-NEXT:    cmpl $15, %eax
; AVX1-NEXT:    movl $-1, %eax
; AVX1-NEXT:    cmovnel %ecx, %eax
; AVX1-NEXT:    cltq
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v4i64_legal_sext:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpgtq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vmovmskps %xmm0, %eax
; AVX2-NEXT:    xorl %ecx, %ecx
; AVX2-NEXT:    cmpl $15, %eax
; AVX2-NEXT:    movl $-1, %eax
; AVX2-NEXT:    cmovnel %ecx, %eax
; AVX2-NEXT:    cltq
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v4i64_legal_sext:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpcmpgtq %ymm1, %ymm0, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    cltq
; AVX512-NEXT:    retq
  %c = icmp sgt <4 x i64> %a0, %a1
  %s = sext <4 x i1> %c to <4 x i32>
  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = and <4 x i32> %s, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %4 = and <4 x i32> %2, %3
  %5 = extractelement <4 x i32> %4, i64 0
  %6 = sext i32 %5 to i64
  ret i64 %6
}

; All-of reduction over <4 x i32>: each lane of (%a0 sgt %a1) is sign-extended
; to i32 (0 or -1). Two shuffle+and steps fold lanes 2-3 into lanes 0-1 and
; then lane 1 into lane 0; lane 0 is returned, so the result is -1 only when
; all four lanes compared true, otherwise 0.
define i32 @test_v4i32_sext(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_v4i32_sext:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE-NEXT:    movmskps %xmm0, %eax
; SSE-NEXT:    xorl %ecx, %ecx
; SSE-NEXT:    cmpl $15, %eax
; SSE-NEXT:    movl $-1, %eax
; SSE-NEXT:    cmovnel %ecx, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i32_sext:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    xorl %ecx, %ecx
; AVX-NEXT:    cmpl $15, %eax
; AVX-NEXT:    movl $-1, %eax
; AVX-NEXT:    cmovnel %ecx, %eax
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4i32_sext:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpcmpgtd %xmm1, %xmm0, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    retq
  %c = icmp sgt <4 x i32> %a0, %a1
  %s = sext <4 x i1> %c to <4 x i32>
  %1 = shufflevector <4 x i32> %s, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %2 = and <4 x i32> %s, %1
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %4 = and <4 x i32> %2, %3
  %5 = extractelement <4 x i32> %4, i32 0
  ret i32 %5
}

; All-of reduction over <8 x i32>: each lane of (%a0 sgt %a1) is sign-extended
; to i32 (0 or -1). Three shuffle+and steps halve the live lanes each time
; (4-7 into 0-3, 2-3 into 0-1, 1 into 0); lane 0 is returned, so the result is
; -1 only when all eight lanes compared true, otherwise 0.
define i32 @test_v8i32_sext(<8 x i32> %a0, <8 x i32> %a1) {
; SSE-LABEL: test_v8i32_sext:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE-NEXT:    pand %xmm1, %xmm0
; SSE-NEXT:    movmskps %xmm0, %eax
; SSE-NEXT:    xorl %ecx, %ecx
; SSE-NEXT:    cmpl $15, %eax
; SSE-NEXT:    movl $-1, %eax
; SSE-NEXT:    cmovnel %ecx, %eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v8i32_sext:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    xorl %ecx, %ecx
; AVX1-NEXT:    cmpl $255, %eax
; AVX1-NEXT:    movl $-1, %eax
; AVX1-NEXT:    cmovnel %ecx, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v8i32_sext:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    xorl %ecx, %ecx
; AVX2-NEXT:    cmpl $255, %eax
; AVX2-NEXT:    movl $-1, %eax
; AVX2-NEXT:    cmovnel %ecx, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v8i32_sext:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    retq
  %c = icmp sgt <8 x i32> %a0, %a1
  %s = sext <8 x i1> %c to <8 x i32>
  %1 = shufflevector <8 x i32> %s, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <8 x i32> %s, %1
  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <8 x i32> %2, %3
  %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <8 x i32> %4, %5
  %7 = extractelement <8 x i32> %6, i32 0
  ret i32 %7
}

; All-of reduction where the compare type and the mask type differ: the
; <8 x i32> compare (%a0 sgt %a1) is sign-extended to <8 x i16> rather than
; <8 x i32>. The mask is AND-reduced to lane 0 with three shuffle+and steps,
; and the scalar i16 is then sign-extended to the i32 return value (-1 when
; all eight lanes compared true, otherwise 0).
define i32 @test_v8i32_legal_sext(<8 x i32> %a0, <8 x i32> %a1) {
; SSE-LABEL: test_v8i32_legal_sext:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpgtd %xmm3, %xmm1
; SSE-NEXT:    pcmpgtd %xmm2, %xmm0
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    xorl %ecx, %ecx
; SSE-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT:    movl $-1, %eax
; SSE-NEXT:    cmovnel %ecx, %eax
; SSE-NEXT:    retq
;
; AVX1-LABEL: test_v8i32_legal_sext:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    xorl %ecx, %ecx
; AVX1-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; AVX1-NEXT:    movl $-1, %eax
; AVX1-NEXT:    cmovnel %ecx, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: test_v8i32_legal_sext:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %eax
; AVX2-NEXT:    xorl %ecx, %ecx
; AVX2-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; AVX2-NEXT:    movl $-1, %eax
; AVX2-NEXT:    cmovnel %ecx, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: test_v8i32_legal_sext:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
; AVX512-NEXT:    vpmovm2w %k0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    cwtl
; AVX512-NEXT:    retq
  %c = icmp sgt <8 x i32> %a0, %a1
  %s = sext <8 x i1> %c to <8 x i16>
  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <8 x i16> %s, %1
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <8 x i16> %2, %3
  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <8 x i16> %4, %5
  %7 = extractelement <8 x i16> %6, i32 0
  %8 = sext i16 %7 to i32
  ret i32 %8
}

; All-of reduction over <8 x i16>: each lane of (%a0 sgt %a1) is sign-extended
; to i16 (0 or -1). Three shuffle+and steps halve the live lanes each time
; (4-7 into 0-3, 2-3 into 0-1, 1 into 0); lane 0 is returned, so the result is
; -1 only when all eight lanes compared true, otherwise 0.
define i16 @test_v8i16_sext(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_v8i16_sext:
; SSE:       # BB#0:
; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    xorl %ecx, %ecx
; SSE-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; SSE-NEXT:    movl $-1, %eax
; SSE-NEXT:    cmovnel %ecx, %eax
; SSE-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i16_sext:
; AVX:       # BB#0:
; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    xorl %ecx, %ecx
; AVX-NEXT:    cmpl $65535, %eax # imm = 0xFFFF
; AVX-NEXT:    movl $-1, %eax
; AVX-NEXT:    cmovnel %ecx, %eax
; AVX-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8i16_sext:
; AVX512:       # BB#0:
; AVX512-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
; AVX512-NEXT:    vpmovm2w %k0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vpsrld $16, %xmm0, %xmm1
; AVX512-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovd %xmm0, %eax
; AVX512-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512-NEXT:    retq
  %c = icmp sgt <8 x i16> %a0, %a1
  %s = sext <8 x i1> %c to <8 x i16>
  %1 = shufflevector <8 x i16> %s, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = and <8 x i16> %s, %1
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = and <8 x i16> %2, %3
  %5 = shufflevector <8 x i16> %4, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %6 = and <8 x i16> %4, %5
  %7 = extractelement <8 x i16> %6, i32 0
  ret i16 %7
}

; All-of reduction over a 256-bit vector of sixteen i16 lanes.
; The IR performs a signed greater-than compare of %a0 against %a1,
; sign-extends the <16 x i1> result to <16 x i16> (every lane becomes 0 or -1),
; and then ANDs the vector with shuffled copies of itself so that lane 0
; accumulates the AND of all sixteen lanes. The returned i16 is therefore -1
; when every lane of %a0 is greater than the matching lane of %a1, and 0
; otherwise.
; Expected codegen recorded below - the SSE and AVX2 configurations collapse
; the reduction into a (v)pmovmskb followed by a compare and cmov, while the
; AVX1 and AVX512 configurations keep the shuffle-and-AND ladder.
Simon Pilgrimbd26de52017-02-03 17:31:01 +0000658define i16 @test_v16i16_sext(<16 x i16> %a0, <16 x i16> %a1) {
659; SSE-LABEL: test_v16i16_sext:
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000660; SSE: # BB#0:
661; SSE-NEXT: pcmpgtw %xmm3, %xmm1
662; SSE-NEXT: pcmpgtw %xmm2, %xmm0
663; SSE-NEXT: pand %xmm1, %xmm0
Simon Pilgrim20ab6b82017-02-02 11:52:33 +0000664; SSE-NEXT: pmovmskb %xmm0, %eax
665; SSE-NEXT: xorl %ecx, %ecx
666; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
667; SSE-NEXT: movl $-1, %eax
668; SSE-NEXT: cmovnel %ecx, %eax
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000669; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
670; SSE-NEXT: retq
671;
Simon Pilgrimbd26de52017-02-03 17:31:01 +0000672; AVX1-LABEL: test_v16i16_sext:
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000673; AVX1: # BB#0:
674; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
675; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
676; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
677; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
678; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
679; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
680; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
681; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
682; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
683; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
684; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
685; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
686; AVX1-NEXT: vmovd %xmm0, %eax
687; AVX1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
688; AVX1-NEXT: vzeroupper
689; AVX1-NEXT: retq
690;
Simon Pilgrimbd26de52017-02-03 17:31:01 +0000691; AVX2-LABEL: test_v16i16_sext:
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000692; AVX2: # BB#0:
693; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
Simon Pilgrim20ab6b82017-02-02 11:52:33 +0000694; AVX2-NEXT: vpmovmskb %ymm0, %ecx
695; AVX2-NEXT: xorl %eax, %eax
696; AVX2-NEXT: cmpl $-1, %ecx
697; AVX2-NEXT: cmovel %ecx, %eax
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000698; AVX2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
699; AVX2-NEXT: vzeroupper
700; AVX2-NEXT: retq
Simon Pilgrim60662cb2017-01-17 17:33:18 +0000701;
Simon Pilgrimbd26de52017-02-03 17:31:01 +0000702; AVX512-LABEL: test_v16i16_sext:
Simon Pilgrim60662cb2017-01-17 17:33:18 +0000703; AVX512: # BB#0:
704; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
705; AVX512-NEXT: vpmovm2w %k0, %ymm0
706; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
707; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
708; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
709; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
710; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
711; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
712; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
713; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
714; AVX512-NEXT: vmovd %xmm0, %eax
715; AVX512-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
716; AVX512-NEXT: retq
; Halving ladder - each step ANDs the vector with a copy whose low lanes hold
; the next-higher group (lanes 8-15, then 4-7, then 2-3, then 1); only lane 0
; of the final value is read.
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000717 %c = icmp sgt <16 x i16> %a0, %a1
718 %s = sext <16 x i1> %c to <16 x i16>
719 %1 = shufflevector <16 x i16> %s, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
720 %2 = and <16 x i16> %s, %1
721 %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
722 %4 = and <16 x i16> %2, %3
723 %5 = shufflevector <16 x i16> %4, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
724 %6 = and <16 x i16> %4, %5
725 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
726 %8 = and <16 x i16> %6, %7
727 %9 = extractelement <16 x i16> %8, i32 0
728 ret i16 %9
729}
730
; All-of reduction in which the <16 x i1> compare result of two <16 x i16>
; inputs is sign-extended to <16 x i8> instead of to the source element width,
; so the AND ladder runs on a 128-bit byte vector. Lane 0 of the reduced vector
; (0 or -1) is then sign-extended from i8 to the i16 return value.
; NOTE(review): the "legal" in the name presumably refers to <16 x i8> being a
; legal type on every tested configuration - confirm against the originating
; commit.
; Expected codegen recorded below - the SSE, AVX1 and AVX2 configurations pack
; the word compares down to bytes and use (v)pmovmskb with a compare and cmov;
; the AVX512 configuration materialises the mask with vpmovm2b, runs the
; shuffle-and-AND ladder, and sign-extends the extracted byte with movsbl.
Simon Pilgrimbd26de52017-02-03 17:31:01 +0000731define i16 @test_v16i16_legal_sext(<16 x i16> %a0, <16 x i16> %a1) {
732; SSE-LABEL: test_v16i16_legal_sext:
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000733; SSE: # BB#0:
734; SSE-NEXT: pcmpgtw %xmm3, %xmm1
735; SSE-NEXT: pcmpgtw %xmm2, %xmm0
736; SSE-NEXT: packsswb %xmm1, %xmm0
Simon Pilgrim20ab6b82017-02-02 11:52:33 +0000737; SSE-NEXT: pmovmskb %xmm0, %eax
738; SSE-NEXT: xorl %ecx, %ecx
739; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
740; SSE-NEXT: movl $-1, %eax
741; SSE-NEXT: cmovnel %ecx, %eax
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000742; SSE-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
743; SSE-NEXT: retq
744;
Simon Pilgrimbd26de52017-02-03 17:31:01 +0000745; AVX1-LABEL: test_v16i16_legal_sext:
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000746; AVX1: # BB#0:
747; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
748; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
749; AVX1-NEXT: vpcmpgtw %xmm2, %xmm3, %xmm2
750; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
751; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
Simon Pilgrim20ab6b82017-02-02 11:52:33 +0000752; AVX1-NEXT: vpmovmskb %xmm0, %eax
753; AVX1-NEXT: xorl %ecx, %ecx
754; AVX1-NEXT: cmpl $65535, %eax # imm = 0xFFFF
755; AVX1-NEXT: movl $-1, %eax
756; AVX1-NEXT: cmovnel %ecx, %eax
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000757; AVX1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
758; AVX1-NEXT: vzeroupper
759; AVX1-NEXT: retq
760;
Simon Pilgrimbd26de52017-02-03 17:31:01 +0000761; AVX2-LABEL: test_v16i16_legal_sext:
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000762; AVX2: # BB#0:
763; AVX2-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
764; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
765; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
Simon Pilgrim20ab6b82017-02-02 11:52:33 +0000766; AVX2-NEXT: vpmovmskb %xmm0, %eax
767; AVX2-NEXT: xorl %ecx, %ecx
768; AVX2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
769; AVX2-NEXT: movl $-1, %eax
770; AVX2-NEXT: cmovnel %ecx, %eax
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000771; AVX2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
772; AVX2-NEXT: vzeroupper
773; AVX2-NEXT: retq
Simon Pilgrim60662cb2017-01-17 17:33:18 +0000774;
Simon Pilgrimbd26de52017-02-03 17:31:01 +0000775; AVX512-LABEL: test_v16i16_legal_sext:
Simon Pilgrim60662cb2017-01-17 17:33:18 +0000776; AVX512: # BB#0:
777; AVX512-NEXT: vpcmpgtw %ymm1, %ymm0, %k0
778; AVX512-NEXT: vpmovm2b %k0, %xmm0
779; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
780; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
781; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
782; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
783; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
784; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
785; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
786; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
787; AVX512-NEXT: vpextrb $0, %xmm0, %eax
788; AVX512-NEXT: movsbl %al, %eax
789; AVX512-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
790; AVX512-NEXT: retq
; Halving ladder on the byte vector - lanes 8-15, then 4-7, then 2-3, then 1
; are ANDed into the low lanes; lane 0 is extracted and widened to i16.
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000791 %c = icmp sgt <16 x i16> %a0, %a1
792 %s = sext <16 x i1> %c to <16 x i8>
793 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
794 %2 = and <16 x i8> %s, %1
795 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
796 %4 = and <16 x i8> %2, %3
797 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
798 %6 = and <16 x i8> %4, %5
799 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
800 %8 = and <16 x i8> %6, %7
801 %9 = extractelement <16 x i8> %8, i32 0
802 %10 = sext i8 %9 to i16
803 ret i16 %10
804}
805
; All-of reduction over a 128-bit vector of sixteen i8 lanes.
; The IR performs a signed greater-than compare of %a0 against %a1,
; sign-extends the <16 x i1> result to <16 x i8> (every lane becomes 0 or -1),
; and ANDs the vector with shuffled copies of itself so that lane 0 holds the
; AND of all sixteen lanes. The returned i8 is -1 when every lane compares
; true and 0 otherwise.
; Expected codegen recorded below - the SSE configuration and the check lines
; shared by the AVX1 and AVX2 configurations use (v)pmovmskb, a compare
; against 0xFFFF and a cmov; the AVX512 configuration keeps the
; shuffle-and-AND ladder after expanding the mask register with vpmovm2b.
Simon Pilgrimbd26de52017-02-03 17:31:01 +0000806define i8 @test_v16i8_sext(<16 x i8> %a0, <16 x i8> %a1) {
807; SSE-LABEL: test_v16i8_sext:
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000808; SSE: # BB#0:
809; SSE-NEXT: pcmpgtb %xmm1, %xmm0
Simon Pilgrim20ab6b82017-02-02 11:52:33 +0000810; SSE-NEXT: pmovmskb %xmm0, %eax
811; SSE-NEXT: xorl %ecx, %ecx
812; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
813; SSE-NEXT: movl $-1, %eax
814; SSE-NEXT: cmovnel %ecx, %eax
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000815; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
816; SSE-NEXT: retq
817;
Simon Pilgrimbd26de52017-02-03 17:31:01 +0000818; AVX-LABEL: test_v16i8_sext:
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000819; AVX: # BB#0:
820; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
Simon Pilgrim20ab6b82017-02-02 11:52:33 +0000821; AVX-NEXT: vpmovmskb %xmm0, %eax
822; AVX-NEXT: xorl %ecx, %ecx
823; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
824; AVX-NEXT: movl $-1, %eax
825; AVX-NEXT: cmovnel %ecx, %eax
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000826; AVX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
827; AVX-NEXT: retq
Simon Pilgrim60662cb2017-01-17 17:33:18 +0000828;
Simon Pilgrimbd26de52017-02-03 17:31:01 +0000829; AVX512-LABEL: test_v16i8_sext:
Simon Pilgrim60662cb2017-01-17 17:33:18 +0000830; AVX512: # BB#0:
831; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
832; AVX512-NEXT: vpmovm2b %k0, %xmm0
833; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
834; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
835; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
836; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
837; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
838; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
839; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
840; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
841; AVX512-NEXT: vpextrb $0, %xmm0, %eax
842; AVX512-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
843; AVX512-NEXT: retq
; Halving ladder - lanes 8-15, then 4-7, then 2-3, then 1 are ANDed into the
; low lanes; only lane 0 of the final value is read.
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000844 %c = icmp sgt <16 x i8> %a0, %a1
845 %s = sext <16 x i1> %c to <16 x i8>
846 %1 = shufflevector <16 x i8> %s, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
847 %2 = and <16 x i8> %s, %1
848 %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
849 %4 = and <16 x i8> %2, %3
850 %5 = shufflevector <16 x i8> %4, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
851 %6 = and <16 x i8> %4, %5
852 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
853 %8 = and <16 x i8> %6, %7
854 %9 = extractelement <16 x i8> %8, i32 0
855 ret i8 %9
856}
857
Simon Pilgrimbd26de52017-02-03 17:31:01 +0000858define i8 @test_v32i8_sext(<32 x i8> %a0, <32 x i8> %a1) {
859; SSE-LABEL: test_v32i8_sext:
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000860; SSE: # BB#0:
861; SSE-NEXT: pcmpgtb %xmm3, %xmm1
862; SSE-NEXT: pcmpgtb %xmm2, %xmm0
863; SSE-NEXT: pand %xmm1, %xmm0
Simon Pilgrim20ab6b82017-02-02 11:52:33 +0000864; SSE-NEXT: pmovmskb %xmm0, %eax
865; SSE-NEXT: xorl %ecx, %ecx
866; SSE-NEXT: cmpl $65535, %eax # imm = 0xFFFF
867; SSE-NEXT: movl $-1, %eax
868; SSE-NEXT: cmovnel %ecx, %eax
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000869; SSE-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
870; SSE-NEXT: retq
871;
Simon Pilgrimbd26de52017-02-03 17:31:01 +0000872; AVX1-LABEL: test_v32i8_sext:
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000873; AVX1: # BB#0:
874; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
875; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
876; AVX1-NEXT: vpcmpgtb %xmm2, %xmm3, %xmm2
877; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
878; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
879; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
880; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
881; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
882; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
883; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
884; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
885; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
886; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
887; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
888; AVX1-NEXT: vpextrb $0, %xmm0, %eax
889; AVX1-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
890; AVX1-NEXT: vzeroupper
891; AVX1-NEXT: retq
892;
Simon Pilgrimbd26de52017-02-03 17:31:01 +0000893; AVX2-LABEL: test_v32i8_sext:
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000894; AVX2: # BB#0:
895; AVX2-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
Simon Pilgrim20ab6b82017-02-02 11:52:33 +0000896; AVX2-NEXT: vpmovmskb %ymm0, %ecx
897; AVX2-NEXT: xorl %eax, %eax
898; AVX2-NEXT: cmpl $-1, %ecx
899; AVX2-NEXT: cmovel %ecx, %eax
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000900; AVX2-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
901; AVX2-NEXT: vzeroupper
902; AVX2-NEXT: retq
Simon Pilgrim60662cb2017-01-17 17:33:18 +0000903;
Simon Pilgrimbd26de52017-02-03 17:31:01 +0000904; AVX512-LABEL: test_v32i8_sext:
Simon Pilgrim60662cb2017-01-17 17:33:18 +0000905; AVX512: # BB#0:
906; AVX512-NEXT: vpcmpgtb %ymm1, %ymm0, %k0
907; AVX512-NEXT: vpmovm2b %k0, %ymm0
908; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
909; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
910; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
911; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
912; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
913; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
914; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1
915; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
916; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
917; AVX512-NEXT: vpand %ymm1, %ymm0, %ymm0
918; AVX512-NEXT: vpextrb $0, %xmm0, %eax
919; AVX512-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
920; AVX512-NEXT: retq
Simon Pilgrim8b2996f2017-01-17 15:02:01 +0000921 %c = icmp sgt <32 x i8> %a0, %a1
922 %s = sext <32 x i1> %c to <32 x i8>
923 %1 = shufflevector <32 x i8> %s, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
924 %2 = and <32 x i8> %s, %1
925 %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
926 %4 = and <32 x i8> %2, %3
927 %5 = shufflevector <32 x i8> %4, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
928 %6 = and <32 x i8> %4, %5
929 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
930 %8 = and <32 x i8> %6, %7
931 %9 = shufflevector <32 x i8> %8, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
932 %10 = and <32 x i8> %8, %9
933 %11 = extractelement <32 x i8> %10, i32 0
934 ret i8 %11
935}