; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd | FileCheck %s --check-prefix=ALL --check-prefix=AVX512CD
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,avx512cd,+avx512bw| FileCheck %s --check-prefix=ALL --check-prefix=AVX512VLCDBW
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl,avx512cd,+avx512bw| FileCheck %s --check-prefix=ALL --check-prefix=X86-AVX512VLCDBW
5
; Mask-to-vector splat, 128-bit / i64 lanes.
; Compares two <8 x i16> vectors for equality, reinterprets the resulting
; <8 x i1> mask as an i8, zero-extends it to i64 and replicates that scalar
; into both lanes of a <2 x i64> (insertelement at index 0 followed by a
; zeroinitializer shuffle mask).
; NOTE(review): presumably the IR clang emits for _mm_broadcastmb_epi64 - confirm.
; The per-target assertion comments inside the function are autogenerated;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define <2 x i64> @test_mm_epi64(<8 x i16> %a, <8 x i16> %b) {
; AVX512CD-LABEL: test_mm_epi64:
; AVX512CD: # BB#0: # %entry
; AVX512CD-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX512CD-NEXT: vpmovsxwq %xmm0, %zmm0
; AVX512CD-NEXT: vpsllq $63, %zmm0, %zmm0
; AVX512CD-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512CD-NEXT: kmovw %k0, %eax
; AVX512CD-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512CD-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; AVX512CD-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512CD-NEXT: vzeroupper
; AVX512CD-NEXT: retq
;
; AVX512VLCDBW-LABEL: test_mm_epi64:
; AVX512VLCDBW: # BB#0: # %entry
; AVX512VLCDBW-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
; AVX512VLCDBW-NEXT: kmovd %k0, %eax
; AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: retq
;
; X86-AVX512VLCDBW-LABEL: test_mm_epi64:
; X86-AVX512VLCDBW: # BB#0: # %entry
; X86-AVX512VLCDBW-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
; X86-AVX512VLCDBW-NEXT: kmovd %k0, %eax
; X86-AVX512VLCDBW-NEXT: movzbl %al, %eax
; X86-AVX512VLCDBW-NEXT: vmovd %eax, %xmm0
; X86-AVX512VLCDBW-NEXT: vpbroadcastq %xmm0, %xmm0
; X86-AVX512VLCDBW-NEXT: retl
entry:
  %0 = icmp eq <8 x i16> %a, %b
  %1 = bitcast <8 x i1> %0 to i8
  %conv.i = zext i8 %1 to i64
  %vecinit.i.i = insertelement <2 x i64> undef, i64 %conv.i, i32 0
  %vecinit1.i.i = shufflevector <2 x i64> %vecinit.i.i, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %vecinit1.i.i
}
45
; Mask-to-vector splat, 128-bit / i32 lanes.
; Compares two <16 x i8> vectors for equality, reinterprets the resulting
; <16 x i1> mask as an i16, zero-extends it to i32 and replicates that scalar
; into all four lanes of a <4 x i32> (insertelement at index 0 followed by a
; zeroinitializer shuffle mask).
; NOTE(review): presumably the IR clang emits for _mm_broadcastmw_epi32 - confirm.
; The per-target assertion comments inside the function are autogenerated;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define <4 x i32> @test_mm_epi32(<16 x i8> %a, <16 x i8> %b) {
; AVX512CD-LABEL: test_mm_epi32:
; AVX512CD: # BB#0: # %entry
; AVX512CD-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX512CD-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512CD-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512CD-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512CD-NEXT: kmovw %k0, %eax
; AVX512CD-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512CD-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX512CD-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; AVX512CD-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; AVX512CD-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
; AVX512CD-NEXT: vzeroupper
; AVX512CD-NEXT: retq
;
; AVX512VLCDBW-LABEL: test_mm_epi32:
; AVX512VLCDBW: # BB#0: # %entry
; AVX512VLCDBW-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
; AVX512VLCDBW-NEXT: kmovd %k0, %eax
; AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: retq
;
; X86-AVX512VLCDBW-LABEL: test_mm_epi32:
; X86-AVX512VLCDBW: # BB#0: # %entry
; X86-AVX512VLCDBW-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
; X86-AVX512VLCDBW-NEXT: kmovd %k0, %eax
; X86-AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
; X86-AVX512VLCDBW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; X86-AVX512VLCDBW-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; X86-AVX512VLCDBW-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; X86-AVX512VLCDBW-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
; X86-AVX512VLCDBW-NEXT: retl
entry:
  %0 = icmp eq <16 x i8> %a, %b
  %1 = bitcast <16 x i1> %0 to i16
  %conv.i = zext i16 %1 to i32
  %vecinit.i.i = insertelement <4 x i32> undef, i32 %conv.i, i32 0
  %vecinit3.i.i = shufflevector <4 x i32> %vecinit.i.i, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %vecinit3.i.i
}
91
; Mask-to-vector splat, 512-bit / i32 lanes.
; Compares two <16 x i32> vectors for equality, reinterprets the resulting
; <16 x i1> mask as an i16, zero-extends it to i32 and replicates that scalar
; into all sixteen lanes of a <16 x i32> (insertelement at index 0 followed by
; a zeroinitializer shuffle mask).
; NOTE(review): presumably the IR clang emits for _mm512_broadcastmw_epi32 - confirm.
; The per-target assertion comments inside the function are autogenerated;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define <16 x i32> @test_mm512_epi32(<16 x i32> %a, <16 x i32> %b) {
; AVX512CD-LABEL: test_mm512_epi32:
; AVX512CD: # BB#0: # %entry
; AVX512CD-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512CD-NEXT: kmovw %k0, %eax
; AVX512CD-NEXT: vpbroadcastd %eax, %zmm0
; AVX512CD-NEXT: retq
;
; AVX512VLCDBW-LABEL: test_mm512_epi32:
; AVX512VLCDBW: # BB#0: # %entry
; AVX512VLCDBW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512VLCDBW-NEXT: kmovd %k0, %eax
; AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512VLCDBW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512VLCDBW-NEXT: retq
;
; X86-AVX512VLCDBW-LABEL: test_mm512_epi32:
; X86-AVX512VLCDBW: # BB#0: # %entry
; X86-AVX512VLCDBW-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; X86-AVX512VLCDBW-NEXT: kmovd %k0, %eax
; X86-AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
; X86-AVX512VLCDBW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; X86-AVX512VLCDBW-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; X86-AVX512VLCDBW-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; X86-AVX512VLCDBW-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
; X86-AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X86-AVX512VLCDBW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; X86-AVX512VLCDBW-NEXT: retl
entry:
  %0 = icmp eq <16 x i32> %a, %b
  %1 = bitcast <16 x i1> %0 to i16
  %conv.i = zext i16 %1 to i32
  %vecinit.i.i = insertelement <16 x i32> undef, i32 %conv.i, i32 0
  %vecinit15.i.i = shufflevector <16 x i32> %vecinit.i.i, <16 x i32> undef, <16 x i32> zeroinitializer
  ret <16 x i32> %vecinit15.i.i
}
133
; Mask-to-vector splat, 512-bit / i64 lanes.
; Compares two <8 x i32> vectors for equality, reinterprets the resulting
; <8 x i1> mask as an i8, zero-extends it to i64 and replicates that scalar
; into all eight lanes of an <8 x i64> (insertelement at index 0 followed by a
; zeroinitializer shuffle mask).
; NOTE(review): presumably the IR clang emits for _mm512_broadcastmb_epi64 - confirm.
; The per-target assertion comments inside the function are autogenerated;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define <8 x i64> @test_mm512_epi64(<8 x i32> %a, <8 x i32> %b) {
; AVX512CD-LABEL: test_mm512_epi64:
; AVX512CD: # BB#0: # %entry
; AVX512CD-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512CD-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512CD-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512CD-NEXT: kmovw %k0, %eax
; AVX512CD-NEXT: movzbl %al, %eax
; AVX512CD-NEXT: vpbroadcastq %rax, %zmm0
; AVX512CD-NEXT: retq
;
; AVX512VLCDBW-LABEL: test_mm512_epi64:
; AVX512VLCDBW: # BB#0: # %entry
; AVX512VLCDBW-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
; AVX512VLCDBW-NEXT: kmovd %k0, %eax
; AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512VLCDBW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512VLCDBW-NEXT: retq
;
; X86-AVX512VLCDBW-LABEL: test_mm512_epi64:
; X86-AVX512VLCDBW: # BB#0: # %entry
; X86-AVX512VLCDBW-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
; X86-AVX512VLCDBW-NEXT: kmovd %k0, %eax
; X86-AVX512VLCDBW-NEXT: movzbl %al, %eax
; X86-AVX512VLCDBW-NEXT: vmovd %eax, %xmm0
; X86-AVX512VLCDBW-NEXT: vpbroadcastq %xmm0, %xmm0
; X86-AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X86-AVX512VLCDBW-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; X86-AVX512VLCDBW-NEXT: retl
entry:
  %0 = icmp eq <8 x i32> %a, %b
  %1 = bitcast <8 x i1> %0 to i8
  %conv.i = zext i8 %1 to i64
  %vecinit.i.i = insertelement <8 x i64> undef, i64 %conv.i, i32 0
  %vecinit7.i.i = shufflevector <8 x i64> %vecinit.i.i, <8 x i64> undef, <8 x i32> zeroinitializer
  ret <8 x i64> %vecinit7.i.i
}
174
; Mask-to-vector splat, 256-bit / i64 lanes.
; Compares two <8 x i32> vectors for equality, reinterprets the resulting
; <8 x i1> mask as an i8, zero-extends it to i64 and replicates that scalar
; into all four lanes of a <4 x i64> (insertelement at index 0 followed by a
; zeroinitializer shuffle mask).
; NOTE(review): presumably the IR clang emits for _mm256_broadcastmb_epi64 - confirm.
; The per-target assertion comments inside the function are autogenerated;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define <4 x i64> @test_mm256_epi64(<8 x i32> %a, <8 x i32> %b) {
; AVX512CD-LABEL: test_mm256_epi64:
; AVX512CD: # BB#0: # %entry
; AVX512CD-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; AVX512CD-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512CD-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; AVX512CD-NEXT: kmovw %k0, %eax
; AVX512CD-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512CD-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; AVX512CD-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512CD-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512CD-NEXT: retq
;
; AVX512VLCDBW-LABEL: test_mm256_epi64:
; AVX512VLCDBW: # BB#0: # %entry
; AVX512VLCDBW-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
; AVX512VLCDBW-NEXT: kmovd %k0, %eax
; AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512VLCDBW-NEXT: retq
;
; X86-AVX512VLCDBW-LABEL: test_mm256_epi64:
; X86-AVX512VLCDBW: # BB#0: # %entry
; X86-AVX512VLCDBW-NEXT: vpcmpeqd %ymm1, %ymm0, %k0
; X86-AVX512VLCDBW-NEXT: kmovd %k0, %eax
; X86-AVX512VLCDBW-NEXT: movzbl %al, %eax
; X86-AVX512VLCDBW-NEXT: vmovd %eax, %xmm0
; X86-AVX512VLCDBW-NEXT: vpbroadcastq %xmm0, %xmm0
; X86-AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X86-AVX512VLCDBW-NEXT: retl
entry:
  %0 = icmp eq <8 x i32> %a, %b
  %1 = bitcast <8 x i1> %0 to i8
  %conv.i = zext i8 %1 to i64
  %vecinit.i.i = insertelement <4 x i64> undef, i64 %conv.i, i32 0
  %vecinit3.i.i = shufflevector <4 x i64> %vecinit.i.i, <4 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %vecinit3.i.i
}
215
; Mask-to-vector splat, 256-bit / i32 lanes.
; Compares two <16 x i16> vectors for equality, reinterprets the resulting
; <16 x i1> mask as an i16, zero-extends it to i32 and replicates that scalar
; into all eight lanes of an <8 x i32> (insertelement at index 0 followed by a
; zeroinitializer shuffle mask).
; NOTE(review): presumably the IR clang emits for _mm256_broadcastmw_epi32 - confirm.
; The per-target assertion comments inside the function are autogenerated;
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define <8 x i32> @test_mm256_epi32(<16 x i16> %a, <16 x i16> %b) {
; AVX512CD-LABEL: test_mm256_epi32:
; AVX512CD: # BB#0: # %entry
; AVX512CD-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512CD-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512CD-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512CD-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512CD-NEXT: kmovw %k0, %eax
; AVX512CD-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512CD-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX512CD-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; AVX512CD-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; AVX512CD-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
; AVX512CD-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512CD-NEXT: retq
;
; AVX512VLCDBW-LABEL: test_mm256_epi32:
; AVX512VLCDBW: # BB#0: # %entry
; AVX512VLCDBW-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
; AVX512VLCDBW-NEXT: kmovd %k0, %eax
; AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
; AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512VLCDBW-NEXT: retq
;
; X86-AVX512VLCDBW-LABEL: test_mm256_epi32:
; X86-AVX512VLCDBW: # BB#0: # %entry
; X86-AVX512VLCDBW-NEXT: vpcmpeqw %ymm1, %ymm0, %k0
; X86-AVX512VLCDBW-NEXT: kmovd %k0, %eax
; X86-AVX512VLCDBW-NEXT: vpxor %xmm0, %xmm0, %xmm0
; X86-AVX512VLCDBW-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; X86-AVX512VLCDBW-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; X86-AVX512VLCDBW-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; X86-AVX512VLCDBW-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
; X86-AVX512VLCDBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; X86-AVX512VLCDBW-NEXT: retl
entry:
  %0 = icmp eq <16 x i16> %a, %b
  %1 = bitcast <16 x i1> %0 to i16
  %conv.i = zext i16 %1 to i32
  %vecinit.i.i = insertelement <8 x i32> undef, i32 %conv.i, i32 0
  %vecinit7.i.i = shufflevector <8 x i32> %vecinit.i.i, <8 x i32> undef, <8 x i32> zeroinitializer
  ret <8 x i32> %vecinit7.i.i
}
263