; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+avx512vl,+avx512dq,+avx512bw < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX
; RUN: llc -mattr=+avx512f < %s | FileCheck %s --check-prefix=ALL --check-prefix=KNL

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

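; Expand-load of v16f32 under a constant mask (element 11 off) with an undef
; passthru: the undef passthru lets this select the zero-masked form ({z}).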
define <16 x float> @test1(float* %base) {
; ALL-LABEL: test1:
; ALL: # BB#0:
; ALL-NEXT: movw $-2049, %ax # imm = 0xF7FF
; ALL-NEXT: kmovw %eax, %k1
; ALL-NEXT: vexpandps (%rdi), %zmm0 {%k1} {z}
; ALL-NEXT: retq
  %res = call <16 x float> @llvm.masked.expandload.v16f32(float* %base, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  ret <16 x float> %res
}

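; Constant mask again (elements 11 and 15 off, imm 0x77FF) but with a real
; passthru vector, so this stays a merge-masked vexpandps.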
define <16 x float> @test2(float* %base, <16 x float> %src0) {
; ALL-LABEL: test2:
; ALL: # BB#0:
; ALL-NEXT: movw $30719, %ax # imm = 0x77FF
; ALL-NEXT: kmovw %eax, %k1
; ALL-NEXT: vexpandps (%rdi), %zmm0 {%k1}
; ALL-NEXT: retq
  %res = call <16 x float> @llvm.masked.expandload.v16f32(float* %base, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x float> %src0)
  ret <16 x float> %res
}

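; Variable <8 x i1> mask: SKX moves it into a mask register with vpmovw2m;
; KNL (no AVX512BW) sign-extends it to a ZMM and uses vptestmq instead.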
define <8 x double> @test3(double* %base, <8 x double> %src0, <8 x i1> %mask) {
; SKX-LABEL: test3:
; SKX: # BB#0:
; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
; SKX-NEXT: vpmovw2m %xmm1, %k1
; SKX-NEXT: vexpandpd (%rdi), %zmm0 {%k1}
; SKX-NEXT: retq
;
; KNL-LABEL: test3:
; KNL: # BB#0:
; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vexpandpd (%rdi), %zmm0 {%k1}
; KNL-NEXT: retq
  %res = call <8 x double> @llvm.masked.expandload.v8f64(double* %base, <8 x i1> %mask, <8 x double> %src0)
  ret <8 x double> %res
}

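; 128-bit expand-load with a constant mask: KNL has no AVX512VL, so the
; operation is widened to 512 bits (hence the kill comments around %xmm0).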
define <4 x float> @test4(float* %base, <4 x float> %src0) {
; SKX-LABEL: test4:
; SKX: # BB#0:
; SKX-NEXT: movb $7, %al
; SKX-NEXT: kmovb %eax, %k1
; SKX-NEXT: vexpandps (%rdi), %xmm0 {%k1}
; SKX-NEXT: retq
;
; KNL-LABEL: test4:
; KNL: # BB#0:
; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL-NEXT: movw $7, %ax
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vexpandps (%rdi), %zmm0 {%k1}
; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
  %res = call <4 x float> @llvm.masked.expandload.v4f32(float* %base, <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> %src0)
  ret <4 x float> %res
}

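; Integer variant: v2i64 expand-load with mask <false, true> (imm 2),
; again widened to 512 bits on KNL.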
define <2 x i64> @test5(i64* %base, <2 x i64> %src0) {
; SKX-LABEL: test5:
; SKX: # BB#0:
; SKX-NEXT: movb $2, %al
; SKX-NEXT: kmovb %eax, %k1
; SKX-NEXT: vpexpandq (%rdi), %xmm0 {%k1}
; SKX-NEXT: retq
;
; KNL-LABEL: test5:
; KNL: # BB#0:
; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL-NEXT: movb $2, %al
; KNL-NEXT: kmovw %eax, %k1
; KNL-NEXT: vpexpandq (%rdi), %zmm0 {%k1}
; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
  %res = call <2 x i64> @llvm.masked.expandload.v2i64(i64* %base, <2 x i1> <i1 false, i1 true>, <2 x i64> %src0)
  ret <2 x i64> %res
}

declare <16 x float> @llvm.masked.expandload.v16f32(float*, <16 x i1>, <16 x float>)
declare <8 x double> @llvm.masked.expandload.v8f64(double*, <8 x i1>, <8 x double>)
declare <4 x float> @llvm.masked.expandload.v4f32(float*, <4 x i1>, <4 x float>)
declare <2 x i64> @llvm.masked.expandload.v2i64(i64*, <2 x i1>, <2 x i64>)

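; Compress-store of v16f32 with the same constant mask as test1.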
define void @test6(float* %base, <16 x float> %V) {
; ALL-LABEL: test6:
; ALL: # BB#0:
; ALL-NEXT: movw $-2049, %ax # imm = 0xF7FF
; ALL-NEXT: kmovw %eax, %k1
; ALL-NEXT: vcompressps %zmm0, (%rdi) {%k1}
; ALL-NEXT: retq
  call void @llvm.masked.compressstore.v16f32(<16 x float> %V, float* %base, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

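; v8f32 compress-store under a variable mask: KNL widens to 512 bits and
; keeps only the low eight mask bits (kshiftlw/kshiftrw) so the widened
; vcompressps cannot store extra elements.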
define void @test7(float* %base, <8 x float> %V, <8 x i1> %mask) {
; SKX-LABEL: test7:
; SKX: # BB#0:
; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
; SKX-NEXT: vpmovw2m %xmm1, %k1
; SKX-NEXT: vcompressps %ymm0, (%rdi) {%k1}
; SKX-NEXT: retq
;
; KNL-LABEL: test7:
; KNL: # BB#0:
; KNL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k0
; KNL-NEXT: kshiftlw $8, %k0, %k0
; KNL-NEXT: kshiftrw $8, %k0, %k1
; KNL-NEXT: vcompressps %zmm0, (%rdi) {%k1}
; KNL-NEXT: retq
  call void @llvm.masked.compressstore.v8f32(<8 x float> %V, float* %base, <8 x i1> %mask)
  ret void
}

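; v8f64 compress-store under a variable mask; the 512-bit op is legal on
; both targets, only the mask materialization differs.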
define void @test8(double* %base, <8 x double> %V, <8 x i1> %mask) {
; SKX-LABEL: test8:
; SKX: # BB#0:
; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
; SKX-NEXT: vpmovw2m %xmm1, %k1
; SKX-NEXT: vcompresspd %zmm0, (%rdi) {%k1}
; SKX-NEXT: retq
;
; KNL-LABEL: test8:
; KNL: # BB#0:
; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vcompresspd %zmm0, (%rdi) {%k1}
; KNL-NEXT: retq
  call void @llvm.masked.compressstore.v8f64(<8 x double> %V, double* %base, <8 x i1> %mask)
  ret void
}

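; Integer variant of test8: v8i64 compress-store lowers to vpcompressq.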
define void @test9(i64* %base, <8 x i64> %V, <8 x i1> %mask) {
; SKX-LABEL: test9:
; SKX: # BB#0:
; SKX-NEXT: vpsllw $15, %xmm1, %xmm1
; SKX-NEXT: vpmovw2m %xmm1, %k1
; SKX-NEXT: vpcompressq %zmm0, (%rdi) {%k1}
; SKX-NEXT: retq
;
; KNL-LABEL: test9:
; KNL: # BB#0:
; KNL-NEXT: vpmovsxwq %xmm1, %zmm1
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vpcompressq %zmm0, (%rdi) {%k1}
; KNL-NEXT: retq
  call void @llvm.masked.compressstore.v8i64(<8 x i64> %V, i64* %base, <8 x i1> %mask)
  ret void
}

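; v4i64 compress-store: SKX uses the 256-bit vpcompressq directly; KNL
; widens the <4 x i1> mask into a zeroed ZMM before the 512-bit op so the
; upper lanes stay masked off.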
define void @test10(i64* %base, <4 x i64> %V, <4 x i1> %mask) {
; SKX-LABEL: test10:
; SKX: # BB#0:
; SKX-NEXT: vpslld $31, %xmm1, %xmm1
; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX-NEXT: vpcompressq %ymm0, (%rdi) {%k1}
; SKX-NEXT: retq
;
; KNL-LABEL: test10:
; KNL: # BB#0:
; KNL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vpslld $31, %xmm1, %xmm1
; KNL-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL-NEXT: vpmovsxdq %xmm1, %ymm1
; KNL-NEXT: vpxord %zmm2, %zmm2, %zmm2
; KNL-NEXT: vinserti64x4 $0, %ymm1, %zmm2, %zmm1
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vpcompressq %zmm0, (%rdi) {%k1}
; KNL-NEXT: retq
  call void @llvm.masked.compressstore.v4i64(<4 x i64> %V, i64* %base, <4 x i1> %mask)
  ret void
}

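; v2i64 compress-store: 128-bit vpcompressq on SKX; KNL uses the same
; zero-the-upper-mask widening pattern as test10.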
define void @test11(i64* %base, <2 x i64> %V, <2 x i1> %mask) {
; SKX-LABEL: test11:
; SKX: # BB#0:
; SKX-NEXT: vpsllq $63, %xmm1, %xmm1
; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1
; SKX-NEXT: vpcompressq %xmm0, (%rdi) {%k1}
; SKX-NEXT: retq
;
; KNL-LABEL: test11:
; KNL: # BB#0:
; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL-NEXT: vpsllq $63, %xmm1, %xmm1
; KNL-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; KNL-NEXT: vpxord %zmm2, %zmm2, %zmm2
; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm2, %zmm1
; KNL-NEXT: vpsllq $63, %zmm1, %zmm1
; KNL-NEXT: vptestmq %zmm1, %zmm1, %k1
; KNL-NEXT: vpcompressq %zmm0, (%rdi) {%k1}
; KNL-NEXT: retq
  call void @llvm.masked.compressstore.v2i64(<2 x i64> %V, i64* %base, <2 x i1> %mask)
  ret void
}

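; v4f32 compress-store: same KNL widening pattern, using vcompressps.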
define void @test12(float* %base, <4 x float> %V, <4 x i1> %mask) {
; SKX-LABEL: test12:
; SKX: # BB#0:
; SKX-NEXT: vpslld $31, %xmm1, %xmm1
; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1
; SKX-NEXT: vcompressps %xmm0, (%rdi) {%k1}
; SKX-NEXT: retq
;
; KNL-LABEL: test12:
; KNL: # BB#0:
; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL-NEXT: vpslld $31, %xmm1, %xmm1
; KNL-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL-NEXT: vpxord %zmm2, %zmm2, %zmm2
; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm2, %zmm1
; KNL-NEXT: vpslld $31, %zmm1, %zmm1
; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1
; KNL-NEXT: vcompressps %zmm0, (%rdi) {%k1}
; KNL-NEXT: retq
  call void @llvm.masked.compressstore.v4f32(<4 x float> %V, float* %base, <4 x i1> %mask)
  ret void
}

declare void @llvm.masked.compressstore.v16f32(<16 x float>, float*, <16 x i1>)
declare void @llvm.masked.compressstore.v8f32(<8 x float>, float*, <8 x i1>)
declare void @llvm.masked.compressstore.v8f64(<8 x double>, double*, <8 x i1>)
declare void @llvm.masked.compressstore.v16i32(<16 x i32>, i32*, <16 x i1>)
declare void @llvm.masked.compressstore.v8i32(<8 x i32>, i32*, <8 x i1>)
declare void @llvm.masked.compressstore.v8i64(<8 x i64>, i64*, <8 x i1>)
declare void @llvm.masked.compressstore.v4i32(<4 x i32>, i32*, <4 x i1>)
declare void @llvm.masked.compressstore.v4f32(<4 x float>, float*, <4 x i1>)
declare void @llvm.masked.compressstore.v4i64(<4 x i64>, i64*, <4 x i1>)
declare void @llvm.masked.compressstore.v2i64(<2 x i64>, i64*, <2 x i1>)