; blob: 691d1fbc1d449f823c2fb62a9a7bd90b07d94cf3
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s

; kortestw sets ZF when the OR of the two mask registers is zero;
; the intrinsic materializes that flag with sete.
declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
; CHECK-LABEL: test_kortestz
; CHECK: kortestw
; CHECK: sete
define i32 @test_kortestz(i16 %a0, i16 %a1) {
  %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
  ret i32 %res
}

; kortestw sets CF when the OR of the two mask registers is all-ones;
; the carry-based materialization shows up as sbbl.
declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
; CHECK-LABEL: test_kortestc
; CHECK: kortestw
; CHECK: sbbl
define i32 @test_kortestc(i16 %a0, i16 %a1) {
  %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
  ret i32 %res
}
; Two chained kand intrinsics (one with an immediate operand) should each
; lower to a kandw on mask registers.
declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
; CHECK-LABEL: test_kand
; CHECK: kandw
; CHECK: kandw
define i16 @test_kand(i16 %a0, i16 %a1) {
  %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
  %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
  ret i16 %t2
}

declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
; CHECK-LABEL: test_knot
; CHECK: knotw
define i16 @test_knot(i16 %a0) {
  %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone

; CHECK-LABEL: unpckbw_test
; CHECK: kunpckbw
; CHECK: ret
define i16 @unpckbw_test(i16 %a0, i16 %a1) {
  %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
  ret i16 %res
}
; Unmasked (mask = -1) rcp14 with a zero passthru must select the plain
; EVEX encoding; the encodings below pin the expected bytes.
define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
  ; CHECK: vrcp14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4c,0xc0]
  %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone

define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
  ; CHECK: vrcp14pd {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x4c,0xc0]
  %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone
; vrndscale with immediate 11 and default rounding (i32 4 = CUR_DIRECTION).
declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)

define <8 x double> @test7(<8 x double> %a) {
  ; CHECK: vrndscalepd {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b]
  %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)

define <16 x float> @test8(<16 x float> %a) {
  ; CHECK: vrndscaleps {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b]
  %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
  ret <16 x float> %res
}
; rsqrt14/rcp14 packed and scalar forms, all unmasked (mask = -1) with a
; zeroinitializer passthru; encodings pinned for the register-register form.
define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
  ; CHECK: vrsqrt14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4e,0xc0]
  %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone

define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
  ; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0]
  %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
  ; CHECK: vrcp14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4d,0xc0]
  %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
; Packed sqrt takes a rounding-mode operand (i32 4 = CUR_DIRECTION);
; the scalar ss/sd forms only check that an EVEX (0x62) encoding is chosen.
define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
  ; CHECK: vsqrtpd
  %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone

define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
  ; CHECK: vsqrtps
  %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone

define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vsqrtss {{.*}}encoding: [0x62
  %res = call <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vsqrtsd {{.*}}encoding: [0x62
  %res = call <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone
; Scalar int<->fp conversions: on KNL the legacy SSE intrinsics and the
; AVX-512 unsigned variants must all select the EVEX-encoded (0x62) forms.
define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
  ; CHECK: vcvtsd2si {{.*}}encoding: [0x62
  %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
  ; CHECK: vcvtsi2sdq {{.*}}encoding: [0x62
  %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone

define <2 x double> @test_x86_avx512_cvtusi642sd(<2 x double> %a0, i64 %a1) {
  ; CHECK: vcvtusi2sdq {{.*}}encoding: [0x62
  %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64) nounwind readnone

define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
  ; CHECK: vcvttsd2si {{.*}}encoding: [0x62
  %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone


define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
  ; CHECK: vcvtss2si {{.*}}encoding: [0x62
  %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
  ; CHECK: vcvtsi2ssq {{.*}}encoding: [0x62
  %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone


define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) {
  ; CHECK: vcvttss2si {{.*}}encoding: [0x62
  %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone

define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
  ; CHECK: vcvtsd2usi {{.*}}encoding: [0x62
  %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone
; Half<->single conversions between a ymm (16 x f16) and a zmm (16 x f32);
; exact operands and encodings are pinned.
define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
  ; CHECK: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0]
  %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly


define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
  ; CHECK: vcvtps2ph $2, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1d,0xc0,0x02]
  %res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
; Broadcasts from memory (i8* source) and from a vector register, both
; scalar-single and scalar-double, to a full zmm.
define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
  ; CHECK: vbroadcastss
  %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly

define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
  ; CHECK: vbroadcastsd
  %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly

define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) {
  ; CHECK: vbroadcastss
  %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly

define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
  ; CHECK: vbroadcastsd
  %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly
; Integer broadcasts: from a vector register and from a GPR, for both
; dword and qword element types.
define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32> %a0) {
  ; CHECK: vpbroadcastd
  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1]
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly

define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
  ; CHECK: vpbroadcastd
  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly

define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) {
  ; CHECK: vpbroadcastq
  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1]
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly

define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
  ; CHECK: vpbroadcastq
  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly
; vpconflict: the unmasked (-1) cases materialize the all-ones mask
; (movw/movb $-1) and a zero passthru (vpxor); the masked cases only
; check the conflict instruction itself is selected.
define <16 x i32> @test_conflict_d(<16 x i32> %a) {
  ; CHECK: movw $-1, %ax
  ; CHECK: vpxor
  ; CHECK: vpconflictd
  %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly

define <8 x i64> @test_conflict_q(<8 x i64> %a) {
  ; CHECK: movb $-1, %al
  ; CHECK: vpxor
  ; CHECK: vpconflictq
  %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly

define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
  ; CHECK: vpconflictd
  %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
  ; CHECK: vpconflictq
  %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
  ret <8 x i64> %res
}
; vplzcnt via the target intrinsic (masked and unmasked) and via the
; generic llvm.ctlz, which should also lower to vplzcnt on this target.
define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
  ; CHECK: movw $-1, %ax
  ; CHECK: vpxor
  ; CHECK: vplzcntd
  %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly

define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
  ; CHECK: movb $-1, %al
  ; CHECK: vpxor
  ; CHECK: vplzcntq
  %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly


define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
  ; CHECK: vplzcntd
  %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
  ret <16 x i32> %res
}

define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
  ; CHECK: vplzcntq
  %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
  ret <8 x i64> %res
}

define <16 x i32> @test_ctlz_d(<16 x i32> %a) {
  ; CHECK-LABEL: test_ctlz_d
  ; CHECK: vplzcntd
  %res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) nounwind readonly

define <8 x i64> @test_ctlz_q(<8 x i64> %a) {
  ; CHECK-LABEL: test_ctlz_q
  ; CHECK: vplzcntq
  %res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1) nounwind readonly
; Mask-driven blends for float/double/i32/i64 elements; the memop variant
; checks that a memory operand is folded into vblendmpd.
define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK: vblendmps
  %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly

define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK: vblendmpd
  %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}

define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
  ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop
  ; CHECK: vblendmpd (%
  %b = load <8 x double>* %ptr
  %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly

define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
  ; CHECK: vpblendmd
  %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1]
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly

define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
  ; CHECK: vpblendmq
  %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1]
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly
; Packed conversions and compares with explicit rounding-mode operands:
; i32 1 = {rd-sae}, i32 2 = {ru-sae}, i32 4 = current direction,
; i32 8 = {sae}; the encodings pin the EVEX rounding bits.
define <8 x i32> @test_cvtpd2udq(<8 x double> %a) {
  ; CHECK: vcvtpd2udq {ru-sae}{{.*}}encoding: [0x62,0xf1,0xfc,0x58,0x79,0xc0]
  %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %a, <8 x i32> zeroinitializer, i8 -1, i32 2)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)

define <16 x i32> @test_cvtps2udq(<16 x float> %a) {
  ; CHECK: vcvtps2udq {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x79,0xc0]
  %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %a, <16 x i32> zeroinitializer, i16 -1, i32 1)
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)

define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
  ; CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
  %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
  ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)

define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
  ; CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
  %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
  ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)

; cvt intrinsics
define <16 x float> @test_cvtdq2ps(<16 x i32> %a) {
  ; CHECK: vcvtdq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x5b,0xc0]
  %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %a, <16 x float> zeroinitializer, i16 -1, i32 1)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)

define <16 x float> @test_cvtudq2ps(<16 x i32> %a) {
  ; CHECK: vcvtudq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7f,0x38,0x7a,0xc0]
  %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %a, <16 x float> zeroinitializer, i16 -1, i32 1)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)

define <8 x double> @test_cvtdq2pd(<8 x i32> %a) {
  ; CHECK: vcvtdq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0xe6,0xc0]
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %a, <8 x double> zeroinitializer, i8 -1)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)

define <8 x double> @test_cvtudq2pd(<8 x i32> %a) {
  ; CHECK: vcvtudq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0x7a,0xc0]
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %a, <8 x double> zeroinitializer, i8 -1)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
; fp min - max (unmasked, default rounding i32 4)
define <16 x float> @test_vmaxps(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK: vmaxps
  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
  ; CHECK: vmaxpd
  %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> zeroinitializer, i8 -1, i32 4)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <16 x float> @test_vminps(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK: vminps
  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)

define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
  ; CHECK: vminpd
  %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> zeroinitializer, i8 -1, i32 4)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
; Double -> single narrowing with {rd-sae} rounding (i32 1).
define <8 x float> @test_cvtpd2ps(<8 x double> %a) {
  ; CHECK: vcvtpd2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0xfd,0x38,0x5a,0xc0]
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %a, <8 x float> zeroinitializer, i8 -1, i32 1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)
; Integer abs / min / max / widening multiply, all unmasked with zero
; passthru; register-register encodings are pinned.
define <16 x i32> @test_pabsd(<16 x i32> %a) {
  ; CHECK: vpabsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xc0]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)

define <8 x i64> @test_pabsq(<8 x i64> %a) {
  ; CHECK: vpabsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xc0]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)

define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) {
  ; CHECK: vpmaxsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) {
  ; CHECK: vpminud {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xc1]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
  ; CHECK: vpmaxsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xc1]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <8 x i64> @test_vpmuludq(<16 x i32> %a0, <16 x i32> %a1) {
  ; CHECK: vpmuludq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a0, <16 x i32> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
; vptestm produces a mask register result, returned here as i8/i16.
define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
  ; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
  %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
  ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8)

define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) {
  ; CHECK: vptestmd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
  %res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
  ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
; Masked unaligned stores; the 0x49 byte in the encodings carries the
; {%k1} write-mask.
define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) {
  ; CHECK: vmovups {{.*}}encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07]
  call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )

define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
  ; CHECK: vmovupd {{.*}}encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07]
  call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8 )
; Two-source permute: vpermt.ps.512 with an all-ones mask selects VPERMT2PS.
define <16 x float> @test_vpermt2ps(<16 x float>%x, <16 x float>%y, <16 x i32>%perm) {
; CHECK: vpermt2ps {{.*}}encoding: [0x62,0xf2,0x6d,0x48,0x7f,0xc1]
 %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 -1)
 ret <16 x float> %res
}

; Same permute under a write-mask: VPERMT2PS with {%k1} merging.
define <16 x float> @test_vpermt2ps_mask(<16 x float>%x, <16 x float>%y, <16 x i32>%perm, i16 %mask) {
; CHECK-LABEL: test_vpermt2ps_mask:
; CHECK: vpermt2ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x7f,0xc1]
 %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 %mask)
 ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
Adam Nemet7f62b232014-06-10 16:39:53 +0000570
; Non-temporal 64-byte load intrinsic selects VMOVNTDQA from memory.
define <8 x i64> @test_vmovntdqa(i8 *%x) {
; CHECK-LABEL: test_vmovntdqa:
; CHECK: vmovntdqa (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x07]
 %res = call <8 x i64> @llvm.x86.avx512.movntdqa(i8* %x)
 ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
Adam Nemetfd2161b2014-08-05 17:23:04 +0000579
; valign.q.512, all-ones mask: plain VALIGNQ with immediate 2.
define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_valign_q:
; CHECK: valignq $2, %zmm1, %zmm0, %zmm0
 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> zeroinitializer, i8 -1)
 ret <8 x i64> %res
}

; Merging-masked VALIGNQ: result lands in the passthru register with {%k1}.
define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
; CHECK-LABEL: test_mask_valign_q:
; CHECK: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
 %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> %src, i8 %mask)
 ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i8, <8 x i64>, i8)

; Zeroing-masked dword variant: VALIGND with {%k1} {z}.
define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_maskz_valign_d:
; CHECK: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
 %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i8 5, <16 x i32> zeroinitializer, i16 %mask)
 ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i8, <16 x i32>, i16)
Elena Demikhovskyff620ed2014-08-27 07:38:43 +0000604
; Masked scalar-float store: store.ss selects VMOVSS to memory under {%k1}.
define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
 ; CHECK-LABEL: test_mask_store_ss
 ; CHECK: vmovss %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x11,0x07]
 call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask)
 ret void
}

declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )
613
; Dword equality compare into a mask register, no write-mask.
define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d
; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
 ret i16 %res
}

; Same compare gated by an incoming mask: VPCMPEQD with {%k1}.
define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_d
; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
 %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
 ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)

; Qword equality compare, no write-mask.
define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q
; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
 ret i8 %res
}

; Qword equality compare gated by an incoming mask.
define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_q
; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
 %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
 ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
Robert Khasanov28a7df02014-09-30 12:15:52 +0000645
; Dword signed greater-than compare into a mask register, no write-mask.
define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d
; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 ##
 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
 ret i16 %res
}

; Same compare gated by an incoming mask: VPCMPGTD with {%k1}.
define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_d
; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ##
 %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
 ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)

; Qword signed greater-than compare, no write-mask.
define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q
; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 ##
 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
 ret i8 %res
}

; Qword signed greater-than compare gated by an incoming mask.
define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_q
; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ##
 %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
 ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000677
; Signed dword compares for all eight predicates (imm 0..7), all-ones mask.
; Fix: "CHECK_LABEL" is not a FileCheck directive (underscore typo) and was
; silently ignored; use CHECK-LABEL so the checks are anchored per function.
define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_cmp_d_512
; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
; CHECK: vpcmpltd %zmm1, %zmm0, %k0 ##
 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
; CHECK: vpcmpled %zmm1, %zmm0, %k0 ##
 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 ##
 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 ##
 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 ##
 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
; CHECK: vpcmpnled %zmm1, %zmm0, %k0 ##
 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
; CHECK: vpcmpordd %zmm1, %zmm0, %k0 ##
 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
 ret <8 x i16> %vec7
}
706
; Masked signed dword compares for all eight predicates (imm 0..7).
; Fix: "CHECK_LABEL" is not a FileCheck directive (underscore typo) and was
; silently ignored; use CHECK-LABEL so the checks are anchored per function.
define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_mask_cmp_d_512
; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
; CHECK: vpcmpltd %zmm1, %zmm0, %k0 {%k1} ##
 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
; CHECK: vpcmpled %zmm1, %zmm0, %k0 {%k1} ##
 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 {%k1} ##
 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} ##
 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ##
 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
; CHECK: vpcmpnled %zmm1, %zmm0, %k0 {%k1} ##
 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
; CHECK: vpcmpordd %zmm1, %zmm0, %k0 {%k1} ##
 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
 ret <8 x i16> %vec7
}

declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
737
; Unsigned dword compares for all eight predicates (imm 0..7), all-ones mask.
; Fix: "CHECK_LABEL" is not a FileCheck directive (underscore typo) and was
; silently ignored; use CHECK-LABEL so the checks are anchored per function.
define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_ucmp_d_512
; CHECK: vpcmpequd %zmm1, %zmm0, %k0 ##
 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
; CHECK: vpcmpltud %zmm1, %zmm0, %k0 ##
 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
; CHECK: vpcmpleud %zmm1, %zmm0, %k0 ##
 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 ##
 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 ##
 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 ##
 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 ##
 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
; CHECK: vpcmpordud %zmm1, %zmm0, %k0 ##
 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
 ret <8 x i16> %vec7
}
766
; Masked unsigned dword compares for all eight predicates (imm 0..7).
; Fix: "CHECK_LABEL" is not a FileCheck directive (underscore typo) and was
; silently ignored; use CHECK-LABEL so the checks are anchored per function.
define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_mask_ucmp_d_512
; CHECK: vpcmpequd %zmm1, %zmm0, %k0 {%k1} ##
 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
; CHECK: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ##
 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
; CHECK: vpcmpleud %zmm1, %zmm0, %k0 {%k1} ##
 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 {%k1} ##
 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 {%k1} ##
 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 {%k1} ##
 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 {%k1} ##
 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
; CHECK: vpcmpordud %zmm1, %zmm0, %k0 {%k1} ##
 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
 ret <8 x i16> %vec7
}

declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
797
; Signed qword compares for all eight predicates (imm 0..7), all-ones mask.
; Fix: "CHECK_LABEL" is not a FileCheck directive (underscore typo) and was
; silently ignored; use CHECK-LABEL so the checks are anchored per function.
define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_cmp_q_512
; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltq %zmm1, %zmm0, %k0 ##
 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleq %zmm1, %zmm0, %k0 ##
 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 ##
 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 ##
 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 ##
 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 ##
 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmpordq %zmm1, %zmm0, %k0 ##
 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
 ret <8 x i8> %vec7
}
826
; Masked signed qword compares for all eight predicates (imm 0..7).
; Fix: "CHECK_LABEL" is not a FileCheck directive (underscore typo) and was
; silently ignored; use CHECK-LABEL so the checks are anchored per function.
define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_cmp_q_512
; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltq %zmm1, %zmm0, %k0 {%k1} ##
 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleq %zmm1, %zmm0, %k0 {%k1} ##
 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 {%k1} ##
 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ##
 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ##
 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 {%k1} ##
 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmpordq %zmm1, %zmm0, %k0 {%k1} ##
 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
 ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
857
; Unsigned qword compares for all eight predicates (imm 0..7), all-ones mask.
; Fix: "CHECK_LABEL" is not a FileCheck directive (underscore typo) and was
; silently ignored; use CHECK-LABEL so the checks are anchored per function.
define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_ucmp_q_512
; CHECK: vpcmpequq %zmm1, %zmm0, %k0 ##
 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 ##
 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 ##
 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 ##
 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 ##
 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 ##
 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 ##
 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmporduq %zmm1, %zmm0, %k0 ##
 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
 ret <8 x i8> %vec7
}
886
; Masked unsigned qword compares for all eight predicates (imm 0..7).
; Fix: "CHECK_LABEL" is not a FileCheck directive (underscore typo) and was
; silently ignored; use CHECK-LABEL so the checks are anchored per function.
define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_ucmp_q_512
; CHECK: vpcmpequq %zmm1, %zmm0, %k0 {%k1} ##
 %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ##
 %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} ##
 %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1} ##
 %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 {%k1} ##
 %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} ##
 %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} ##
 %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmporduq %zmm1, %zmm0, %k0 {%k1} ##
 %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
 ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
Adam Nemet47b2d5f2014-10-08 23:25:37 +0000917
; Masked 128-bit float extract (lane 2) merging into the passthru xmm.
define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
; CHECK-LABEL: test_mask_vextractf32x4:
; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
 %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i8 2, <4 x float> %b, i8 %mask)
 ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i8, <4 x float>, i8)

; Masked 256-bit integer extract (upper half) merging into the passthru ymm.
define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
; CHECK-LABEL: test_mask_vextracti64x4:
; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
 %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i8 2, <4 x i64> %b, i8 %mask)
 ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i8, <4 x i64>, i8)

; Zeroing-masked 128-bit integer extract: {%k1} {z} form.
define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
; CHECK-LABEL: test_maskz_vextracti32x4:
; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
 %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i8 2, <4 x i32> zeroinitializer, i8 %mask)
 ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i8, <4 x i32>, i8)

; All-ones mask: plain (unmasked) VEXTRACTF64X4.
define <4 x double> @test_vextractf64x4(<8 x double> %a) {
; CHECK-LABEL: test_vextractf64x4:
; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ##
 %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i8 2, <4 x double> zeroinitializer, i8 -1)
 ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8)
Cameron McInally73a6bca2014-11-12 19:58:54 +0000953
Cameron McInally04400442014-11-14 15:43:00 +0000954define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
955 ; CHECK-LABEL: test_x86_avx512_pslli_d
Cameron McInally73a6bca2014-11-12 19:58:54 +0000956 ; CHECK: vpslld
957 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
958 ret <16 x i32> %res
959}
Cameron McInally04400442014-11-14 15:43:00 +0000960
961define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
962 ; CHECK-LABEL: test_x86_avx512_mask_pslli_d
963 ; CHECK: vpslld $7, %zmm0, %zmm1 {%k1}
964 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
965 ret <16 x i32> %res
966}
967
968define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
969 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d
970 ; CHECK: vpslld $7, %zmm0, %zmm0 {%k1} {z}
971 %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
972 ret <16 x i32> %res
973}
974
Cameron McInally73a6bca2014-11-12 19:58:54 +0000975declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
976
Cameron McInally04400442014-11-14 15:43:00 +0000977define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
978 ; CHECK-LABEL: test_x86_avx512_pslli_q
Cameron McInally73a6bca2014-11-12 19:58:54 +0000979 ; CHECK: vpsllq
980 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
981 ret <8 x i64> %res
982}
Cameron McInally04400442014-11-14 15:43:00 +0000983
984define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
985 ; CHECK-LABEL: test_x86_avx512_mask_pslli_q
986 ; CHECK: vpsllq $7, %zmm0, %zmm1 {%k1}
987 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
988 ret <8 x i64> %res
989}
990
991define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
992 ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q
993 ; CHECK: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
994 %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
995 ret <8 x i64> %res
996}
997
Cameron McInally73a6bca2014-11-12 19:58:54 +0000998declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
999
Cameron McInally04400442014-11-14 15:43:00 +00001000define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
1001 ; CHECK-LABEL: test_x86_avx512_psrli_d
Cameron McInally73a6bca2014-11-12 19:58:54 +00001002 ; CHECK: vpsrld
1003 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1004 ret <16 x i32> %res
1005}
Cameron McInally04400442014-11-14 15:43:00 +00001006
1007define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1008 ; CHECK-LABEL: test_x86_avx512_mask_psrli_d
1009 ; CHECK: vpsrld $7, %zmm0, %zmm1 {%k1}
1010 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1011 ret <16 x i32> %res
1012}
1013
1014define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
1015 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d
1016 ; CHECK: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
1017 %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1018 ret <16 x i32> %res
1019}
1020
Cameron McInally73a6bca2014-11-12 19:58:54 +00001021declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1022
Cameron McInally04400442014-11-14 15:43:00 +00001023define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
1024 ; CHECK-LABEL: test_x86_avx512_psrli_q
Cameron McInally73a6bca2014-11-12 19:58:54 +00001025 ; CHECK: vpsrlq
1026 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1027 ret <8 x i64> %res
1028}
Cameron McInally04400442014-11-14 15:43:00 +00001029
1030define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1031 ; CHECK-LABEL: test_x86_avx512_mask_psrli_q
1032 ; CHECK: vpsrlq $7, %zmm0, %zmm1 {%k1}
1033 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1034 ret <8 x i64> %res
1035}
1036
1037define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
1038 ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q
1039 ; CHECK: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
1040 %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1041 ret <8 x i64> %res
1042}
1043
Cameron McInally73a6bca2014-11-12 19:58:54 +00001044declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
1045
Cameron McInally04400442014-11-14 15:43:00 +00001046define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
1047 ; CHECK-LABEL: test_x86_avx512_psrai_d
Cameron McInally73a6bca2014-11-12 19:58:54 +00001048 ; CHECK: vpsrad
1049 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
1050 ret <16 x i32> %res
1051}
Cameron McInally04400442014-11-14 15:43:00 +00001052
1053define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
1054 ; CHECK-LABEL: test_x86_avx512_mask_psrai_d
1055 ; CHECK: vpsrad $7, %zmm0, %zmm1 {%k1}
1056 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
1057 ret <16 x i32> %res
1058}
1059
1060define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
1061 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d
1062 ; CHECK: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
1063 %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
1064 ret <16 x i32> %res
1065}
1066
Cameron McInally73a6bca2014-11-12 19:58:54 +00001067declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
1068
Cameron McInally04400442014-11-14 15:43:00 +00001069define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
1070 ; CHECK-LABEL: test_x86_avx512_psrai_q
Cameron McInally73a6bca2014-11-12 19:58:54 +00001071 ; CHECK: vpsraq
1072 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
1073 ret <8 x i64> %res
1074}
Cameron McInally04400442014-11-14 15:43:00 +00001075
1076define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
1077 ; CHECK-LABEL: test_x86_avx512_mask_psrai_q
1078 ; CHECK: vpsraq $7, %zmm0, %zmm1 {%k1}
1079 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
1080 ret <8 x i64> %res
1081}
1082
1083define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
1084 ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q
1085 ; CHECK: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
1086 %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
1087 ret <8 x i64> %res
1088}
1089
Cameron McInally73a6bca2014-11-12 19:58:54 +00001090declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone