; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s
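; This file checks that AVX-512 intrinsics are lowered to the expected 512-bit
; instructions when targeting KNL; where an encoding is spelled out, the exact
; EVEX byte sequence is verified as well (hence --show-mc-encoding).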

declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
; CHECK-LABEL: test_kortestz
; CHECK: kortestw
; CHECK: sete
define i32 @test_kortestz(i16 %a0, i16 %a1) {
  %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
; CHECK-LABEL: test_kortestc
; CHECK: kortestw
; CHECK: sbbl
define i32 @test_kortestc(i16 %a0, i16 %a1) {
  %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
  ret i32 %res
}

declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
; CHECK-LABEL: test_kand
; CHECK: kandw
; CHECK: kandw
define i16 @test_kand(i16 %a0, i16 %a1) {
  %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
  %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
  ret i16 %t2
}

declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
; CHECK-LABEL: test_knot
; CHECK: knotw
define i16 @test_knot(i16 %a0) {
  %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone

; CHECK-LABEL: unpckbw_test
; CHECK: kunpckbw
; CHECK: ret
define i16 @unpckbw_test(i16 %a0, i16 %a1) {
  %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
  ret i16 %res
}

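; Reciprocal and reciprocal-square-root estimates. The "14" in rcp14/rsqrt14
; refers to the roughly 14 bits of relative accuracy the hardware provides;
; these tests only check instruction selection and encoding, not the values.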
define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
  ; CHECK: vrcp14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4c,0xc0]
  %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone

define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
  ; CHECK: vrcp14pd {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x4c,0xc0]
  %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone

declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)

define <8 x double> @test7(<8 x double> %a) {
; CHECK: vrndscalepd {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b]
  %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)

define <16 x float> @test8(<16 x float> %a) {
; CHECK: vrndscaleps {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b]
  %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
  ; CHECK: vrsqrt14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4e,0xc0]
  %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone

define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
  ; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0]
  %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
  ; CHECK: vrcp14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4d,0xc0]
  %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
  ; CHECK: vsqrtpd
  %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone

define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
  ; CHECK: vsqrtps
  %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone

define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vsqrtss {{.*}}encoding: [0x62
  %res = call <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vsqrtsd {{.*}}encoding: [0x62
  %res = call <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone

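; Scalar int<->fp conversions. On an AVX-512 target even the legacy SSE
; conversion intrinsics should select the EVEX-encoded forms, so each test
; checks that the emitted encoding starts with the 0x62 EVEX prefix byte.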
define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
  ; CHECK: vcvtsd2si {{.*}}encoding: [0x62
  %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
  ; CHECK: vcvtsi2sdq {{.*}}encoding: [0x62
  %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone

define <2 x double> @test_x86_avx512_cvtusi642sd(<2 x double> %a0, i64 %a1) {
  ; CHECK: vcvtusi2sdq {{.*}}encoding: [0x62
  %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64) nounwind readnone

define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
  ; CHECK: vcvttsd2si {{.*}}encoding: [0x62
  %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone

define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
  ; CHECK: vcvtss2si {{.*}}encoding: [0x62
  %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone

define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
  ; CHECK: vcvtsi2ssq {{.*}}encoding: [0x62
  %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone

define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) {
  ; CHECK: vcvttss2si {{.*}}encoding: [0x62
  %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone

define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
  ; CHECK: vcvtsd2usi {{.*}}encoding: [0x62
  %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone

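; Half-precision conversions: sixteen fp16 values packed in a ymm register
; convert to and from a full zmm of single-precision floats.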
define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
  ; CHECK: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0]
  %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly

define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
  ; CHECK: vcvtps2ph $2, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1d,0xc0,0x02]
  %res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly

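; Broadcasts, both from a memory operand (i8*) and from a scalar or the low
; element of a vector register.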
define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
  ; CHECK: vbroadcastss
  %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly

define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
  ; CHECK: vbroadcastsd
  %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly

define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) {
  ; CHECK: vbroadcastss
  %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly

define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
  ; CHECK: vbroadcastsd
  %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly

define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32> %a0) {
  ; CHECK: vpbroadcastd
  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1]
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly

define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
  ; CHECK: vpbroadcastd
  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly

define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) {
  ; CHECK: vpbroadcastq
  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1]
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly

define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
  ; CHECK: vpbroadcastq
  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly

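; Conflict detection (AVX512CD). For the unmasked forms the all-ones mask
; (movw/movb $-1) and the zero passthrough (vpxor) are still materialized
; explicitly before vpconflictd/vpconflictq, as the CHECK lines record.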
define <16 x i32> @test_conflict_d(<16 x i32> %a) {
  ; CHECK: movw $-1, %ax
  ; CHECK: vpxor
  ; CHECK: vpconflictd
  %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly

define <8 x i64> @test_conflict_q(<8 x i64> %a) {
  ; CHECK: movb $-1, %al
  ; CHECK: vpxor
  ; CHECK: vpconflictq
  %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly

define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
  ; CHECK: vpconflictd
  %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
  ; CHECK: vpconflictq
  %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
  ret <8 x i64> %res
}

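; Vector leading-zero count (AVX512CD), reached both through the target
; intrinsics here and through the generic llvm.ctlz.* intrinsics below.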
define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
  ; CHECK: movw $-1, %ax
  ; CHECK: vpxor
  ; CHECK: vplzcntd
  %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly

define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
  ; CHECK: movb $-1, %al
  ; CHECK: vpxor
  ; CHECK: vplzcntq
  %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly

define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
  ; CHECK: vplzcntd
  %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
  ret <16 x i32> %res
}

define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
  ; CHECK: vplzcntq
  %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
  ret <8 x i64> %res
}

define <16 x i32> @test_ctlz_d(<16 x i32> %a) {
  ; CHECK-LABEL: test_ctlz_d
  ; CHECK: vplzcntd
  %res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) nounwind readonly

define <8 x i64> @test_ctlz_q(<8 x i64> %a) {
  ; CHECK-LABEL: test_ctlz_q
  ; CHECK: vplzcntq
  %res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1) nounwind readonly

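; Masked blends: the mask arrives in a GPR and selects, lane by lane, between
; the two vector operands.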
define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK: vblendmps %zmm1, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly

define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK: vblendmpd %zmm1, %zmm0
  %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}

define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
  ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop
  ; CHECK: vblendmpd (%
  %b = load <8 x double>* %ptr
  %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly

define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
  ; CHECK: vpblendmd
  %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1]
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly

define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
  ; CHECK: vpblendmq
  %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1]
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly

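; Conversions and comparisons that take an explicit rounding-mode operand;
; the {ru-sae}/{rd-sae}/{sae} markers in the expected disassembly correspond
; to the trailing i32 rounding argument of each intrinsic.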
define <8 x i32> @test_cvtpd2udq(<8 x double> %a) {
  ; CHECK: vcvtpd2udq {ru-sae}{{.*}}encoding: [0x62,0xf1,0xfc,0x58,0x79,0xc0]
  %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %a, <8 x i32> zeroinitializer, i8 -1, i32 2)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)

define <16 x i32> @test_cvtps2udq(<16 x float> %a) {
  ; CHECK: vcvtps2udq {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x79,0xc0]
  %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %a, <16 x i32> zeroinitializer, i16 -1, i32 1)
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)

define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
  ; CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
  %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i8 2, i16 -1, i32 8)
  ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i8, i16, i32)

define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
  ; CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
  %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i8 4, i8 -1, i32 4)
  ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i8, i8, i32)

; cvt intrinsics
define <16 x float> @test_cvtdq2ps(<16 x i32> %a) {
  ; CHECK: vcvtdq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x5b,0xc0]
  %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %a, <16 x float> zeroinitializer, i16 -1, i32 1)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)

define <16 x float> @test_cvtudq2ps(<16 x i32> %a) {
  ; CHECK: vcvtudq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7f,0x38,0x7a,0xc0]
  %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %a, <16 x float> zeroinitializer, i16 -1, i32 1)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)

define <8 x double> @test_cvtdq2pd(<8 x i32> %a) {
  ; CHECK: vcvtdq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0xe6,0xc0]
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %a, <8 x double> zeroinitializer, i8 -1)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)

define <8 x double> @test_cvtudq2pd(<8 x i32> %a) {
  ; CHECK: vcvtudq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0x7a,0xc0]
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %a, <8 x double> zeroinitializer, i8 -1)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)

; fp min - max
define <16 x float> @test_vmaxps(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK: vmaxps
  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>,
                    <16 x float>, i16, i32)

define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
  ; CHECK: vmaxpd
  %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double> zeroinitializer, i8 -1, i32 4)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>,
                    <8 x double>, i8, i32)

define <16 x float> @test_vminps(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK: vminps
  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>,
                    <16 x float>, i16, i32)

define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
  ; CHECK: vminpd
  %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double> zeroinitializer, i8 -1, i32 4)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>,
                    <8 x double>, i8, i32)

define <8 x float> @test_cvtpd2ps(<8 x double> %a) {
  ; CHECK: vcvtpd2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0xfd,0x38,0x5a,0xc0]
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %a, <8 x float> zeroinitializer, i8 -1, i32 1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)

define <16 x i32> @test_pabsd(<16 x i32> %a) {
  ; CHECK: vpabsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xc0]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)

define <8 x i64> @test_pabsq(<8 x i64> %a) {
  ; CHECK: vpabsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xc0]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)

define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) {
  ; CHECK: vpmaxsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1,
                    <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) {
  ; CHECK: vpminud {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xc1]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1,
                    <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
  ; CHECK: vpmaxsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xc1]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1,
                    <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <8 x i64> @test_vpmuludq(<16 x i32> %a0, <16 x i32> %a1) {
  ; CHECK: vpmuludq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a0, <16 x i32> %a1,
                    <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)

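; vptestmd/vptestmq set one mask bit per lane from (a & b) != 0 and hand the
; resulting k-mask back as an ordinary integer.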
define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
  ; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
  %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
  ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8)

define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) {
  ; CHECK: vptestmd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
  %res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
  ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16)

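; Masked stores and loads: the storeu forms select unaligned vmovups/vmovupd,
; the aligned forms vmovaps/vmovapd, and an all-ones (-1) mask should fold
; away to a plain unmasked memory access.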
define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) {
; CHECK: vmovups {{.*}}encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07]
  call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16)

define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
; CHECK: vmovupd {{.*}}encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07]
  call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)

define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
; CHECK-LABEL: test_mask_store_aligned_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovaps %zmm0, (%rdi) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16)

define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_store_aligned_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovapd %zmm0, (%rdi) {%k1}
; CHECK-NEXT: retq
  call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)

define <16 x float> @test_maskz_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
; CHECK-LABEL: test_maskz_load_aligned_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)

define <8 x double> @test_maskz_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_maskz_load_aligned_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
  ret <8 x double> %res
}

declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)

define <16 x float> @test_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
; CHECK-LABEL: test_load_aligned_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovaps (%rdi), %zmm0
; CHECK-NEXT: retq
  %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
  ret <16 x float> %res
}

define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_load_aligned_pd:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovapd (%rdi), %zmm0
; CHECK-NEXT: retq
  %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
  ret <8 x double> %res
}

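; Two-source shuffles and non-temporal loads.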
define <16 x float> @test_vpermt2ps(<16 x float> %x, <16 x float> %y, <16 x i32> %perm) {
; CHECK: vpermt2ps {{.*}}encoding: [0x62,0xf2,0x6d,0x48,0x7f,0xc1]
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32> %perm, <16 x float> %x, <16 x float> %y, i16 -1)
  ret <16 x float> %res
}

define <16 x float> @test_vpermt2ps_mask(<16 x float> %x, <16 x float> %y, <16 x i32> %perm, i16 %mask) {
; CHECK-LABEL: test_vpermt2ps_mask:
; CHECK: vpermt2ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x7f,0xc1]
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32> %perm, <16 x float> %x, <16 x float> %y, i16 %mask)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)

define <8 x i64> @test_vmovntdqa(i8* %x) {
; CHECK-LABEL: test_vmovntdqa:
; CHECK: vmovntdqa (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x07]
  %res = call <8 x i64> @llvm.x86.avx512.movntdqa(i8* %x)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)

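; valignq/valignd concatenate the two sources and shift right by the
; immediate number of elements; the masked forms merge into or zero the
; destination lanes according to the mask.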
define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_valign_q:
; CHECK: valignq $2, %zmm1, %zmm0, %zmm0
  %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
; CHECK-LABEL: test_mask_valign_q:
; CHECK: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> %src, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i8, <8 x i64>, i8)

define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_maskz_valign_d:
; CHECK: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
  %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i8 5, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i8, <16 x i32>, i16)

define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
  ; CHECK-LABEL: test_mask_store_ss
  ; CHECK: vmovss %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x11,0x07]
  call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8)

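; Integer compares that produce a k-mask, returned to IR as an i16/i8.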
define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d
; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
  ret i16 %res
}

define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_d
; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)

define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q
; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_q
; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)

define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d
; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 ##
  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
  ret i16 %res
}

define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_d
; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ##
  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)

define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q
; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_q
; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)

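; cmp/ucmp take the comparison predicate as an immediate; each test sweeps
; all eight predicate encodings (0-7) and gathers the per-predicate masks
; into a single returned vector.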
744define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
745; CHECK_LABEL: test_cmp_d_512
746; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000747 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 0, i16 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000748 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
749; CHECK: vpcmpltd %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000750 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 1, i16 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000751 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
752; CHECK: vpcmpled %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000753 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 2, i16 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000754 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
755; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000756 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 3, i16 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000757 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
758; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000759 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 4, i16 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000760 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
761; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000762 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 5, i16 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000763 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
764; CHECK: vpcmpnled %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000765 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 6, i16 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000766 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
767; CHECK: vpcmpordd %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000768 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 7, i16 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000769 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
770 ret <8 x i16> %vec7
771}
772
773define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
774; CHECK_LABEL: test_mask_cmp_d_512
775; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
Craig Topper29f2e952015-01-25 23:26:02 +0000776 %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 0, i16 %mask)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000777 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
778; CHECK: vpcmpltd %zmm1, %zmm0, %k0 {%k1} ##
Craig Topper29f2e952015-01-25 23:26:02 +0000779 %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 1, i16 %mask)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000780 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
781; CHECK: vpcmpled %zmm1, %zmm0, %k0 {%k1} ##
Craig Topper29f2e952015-01-25 23:26:02 +0000782 %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 2, i16 %mask)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000783 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
784; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 {%k1} ##
Craig Topper29f2e952015-01-25 23:26:02 +0000785 %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 3, i16 %mask)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000786 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
787; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} ##
Craig Topper29f2e952015-01-25 23:26:02 +0000788 %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 4, i16 %mask)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000789 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
790; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ##
Craig Topper29f2e952015-01-25 23:26:02 +0000791 %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 5, i16 %mask)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000792 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
793; CHECK: vpcmpnled %zmm1, %zmm0, %k0 {%k1} ##
Craig Topper29f2e952015-01-25 23:26:02 +0000794 %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 6, i16 %mask)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000795 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
796; CHECK: vpcmpordd %zmm1, %zmm0, %k0 {%k1} ##
Craig Topper29f2e952015-01-25 23:26:02 +0000797 %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 7, i16 %mask)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000798 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
799 ret <8 x i16> %vec7
800}
801
Craig Topper29f2e952015-01-25 23:26:02 +0000802declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i8, i16) nounwind readnone
Robert Khasanovb51bb222014-10-08 15:49:26 +0000803
804define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
805; CHECK_LABEL: test_ucmp_d_512
806; CHECK: vpcmpequd %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000807 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 0, i16 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000808 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
809; CHECK: vpcmpltud %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000810 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 1, i16 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000811 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
812; CHECK: vpcmpleud %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000813 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 2, i16 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000814 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
815; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000816 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 3, i16 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000817 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
818; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000819 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 4, i16 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000820 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
821; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000822 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 5, i16 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000823 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
824; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000825 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 6, i16 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000826 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
827; CHECK: vpcmpordud %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000828 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 7, i16 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000829 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
830 ret <8 x i16> %vec7
831}
832
833define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
834; CHECK_LABEL: test_mask_ucmp_d_512
835; CHECK: vpcmpequd %zmm1, %zmm0, %k0 {%k1} ##
Craig Topper29f2e952015-01-25 23:26:02 +0000836 %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 0, i16 %mask)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000837 %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
838; CHECK: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ##
Craig Topper29f2e952015-01-25 23:26:02 +0000839 %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 1, i16 %mask)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000840 %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
841; CHECK: vpcmpleud %zmm1, %zmm0, %k0 {%k1} ##
Craig Topper29f2e952015-01-25 23:26:02 +0000842 %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 2, i16 %mask)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000843 %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
844; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 {%k1} ##
Craig Topper29f2e952015-01-25 23:26:02 +0000845 %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 3, i16 %mask)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000846 %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
847; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 {%k1} ##
Craig Topper29f2e952015-01-25 23:26:02 +0000848 %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 4, i16 %mask)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000849 %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
850; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 {%k1} ##
Craig Topper29f2e952015-01-25 23:26:02 +0000851 %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 5, i16 %mask)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000852 %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
853; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 {%k1} ##
Craig Topper29f2e952015-01-25 23:26:02 +0000854 %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 6, i16 %mask)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000855 %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
856; CHECK: vpcmpordud %zmm1, %zmm0, %k0 {%k1} ##
Craig Topper29f2e952015-01-25 23:26:02 +0000857 %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 7, i16 %mask)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000858 %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
859 ret <8 x i16> %vec7
860}
861
Craig Topper29f2e952015-01-25 23:26:02 +0000862declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i8, i16) nounwind readnone
Robert Khasanovb51bb222014-10-08 15:49:26 +0000863
864define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
865; CHECK_LABEL: test_cmp_q_512
866; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000867 %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 0, i8 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000868 %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
869; CHECK: vpcmpltq %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000870 %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 1, i8 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000871 %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
872; CHECK: vpcmpleq %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000873 %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 2, i8 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000874 %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
875; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000876 %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 3, i8 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000877 %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
878; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000879 %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 4, i8 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000880 %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
881; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000882 %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 5, i8 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000883 %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
884; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000885 %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 6, i8 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000886 %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
887; CHECK: vpcmpordq %zmm1, %zmm0, %k0 ##
Craig Topper29f2e952015-01-25 23:26:02 +0000888 %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 7, i8 -1)
Robert Khasanovb51bb222014-10-08 15:49:26 +0000889 %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
890 ret <8 x i8> %vec7
891}
892
893define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
894; CHECK_LABEL: test_mask_cmp_q_512
; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltq %zmm1, %zmm0, %k0 {%k1} ##
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleq %zmm1, %zmm0, %k0 {%k1} ##
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 {%k1} ##
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ##
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ##
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 {%k1} ##
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmpordq %zmm1, %zmm0, %k0 {%k1} ##
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i8, i8) nounwind readnone

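; The ucmp tests below mirror the signed cmp tests above: the third (i8)
; operand is the comparison predicate immediate (0 through 7) and the final
; i8 operand is the write-mask, where -1 enables all eight lanes.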
define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_ucmp_q_512
; CHECK: vpcmpequq %zmm1, %zmm0, %k0 ##
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 ##
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 ##
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 ##
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 ##
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 ##
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 ##
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmporduq %zmm1, %zmm0, %k0 ##
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_ucmp_q_512
; CHECK: vpcmpequq %zmm1, %zmm0, %k0 {%k1} ##
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ##
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} ##
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1} ##
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 {%k1} ##
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} ##
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} ##
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmporduq %zmm1, %zmm0, %k0 {%k1} ##
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i8, i8) nounwind readnone

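; Sub-vector extract tests: the i8 immediate selects which 128-bit or 256-bit
; chunk of the source register is extracted; masked forms blend into the
; passthru operand and {z} forms zero the inactive lanes.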
define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
; CHECK-LABEL: test_mask_vextractf32x4:
; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
  %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i8 2, <4 x float> %b, i8 %mask)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i8, <4 x float>, i8)

define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
; CHECK-LABEL: test_mask_vextracti64x4:
; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
  %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i8 2, <4 x i64> %b, i8 %mask)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i8, <4 x i64>, i8)

define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
; CHECK-LABEL: test_maskz_vextracti32x4:
; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
  %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i8 2, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i8, <4 x i32>, i8)

define <4 x double> @test_vextractf64x4(<8 x double> %a) {
; CHECK-LABEL: test_vextractf64x4:
; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ##
  %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i8 2, <4 x double> zeroinitializer, i8 -1)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8)

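; Shift-by-immediate tests: each intrinsic takes the shift count as an i32,
; then a passthru vector and a write-mask; passing an all-ones mask (-1)
; exercises the unmasked form.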
define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
  ; CHECK-LABEL: test_x86_avx512_pslli_d
  ; CHECK: vpslld
  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_pslli_d
  ; CHECK: vpslld $7, %zmm0, %zmm1 {%k1}
  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d
  ; CHECK: vpslld $7, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
  ; CHECK-LABEL: test_x86_avx512_pslli_q
  ; CHECK: vpsllq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_pslli_q
  ; CHECK: vpsllq $7, %zmm0, %zmm1 {%k1}
  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q
  ; CHECK: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
  ; CHECK-LABEL: test_x86_avx512_psrli_d
  ; CHECK: vpsrld
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psrli_d
  ; CHECK: vpsrld $7, %zmm0, %zmm1 {%k1}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d
  ; CHECK: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
  ; CHECK-LABEL: test_x86_avx512_psrli_q
  ; CHECK: vpsrlq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psrli_q
  ; CHECK: vpsrlq $7, %zmm0, %zmm1 {%k1}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q
  ; CHECK: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
  ; CHECK-LABEL: test_x86_avx512_psrai_d
  ; CHECK: vpsrad
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psrai_d
  ; CHECK: vpsrad $7, %zmm0, %zmm1 {%k1}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d
  ; CHECK: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
  ; CHECK-LABEL: test_x86_avx512_psrai_q
  ; CHECK: vpsraq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psrai_q
  ; CHECK: vpsraq $7, %zmm0, %zmm1 {%k1}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q
  ; CHECK: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone

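; Shift-by-register tests: the count is taken from the low 64 bits of the
; XMM operand and applied uniformly to all elements.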
define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psll_d
  ; CHECK: vpslld
  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psll_d
  ; CHECK: vpslld %xmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psll_d
  ; CHECK: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psll_q
  ; CHECK: vpsllq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psll_q
  ; CHECK: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psll_q
  ; CHECK: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psrl_d
  ; CHECK: vpsrld
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psrl_d
  ; CHECK: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d
  ; CHECK: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psrl_q
  ; CHECK: vpsrlq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psrl_q
  ; CHECK: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q
  ; CHECK: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psra_d
  ; CHECK: vpsrad
  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psra_d
  ; CHECK: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psra_d
  ; CHECK: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psra_q
  ; CHECK: vpsraq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psra_q
  ; CHECK: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psra_q
  ; CHECK: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

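; Variable-shift tests: each element of the first operand is shifted by the
; count in the corresponding element of the second operand.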
define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psllv_d
  ; CHECK: vpsllvd
  %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psllv_d
  ; CHECK: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psllv_d
  ; CHECK: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psllv_q
  ; CHECK: vpsllvq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psllv_q
  ; CHECK: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psllv_q
  ; CHECK: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psrav_d
  ; CHECK: vpsravd
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psrav_d
  ; CHECK: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psrav_d
  ; CHECK: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psrav_q
  ; CHECK: vpsravq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psrav_q
  ; CHECK: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psrav_q
  ; CHECK: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psrlv_d
  ; CHECK: vpsrlvd
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psrlv_d
  ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d
  ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psrlv_q
  ; CHECK: vpsrlvq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q
  ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q
  ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone

define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
  ; CHECK-LABEL: test_x86_avx512_psrlv_q_memop
  ; CHECK: vpsrlvq (%
  %b = load <8 x i64>* %ptr
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

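; Rounding-mode tests: the trailing i32 operand selects the embedded rounding
; control, printed as {rn-sae} (0, to nearest even), {rd-sae} (1, toward
; -inf), {ru-sae} (2, toward +inf), or {rz-sae} (3, toward zero).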
declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)

define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK-LABEL: test_vsubps_rn
  ; CHECK: vsubps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK-LABEL: test_vsubps_rd
  ; CHECK: vsubps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK-LABEL: test_vsubps_ru
  ; CHECK: vsubps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK-LABEL: test_vsubps_rz
  ; CHECK: vsubps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
  %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 3)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK-LABEL: test_vmulps_rn
  ; CHECK: vmulps {rn-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK-LABEL: test_vmulps_rd
  ; CHECK: vmulps {rd-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK-LABEL: test_vmulps_ru
  ; CHECK: vmulps {ru-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK-LABEL: test_vmulps_rz
  ; CHECK: vmulps {rz-sae}{{.*}} ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 3)
  ret <16 x float> %res
}

;; mask float
define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ; CHECK-LABEL: test_vmulps_mask_rn
  ; CHECK: vmulps {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 %mask, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ; CHECK-LABEL: test_vmulps_mask_rd
  ; CHECK: vmulps {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 %mask, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ; CHECK-LABEL: test_vmulps_mask_ru
  ; CHECK: vmulps {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
  ; CHECK-LABEL: test_vmulps_mask_rz
  ; CHECK: vmulps {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 %mask, i32 3)
  ret <16 x float> %res
}

;; With Passthru value
define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
  ; CHECK-LABEL: test_vmulps_mask_passthru_rn
  ; CHECK: vmulps {rn-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> %passthru, i16 %mask, i32 0)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
  ; CHECK-LABEL: test_vmulps_mask_passthru_rd
  ; CHECK: vmulps {rd-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> %passthru, i16 %mask, i32 1)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
  ; CHECK-LABEL: test_vmulps_mask_passthru_ru
  ; CHECK: vmulps {ru-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> %passthru, i16 %mask, i32 2)
  ret <16 x float> %res
}

define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
  ; CHECK-LABEL: test_vmulps_mask_passthru_rz
  ; CHECK: vmulps {rz-sae}{{.*}}{%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
  %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> %passthru, i16 %mask, i32 3)
  ret <16 x float> %res
}

;; mask double
define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
  ; CHECK-LABEL: test_vmulpd_mask_rn
  ; CHECK: vmulpd {rn-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
  %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double> zeroinitializer, i8 %mask, i32 0)
  ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
  ; CHECK-LABEL: test_vmulpd_mask_rd
  ; CHECK: vmulpd {rd-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
  %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double> zeroinitializer, i8 %mask, i32 1)
  ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
  ; CHECK-LABEL: test_vmulpd_mask_ru
  ; CHECK: vmulpd {ru-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
  %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double> zeroinitializer, i8 %mask, i32 2)
  ret <8 x double> %res
}

define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
  ; CHECK-LABEL: test_vmulpd_mask_rz
  ; CHECK: vmulpd {rz-sae}{{.*}}{%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
  %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double> zeroinitializer, i8 %mask, i32 3)
  ret <8 x double> %res
}