; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl -mattr=+avx512ifma | FileCheck %s

; Calls @llvm.x86.avx512.mask.vpmadd52h.uq.128 four times and folds all four
; results into the returned value:
;   %res  - %x0, %x1, %x2 with mask %x3
;   %res1 - %x0, %x1, zeroinitializer with mask %x3
;   %res2 - zeroinitializer, %x1, zeroinitializer with mask %x3
;   %res3 - %x0, %x1, %x2 with an all-ones mask (i8 -1)
; The expected assembly below is autogenerated; regenerate it rather than
; editing it by hand.
declare <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %xmm0, %xmm3
; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT: vmovaps %xmm0, %xmm4
; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm4
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z}
; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0
; CHECK-NEXT: vpaddq %xmm2, %xmm4, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq

  %res = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res3 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res4 = add <2 x i64> %res, %res1
  %res5 = add <2 x i64> %res3, %res2
  %res6 = add <2 x i64> %res5, %res4
  ret <2 x i64> %res6
}

; Calls @llvm.x86.avx512.mask.vpmadd52h.uq.256 four times and folds all four
; results into the returned value:
;   %res  - %x0, %x1, %x2 with mask %x3
;   %res1 - %x0, %x1, zeroinitializer with mask %x3
;   %res2 - zeroinitializer, %x1, zeroinitializer with mask %x3
;   %res3 - %x0, %x1, %x2 with an all-ones mask (i8 -1)
; The expected assembly below is autogenerated; regenerate it rather than
; editing it by hand.
declare <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52h_uq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %ymm0, %ymm3
; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm3 {%k1}
; CHECK-NEXT: vmovaps %ymm0, %ymm4
; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm4
; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z}
; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0
; CHECK-NEXT: vpaddq %ymm2, %ymm4, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq

  %res = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res3 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res4 = add <4 x i64> %res, %res1
  %res5 = add <4 x i64> %res3, %res2
  %res6 = add <4 x i64> %res5, %res4
  ret <4 x i64> %res6
}

; Calls @llvm.x86.avx512.maskz.vpmadd52h.uq.128 four times and folds all four
; results into the returned value:
;   %res  - %x0, %x1, %x2 with mask %x3
;   %res1 - %x0, %x1, zeroinitializer with mask %x3
;   %res2 - zeroinitializer, %x1, zeroinitializer with mask %x3
;   %res3 - %x0, %x1, %x2 with an all-ones mask (i8 -1)
; The expected assembly below is autogenerated; regenerate it rather than
; editing it by hand.
declare <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %xmm0, %xmm3
; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK-NEXT: vmovaps %xmm0, %xmm4
; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm4
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm2 {%k1} {z}
; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0
; CHECK-NEXT: vpaddq %xmm2, %xmm4, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq

  %res = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res3 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res4 = add <2 x i64> %res, %res1
  %res5 = add <2 x i64> %res3, %res2
  %res6 = add <2 x i64> %res5, %res4
  ret <2 x i64> %res6
}

; Calls @llvm.x86.avx512.maskz.vpmadd52h.uq.256 four times and folds all four
; results into the returned value:
;   %res  - %x0, %x1, %x2 with mask %x3
;   %res1 - %x0, %x1, zeroinitializer with mask %x3
;   %res2 - zeroinitializer, %x1, zeroinitializer with mask %x3
;   %res3 - %x0, %x1, %x2 with an all-ones mask (i8 -1)
; The expected assembly below is autogenerated; regenerate it rather than
; editing it by hand.
declare <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_maskz_vpmadd52h_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52h_uq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %ymm0, %ymm3
; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm3 {%k1} {z}
; CHECK-NEXT: vmovaps %ymm0, %ymm4
; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm4
; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm2 {%k1} {z}
; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0
; CHECK-NEXT: vpaddq %ymm2, %ymm4, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq

  %res = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res3 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52h.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res4 = add <4 x i64> %res, %res1
  %res5 = add <4 x i64> %res3, %res2
  %res6 = add <4 x i64> %res5, %res4
  ret <4 x i64> %res6
}

; Calls @llvm.x86.avx512.mask.vpmadd52l.uq.128 four times and folds all four
; results into the returned value:
;   %res  - %x0, %x1, %x2 with mask %x3
;   %res1 - %x0, %x1, zeroinitializer with mask %x3
;   %res2 - zeroinitializer, %x1, zeroinitializer with mask %x3
;   %res3 - %x0, %x1, %x2 with an all-ones mask (i8 -1)
; The expected assembly below is autogenerated; regenerate it rather than
; editing it by hand.
declare <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %xmm0, %xmm3
; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm3 {%k1}
; CHECK-NEXT: vmovaps %xmm0, %xmm4
; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm4
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z}
; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0
; CHECK-NEXT: vpaddq %xmm2, %xmm4, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq

  %res = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res3 = call <2 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res4 = add <2 x i64> %res, %res1
  %res5 = add <2 x i64> %res3, %res2
  %res6 = add <2 x i64> %res5, %res4
  ret <2 x i64> %res6
}

; Calls @llvm.x86.avx512.mask.vpmadd52l.uq.256 four times and folds all four
; results into the returned value:
;   %res  - %x0, %x1, %x2 with mask %x3
;   %res1 - %x0, %x1, zeroinitializer with mask %x3
;   %res2 - zeroinitializer, %x1, zeroinitializer with mask %x3
;   %res3 - %x0, %x1, %x2 with an all-ones mask (i8 -1)
; The expected assembly below is autogenerated; regenerate it rather than
; editing it by hand.
declare <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpmadd52l_uq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %ymm0, %ymm3
; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm3 {%k1}
; CHECK-NEXT: vmovaps %ymm0, %ymm4
; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm4
; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z}
; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0
; CHECK-NEXT: vpaddq %ymm2, %ymm4, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq

  %res = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res3 = call <4 x i64> @llvm.x86.avx512.mask.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res4 = add <4 x i64> %res, %res1
  %res5 = add <4 x i64> %res3, %res2
  %res6 = add <4 x i64> %res5, %res4
  ret <4 x i64> %res6
}

; Calls @llvm.x86.avx512.maskz.vpmadd52l.uq.128 four times and folds all four
; results into the returned value:
;   %res  - %x0, %x1, %x2 with mask %x3
;   %res1 - %x0, %x1, zeroinitializer with mask %x3
;   %res2 - zeroinitializer, %x1, zeroinitializer with mask %x3
;   %res3 - %x0, %x1, %x2 with an all-ones mask (i8 -1)
; The expected assembly below is autogenerated; regenerate it rather than
; editing it by hand.
declare <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_128:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %xmm0, %xmm3
; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm3 {%k1} {z}
; CHECK-NEXT: vmovaps %xmm0, %xmm4
; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm4
; CHECK-NEXT: vpxord %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm2 {%k1} {z}
; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0
; CHECK-NEXT: vpaddq %xmm2, %xmm4, %xmm1
; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
; CHECK-NEXT: retq

  %res = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64> zeroinitializer, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res3 = call <2 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res4 = add <2 x i64> %res, %res1
  %res5 = add <2 x i64> %res3, %res2
  %res6 = add <2 x i64> %res5, %res4
  ret <2 x i64> %res6
}

; Calls @llvm.x86.avx512.maskz.vpmadd52l.uq.256 four times and folds all four
; results into the returned value:
;   %res  - %x0, %x1, %x2 with mask %x3
;   %res1 - %x0, %x1, zeroinitializer with mask %x3
;   %res2 - zeroinitializer, %x1, zeroinitializer with mask %x3
;   %res3 - %x0, %x1, %x2 with an all-ones mask (i8 -1)
; The expected assembly below is autogenerated; regenerate it rather than
; editing it by hand.
declare <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_maskz_vpmadd52l_uq_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpmadd52l_uq_256:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %ymm0, %ymm3
; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm3 {%k1} {z}
; CHECK-NEXT: vmovaps %ymm0, %ymm4
; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm4
; CHECK-NEXT: vpxord %ymm2, %ymm2, %ymm2
; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm2 {%k1} {z}
; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0
; CHECK-NEXT: vpaddq %ymm2, %ymm4, %ymm1
; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT: retq

  %res = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64> zeroinitializer, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res3 = call <4 x i64> @llvm.x86.avx512.maskz.vpmadd52l.uq.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res4 = add <4 x i64> %res, %res1
  %res5 = add <4 x i64> %res3, %res2
  %res6 = add <4 x i64> %res5, %res4
  ret <4 x i64> %res6
}