blob: d24cd0e5634d1411ef79ae1ffb335a7406b0b81c [file] [log] [blame]
// RUN: %clang_cc1 -ffreestanding %s -O2 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s

#include <immintrin.h>

// Checks that _mm512_reduce_add_epi64 lowers to a log2 shuffle/add tree ending in an i64 extract.
long long test_mm512_reduce_add_epi64(__m512i __W){
  // CHECK: %shuffle.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle1.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %add.i = add <4 x i64> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <4 x i64> %add.i, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  // CHECK: %shuffle3.i = shufflevector <4 x i64> %add.i, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  // CHECK: %add4.i = add <2 x i64> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <2 x i64> %add4.i, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  // CHECK: %add7.i = add <2 x i64> %shuffle6.i, %add4.i
  // CHECK: %vecext.i = extractelement <2 x i64> %add7.i, i32 0
  // CHECK: ret i64 %vecext.i
  return _mm512_reduce_add_epi64(__W);
}

// Checks that _mm512_reduce_mul_epi64 lowers to a log2 shuffle/mul tree ending in an i64 extract.
long long test_mm512_reduce_mul_epi64(__m512i __W){
  // CHECK: %shuffle.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle1.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %mul.i = mul <4 x i64> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <4 x i64> %mul.i, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  // CHECK: %shuffle3.i = shufflevector <4 x i64> %mul.i, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  // CHECK: %mul4.i = mul <2 x i64> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <2 x i64> %mul4.i, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  // CHECK: %mul7.i = mul <2 x i64> %shuffle6.i, %mul4.i
  // CHECK: %vecext.i = extractelement <2 x i64> %mul7.i, i32 0
  // CHECK: ret i64 %vecext.i
  return _mm512_reduce_mul_epi64(__W);
}

// Checks that _mm512_reduce_or_epi64 lowers to a log2 shuffle/or tree ending in an i64 extract.
long long test_mm512_reduce_or_epi64(__m512i __W){
  // CHECK: %shuffle.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle1.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %or.i = or <4 x i64> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <4 x i64> %or.i, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  // CHECK: %shuffle3.i = shufflevector <4 x i64> %or.i, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  // CHECK: %or4.i = or <2 x i64> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <2 x i64> %or4.i, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  // CHECK: %or7.i = or <2 x i64> %shuffle6.i, %or4.i
  // CHECK: %vecext.i = extractelement <2 x i64> %or7.i, i32 0
  // CHECK: ret i64 %vecext.i
  return _mm512_reduce_or_epi64(__W);
}

// Checks that _mm512_reduce_and_epi64 lowers to a log2 shuffle/and tree ending in an i64 extract.
long long test_mm512_reduce_and_epi64(__m512i __W){
  // CHECK: %shuffle.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle1.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %and.i = and <4 x i64> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <4 x i64> %and.i, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  // CHECK: %shuffle3.i = shufflevector <4 x i64> %and.i, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  // CHECK: %and4.i = and <2 x i64> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <2 x i64> %and4.i, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  // CHECK: %and7.i = and <2 x i64> %shuffle6.i, %and4.i
  // CHECK: %vecext.i = extractelement <2 x i64> %and7.i, i32 0
  // CHECK: ret i64 %vecext.i
  return _mm512_reduce_and_epi64(__W);
}

// Checks the masked add reduction: masked-off lanes are replaced by the add identity (0) before the tree.
long long test_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W){
  // CHECK: {{.*}} = bitcast i8 %__M to <8 x i1>
  // CHECK: {{.*}} = select <8 x i1> {{.*}}, <8 x i64> %__W, <8 x i64> zeroinitializer
  // CHECK: %shuffle.i = shufflevector <8 x i64> {{.*}}, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle1.i = shufflevector <8 x i64> {{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %add.i = add <4 x i64> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <4 x i64> %add.i, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  // CHECK: %shuffle3.i = shufflevector <4 x i64> %add.i, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  // CHECK: %add4.i = add <2 x i64> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <2 x i64> %add4.i, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  // CHECK: %add7.i = add <2 x i64> %shuffle6.i, %add4.i
  // CHECK: %vecext.i = extractelement <2 x i64> %add7.i, i32 0
  // CHECK: ret i64 %vecext.i
  return _mm512_mask_reduce_add_epi64(__M, __W);
}

// Checks the masked mul reduction: masked-off lanes are replaced by the mul identity (1) before the tree.
long long test_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W){
  // CHECK: {{.*}} = bitcast i8 %__M to <8 x i1>
  // CHECK: {{.*}} = select <8 x i1> {{.*}}, <8 x i64> %__W, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
  // CHECK: %shuffle.i = shufflevector <8 x i64> {{.*}}, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle1.i = shufflevector <8 x i64> {{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %mul.i = mul <4 x i64> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <4 x i64> %mul.i, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  // CHECK: %shuffle3.i = shufflevector <4 x i64> %mul.i, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  // CHECK: %mul4.i = mul <2 x i64> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <2 x i64> %mul4.i, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  // CHECK: %mul7.i = mul <2 x i64> %shuffle6.i, %mul4.i
  // CHECK: %vecext.i = extractelement <2 x i64> %mul7.i, i32 0
  // CHECK: ret i64 %vecext.i
  return _mm512_mask_reduce_mul_epi64(__M, __W);
}

// Checks the masked and reduction: masked-off lanes are replaced by the and identity (all-ones) before the tree.
long long test_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W){
  // CHECK: {{.*}} = bitcast i8 %__M to <8 x i1>
  // CHECK: {{.*}} = select <8 x i1> {{.*}}, <8 x i64> %__W, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
  // CHECK: %shuffle.i = shufflevector <8 x i64> {{.*}}, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle1.i = shufflevector <8 x i64> {{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %and.i = and <4 x i64> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <4 x i64> %and.i, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  // CHECK: %shuffle3.i = shufflevector <4 x i64> %and.i, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  // CHECK: %and4.i = and <2 x i64> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <2 x i64> %and4.i, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  // CHECK: %and7.i = and <2 x i64> %shuffle6.i, %and4.i
  // CHECK: %vecext.i = extractelement <2 x i64> %and7.i, i32 0
  // CHECK: ret i64 %vecext.i
  return _mm512_mask_reduce_and_epi64(__M, __W);
}

// Checks the masked or reduction: masked-off lanes are replaced by the or identity (0) before the tree.
long long test_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W){
  // CHECK: {{.*}} = bitcast i8 %__M to <8 x i1>
  // CHECK: {{.*}} = select <8 x i1> {{.*}}, <8 x i64> %__W, <8 x i64> zeroinitializer
  // CHECK: %shuffle.i = shufflevector <8 x i64> {{.*}}, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle1.i = shufflevector <8 x i64> {{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %or.i = or <4 x i64> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <4 x i64> %or.i, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  // CHECK: %shuffle3.i = shufflevector <4 x i64> %or.i, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
  // CHECK: %or4.i = or <2 x i64> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <2 x i64> %or4.i, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
  // CHECK: %or7.i = or <2 x i64> %shuffle6.i, %or4.i
  // CHECK: %vecext.i = extractelement <2 x i64> %or7.i, i32 0
  // CHECK: ret i64 %vecext.i
  return _mm512_mask_reduce_or_epi64(__M, __W);
}

// Checks that _mm512_reduce_add_epi32 bitcasts to <16 x i32>, reduces via a shuffle/add tree, and truncates the low i64 to i32.
int test_mm512_reduce_add_epi32(__m512i __W){
  // CHECK: {{.*}} = bitcast <8 x i64> %__W to <16 x i32>
  // CHECK: %shuffle.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  // CHECK: %shuffle1.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK: %add.i = add <8 x i32> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <8 x i32> %add.i, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle3.i = shufflevector <8 x i32> %add.i, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %add4.i = add <4 x i32> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <4 x i32> %add4.i, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // CHECK: %add7.i = add <4 x i32> %shuffle6.i, %add4.i
  // CHECK: %shuffle9.i = shufflevector <4 x i32> %add7.i, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // CHECK: %add10.i = add <4 x i32> %shuffle9.i, %add7.i
  // CHECK: {{.*}} = bitcast <4 x i32> %add10.i to <2 x i64>
  // CHECK: %vecext.i = extractelement <2 x i64> {{.*}}, i32 0
  // CHECK: %conv.i = trunc i64 %vecext.i to i32
  // CHECK: ret i32 %conv.i
  return _mm512_reduce_add_epi32(__W);
}

// Checks that _mm512_reduce_mul_epi32 bitcasts to <16 x i32>, reduces via a shuffle/mul tree, and truncates the low i64 to i32.
int test_mm512_reduce_mul_epi32(__m512i __W){
  // CHECK: {{.*}} = bitcast <8 x i64> %__W to <16 x i32>
  // CHECK: %shuffle.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  // CHECK: %shuffle1.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK: %mul.i = mul <8 x i32> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <8 x i32> %mul.i, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle3.i = shufflevector <8 x i32> %mul.i, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %mul4.i = mul <4 x i32> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <4 x i32> %mul4.i, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // CHECK: %mul7.i = mul <4 x i32> %shuffle6.i, %mul4.i
  // CHECK: %shuffle9.i = shufflevector <4 x i32> %mul7.i, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // CHECK: %mul10.i = mul <4 x i32> %shuffle9.i, %mul7.i
  // CHECK: {{.*}} = bitcast <4 x i32> %mul10.i to <2 x i64>
  // CHECK: %vecext.i = extractelement <2 x i64> {{.*}}, i32 0
  // CHECK: %conv.i = trunc i64 %vecext.i to i32
  // CHECK: ret i32 %conv.i
  return _mm512_reduce_mul_epi32(__W);
}

// Checks that _mm512_reduce_or_epi32 bitcasts to <16 x i32>, reduces via a shuffle/or tree, and truncates the low i64 to i32.
int test_mm512_reduce_or_epi32(__m512i __W){
  // CHECK: {{.*}} = bitcast <8 x i64> %__W to <16 x i32>
  // CHECK: %shuffle.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  // CHECK: %shuffle1.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK: %or.i = or <8 x i32> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <8 x i32> %or.i, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle3.i = shufflevector <8 x i32> %or.i, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %or4.i = or <4 x i32> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <4 x i32> %or4.i, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // CHECK: %or7.i = or <4 x i32> %shuffle6.i, %or4.i
  // CHECK: %shuffle9.i = shufflevector <4 x i32> %or7.i, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // CHECK: %or10.i = or <4 x i32> %shuffle9.i, %or7.i
  // CHECK: {{.*}} = bitcast <4 x i32> %or10.i to <2 x i64>
  // CHECK: %vecext.i = extractelement <2 x i64> {{.*}}, i32 0
  // CHECK: %conv.i = trunc i64 %vecext.i to i32
  // CHECK: ret i32 %conv.i
  return _mm512_reduce_or_epi32(__W);
}

// Checks that _mm512_reduce_and_epi32 bitcasts to <16 x i32>, reduces via a shuffle/and tree, and truncates the low i64 to i32.
int test_mm512_reduce_and_epi32(__m512i __W){
  // CHECK: {{.*}} = bitcast <8 x i64> %__W to <16 x i32>
  // CHECK: %shuffle.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  // CHECK: %shuffle1.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK: %and.i = and <8 x i32> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <8 x i32> %and.i, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle3.i = shufflevector <8 x i32> %and.i, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %and4.i = and <4 x i32> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <4 x i32> %and4.i, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // CHECK: %and7.i = and <4 x i32> %shuffle6.i, %and4.i
  // CHECK: %shuffle9.i = shufflevector <4 x i32> %and7.i, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // CHECK: %and10.i = and <4 x i32> %shuffle9.i, %and7.i
  // CHECK: {{.*}} = bitcast <4 x i32> %and10.i to <2 x i64>
  // CHECK: %vecext.i = extractelement <2 x i64> {{.*}}, i32 0
  // CHECK: %conv.i = trunc i64 %vecext.i to i32
  // CHECK: ret i32 %conv.i
  return _mm512_reduce_and_epi32(__W);
}

// Checks the masked i32 add reduction: masked-off lanes are replaced by the add identity (0) before the tree.
int test_mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W){
  // CHECK: {{.*}} = bitcast <8 x i64> %__W to <16 x i32>
  // CHECK: {{.*}} = bitcast i16 %__M to <16 x i1>
  // CHECK: {{.*}} = select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32> zeroinitializer
  // CHECK: %shuffle.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  // CHECK: %shuffle1.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK: %add.i = add <8 x i32> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <8 x i32> %add.i, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle3.i = shufflevector <8 x i32> %add.i, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %add4.i = add <4 x i32> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <4 x i32> %add4.i, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // CHECK: %add7.i = add <4 x i32> %shuffle6.i, %add4.i
  // CHECK: %shuffle9.i = shufflevector <4 x i32> %add7.i, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // CHECK: %add10.i = add <4 x i32> %shuffle9.i, %add7.i
  // CHECK: {{.*}} = bitcast <4 x i32> %add10.i to <2 x i64>
  // CHECK: %vecext.i = extractelement <2 x i64> {{.*}}, i32 0
  // CHECK: %conv.i = trunc i64 %vecext.i to i32
  // CHECK: ret i32 %conv.i
  return _mm512_mask_reduce_add_epi32(__M, __W);
}

// Checks the masked i32 mul reduction: masked-off lanes are replaced by the mul identity (1) before the tree.
int test_mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W){
  // CHECK: {{.*}} = bitcast <8 x i64> %__W to <16 x i32>
  // CHECK: {{.*}} = bitcast i16 %__M to <16 x i1>
  // CHECK: {{.*}} = select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  // CHECK: %shuffle.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  // CHECK: %shuffle1.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK: %mul.i = mul <8 x i32> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <8 x i32> %mul.i, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle3.i = shufflevector <8 x i32> %mul.i, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %mul4.i = mul <4 x i32> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <4 x i32> %mul4.i, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // CHECK: %mul7.i = mul <4 x i32> %shuffle6.i, %mul4.i
  // CHECK: %shuffle9.i = shufflevector <4 x i32> %mul7.i, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // CHECK: %mul10.i = mul <4 x i32> %shuffle9.i, %mul7.i
  // CHECK: {{.*}} = bitcast <4 x i32> %mul10.i to <2 x i64>
  // CHECK: %vecext.i = extractelement <2 x i64> {{.*}}, i32 0
  // CHECK: %conv.i = trunc i64 %vecext.i to i32
  // CHECK: ret i32 %conv.i
  return _mm512_mask_reduce_mul_epi32(__M, __W);
}

// Checks the masked i32 and reduction: masked-off lanes are replaced by the and identity (all-ones) before the tree.
int test_mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W){
  // CHECK: {{.*}} = bitcast <8 x i64> %__W to <16 x i32>
  // CHECK: {{.*}} = bitcast i16 %__M to <16 x i1>
  // CHECK: {{.*}} = select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  // CHECK: %shuffle.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  // CHECK: %shuffle1.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK: %and.i = and <8 x i32> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <8 x i32> %and.i, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle3.i = shufflevector <8 x i32> %and.i, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %and4.i = and <4 x i32> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <4 x i32> %and4.i, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // CHECK: %and7.i = and <4 x i32> %shuffle6.i, %and4.i
  // CHECK: %shuffle9.i = shufflevector <4 x i32> %and7.i, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // CHECK: %and10.i = and <4 x i32> %shuffle9.i, %and7.i
  // CHECK: {{.*}} = bitcast <4 x i32> %and10.i to <2 x i64>
  // CHECK: %vecext.i = extractelement <2 x i64> {{.*}}, i32 0
  // CHECK: %conv.i = trunc i64 %vecext.i to i32
  // CHECK: ret i32 %conv.i
  return _mm512_mask_reduce_and_epi32(__M, __W);
}

// Checks the masked i32 or reduction: masked-off lanes are replaced by the or identity (0) before the tree.
int test_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W){
  // CHECK: {{.*}} = bitcast <8 x i64> %__W to <16 x i32>
  // CHECK: {{.*}} = bitcast i16 %__M to <16 x i1>
  // CHECK: {{.*}} = select <16 x i1> {{.*}}, <16 x i32> {{.*}}, <16 x i32> zeroinitializer
  // CHECK: %shuffle.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  // CHECK: %shuffle1.i = shufflevector <16 x i32> {{.*}}, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK: %or.i = or <8 x i32> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <8 x i32> %or.i, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle3.i = shufflevector <8 x i32> %or.i, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %or4.i = or <4 x i32> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <4 x i32> %or4.i, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // CHECK: %or7.i = or <4 x i32> %shuffle6.i, %or4.i
  // CHECK: %shuffle9.i = shufflevector <4 x i32> %or7.i, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // CHECK: %or10.i = or <4 x i32> %shuffle9.i, %or7.i
  // CHECK: {{.*}} = bitcast <4 x i32> %or10.i to <2 x i64>
  // CHECK: %vecext.i = extractelement <2 x i64> {{.*}}, i32 0
  // CHECK: %conv.i = trunc i64 %vecext.i to i32
  // CHECK: ret i32 %conv.i
  return _mm512_mask_reduce_or_epi32(__M, __W);
}

// Checks that _mm512_reduce_add_pd lowers to a shuffle/fadd tree ending in a double extract.
double test_mm512_reduce_add_pd(__m512d __W){
  // CHECK: %shuffle.i = shufflevector <8 x double> %__W, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle1.i = shufflevector <8 x double> %__W, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %add.i = fadd <4 x double> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <4 x double> %add.i, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  // CHECK: %shuffle3.i = shufflevector <4 x double> %add.i, <4 x double> undef, <2 x i32> <i32 2, i32 3>
  // CHECK: %add4.i = fadd <2 x double> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <2 x double> %add4.i, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
  // CHECK: %add7.i = fadd <2 x double> %add4.i, %shuffle6.i
  // CHECK: %vecext.i = extractelement <2 x double> %add7.i, i32 0
  // CHECK: ret double %vecext.i
  return _mm512_reduce_add_pd(__W);
}

// Checks that _mm512_reduce_mul_pd lowers to a shuffle/fmul tree ending in a double extract.
double test_mm512_reduce_mul_pd(__m512d __W){
  // CHECK: %shuffle.i = shufflevector <8 x double> %__W, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle1.i = shufflevector <8 x double> %__W, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %mul.i = fmul <4 x double> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <4 x double> %mul.i, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  // CHECK: %shuffle3.i = shufflevector <4 x double> %mul.i, <4 x double> undef, <2 x i32> <i32 2, i32 3>
  // CHECK: %mul4.i = fmul <2 x double> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <2 x double> %mul4.i, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
  // CHECK: %mul7.i = fmul <2 x double> %mul4.i, %shuffle6.i
  // CHECK: %vecext.i = extractelement <2 x double> %mul7.i, i32 0
  // CHECK: ret double %vecext.i
  return _mm512_reduce_mul_pd(__W);
}

// Checks that _mm512_reduce_add_ps lowers to a shuffle/fadd tree ending in a float extract.
float test_mm512_reduce_add_ps(__m512 __W){
  // CHECK: %shuffle.i = shufflevector <16 x float> %__W, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  // CHECK: %shuffle1.i = shufflevector <16 x float> %__W, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK: %add.i = fadd <8 x float> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <8 x float> %add.i, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle3.i = shufflevector <8 x float> %add.i, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %add4.i = fadd <4 x float> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <4 x float> %add4.i, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // CHECK: %add7.i = fadd <4 x float> %add4.i, %shuffle6.i
  // CHECK: %shuffle9.i = shufflevector <4 x float> %add7.i, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // CHECK: %add10.i = fadd <4 x float> %add7.i, %shuffle9.i
  // CHECK: %vecext.i = extractelement <4 x float> %add10.i, i32 0
  // CHECK: ret float %vecext.i
  return _mm512_reduce_add_ps(__W);
}

// Checks that _mm512_reduce_mul_ps lowers to a shuffle/fmul tree ending in a float extract.
float test_mm512_reduce_mul_ps(__m512 __W){
  // CHECK: %shuffle.i = shufflevector <16 x float> %__W, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  // CHECK: %shuffle1.i = shufflevector <16 x float> %__W, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK: %mul.i = fmul <8 x float> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <8 x float> %mul.i, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle3.i = shufflevector <8 x float> %mul.i, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %mul4.i = fmul <4 x float> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <4 x float> %mul4.i, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // CHECK: %mul7.i = fmul <4 x float> %mul4.i, %shuffle6.i
  // CHECK: %shuffle9.i = shufflevector <4 x float> %mul7.i, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // CHECK: %mul10.i = fmul <4 x float> %mul7.i, %shuffle9.i
  // CHECK: %vecext.i = extractelement <4 x float> %mul10.i, i32 0
  // CHECK: ret float %vecext.i
  return _mm512_reduce_mul_ps(__W);
}

// Checks the masked double add reduction: masked-off lanes are replaced by the fadd identity (0.0) before the tree.
double test_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W){
  // CHECK: {{.*}} = bitcast i8 %__M to <8 x i1>
  // CHECK: {{.*}} = select <8 x i1> {{.*}}, <8 x double> %__W, <8 x double> zeroinitializer
  // CHECK: %shuffle.i = shufflevector <8 x double> {{.*}}, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle1.i = shufflevector <8 x double> {{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %add.i = fadd <4 x double> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <4 x double> %add.i, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  // CHECK: %shuffle3.i = shufflevector <4 x double> %add.i, <4 x double> undef, <2 x i32> <i32 2, i32 3>
  // CHECK: %add4.i = fadd <2 x double> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <2 x double> %add4.i, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
  // CHECK: %add7.i = fadd <2 x double> %add4.i, %shuffle6.i
  // CHECK: %vecext.i = extractelement <2 x double> %add7.i, i32 0
  // CHECK: ret double %vecext.i
  return _mm512_mask_reduce_add_pd(__M, __W);
}

// Checks the masked double mul reduction: masked-off lanes are replaced by the fmul identity (1.0) before the tree.
double test_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W){
  // CHECK: {{.*}} = bitcast i8 %__M to <8 x i1>
  // CHECK: {{.*}} = select <8 x i1> {{.*}}, <8 x double> %__W, <8 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
  // CHECK: %shuffle.i = shufflevector <8 x double> {{.*}}, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle1.i = shufflevector <8 x double> {{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %mul.i = fmul <4 x double> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <4 x double> %mul.i, <4 x double> undef, <2 x i32> <i32 0, i32 1>
  // CHECK: %shuffle3.i = shufflevector <4 x double> %mul.i, <4 x double> undef, <2 x i32> <i32 2, i32 3>
  // CHECK: %mul4.i = fmul <2 x double> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <2 x double> %mul4.i, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
  // CHECK: %mul7.i = fmul <2 x double> %mul4.i, %shuffle6.i
  // CHECK: %vecext.i = extractelement <2 x double> %mul7.i, i32 0
  // CHECK: ret double %vecext.i
  return _mm512_mask_reduce_mul_pd(__M, __W);
}

// Checks the masked float add reduction: masked-off lanes are replaced by the fadd identity (0.0f) before the tree.
float test_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W){
  // CHECK: {{.*}} = bitcast i16 %__M to <16 x i1>
  // CHECK: {{.*}} = select <16 x i1> {{.*}}, <16 x float> %__W, <16 x float> zeroinitializer
  // CHECK: %shuffle.i = shufflevector <16 x float> {{.*}}, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  // CHECK: %shuffle1.i = shufflevector <16 x float> {{.*}}, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK: %add.i = fadd <8 x float> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <8 x float> %add.i, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle3.i = shufflevector <8 x float> %add.i, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %add4.i = fadd <4 x float> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <4 x float> %add4.i, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // CHECK: %add7.i = fadd <4 x float> %add4.i, %shuffle6.i
  // CHECK: %shuffle9.i = shufflevector <4 x float> %add7.i, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // CHECK: %add10.i = fadd <4 x float> %add7.i, %shuffle9.i
  // CHECK: %vecext.i = extractelement <4 x float> %add10.i, i32 0
  // CHECK: ret float %vecext.i
  return _mm512_mask_reduce_add_ps(__M, __W);
}

// Checks the masked float mul reduction: masked-off lanes are replaced by the fmul identity (1.0f) before the tree.
float test_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W){
  // CHECK: {{.*}} = bitcast i16 %__M to <16 x i1>
  // CHECK: {{.*}} = select <16 x i1> {{.*}}, <16 x float> %__W, <16 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
  // CHECK: %shuffle.i = shufflevector <16 x float> {{.*}}, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  // CHECK: %shuffle1.i = shufflevector <16 x float> {{.*}}, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  // CHECK: %mul.i = fmul <8 x float> %shuffle.i, %shuffle1.i
  // CHECK: %shuffle2.i = shufflevector <8 x float> %mul.i, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  // CHECK: %shuffle3.i = shufflevector <8 x float> %mul.i, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  // CHECK: %mul4.i = fmul <4 x float> %shuffle2.i, %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <4 x float> %mul4.i, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // CHECK: %mul7.i = fmul <4 x float> %mul4.i, %shuffle6.i
  // CHECK: %shuffle9.i = shufflevector <4 x float> %mul7.i, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // CHECK: %mul10.i = fmul <4 x float> %mul7.i, %shuffle9.i
  // CHECK: %vecext.i = extractelement <4 x float> %mul10.i, i32 0
  // CHECK: ret float %vecext.i
  return _mm512_mask_reduce_mul_ps(__M, __W);
}