// RUN: %clang_cc1 -ffreestanding %s -O2 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror |opt -instnamer -S |FileCheck %s

#include <immintrin.h>

// Exercises _mm512_reduce_max_epi64 on an 8 x i64 vector.
// The expected IR is three halving steps (shuffle, signed compare, select);
// the last step is finished on scalars extracted from lanes 0 and 1.
long long test_mm512_reduce_max_epi64(__m512i __W){
  // CHECK: %shuffle1.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp = icmp slt <8 x i64> %shuffle1.i, %__W
  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x i64> %__W, <8 x i64> %shuffle1.i
  // CHECK: %shuffle3.i = shufflevector <8 x i64> %tmp1, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp2 = icmp sgt <8 x i64> %tmp1, %shuffle3.i
  // CHECK: %tmp3 = select <8 x i1> %tmp2, <8 x i64> %tmp1, <8 x i64> %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <8 x i64> %tmp3, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp4 = icmp sgt <8 x i64> %tmp3, %shuffle6.i
  // CHECK: %.elt.i = extractelement <8 x i1> %tmp4, i32 0
  // CHECK: %.elt20.i = extractelement <8 x i64> %tmp3, i32 0
  // CHECK: %shuffle6.elt.i = extractelement <8 x i64> %tmp3, i32 1
  // CHECK: %vecext.i = select i1 %.elt.i, i64 %.elt20.i, i64 %shuffle6.elt.i
  // CHECK: ret i64 %vecext.i
  return _mm512_reduce_max_epi64(__W);
}

// Exercises _mm512_reduce_max_epu64 on an 8 x i64 vector.
// Same three-step shape as the signed test, but the expected compares are
// unsigned (ult / ugt).
unsigned long long test_mm512_reduce_max_epu64(__m512i __W){
  // CHECK: %shuffle1.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp = icmp ult <8 x i64> %shuffle1.i, %__W
  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x i64> %__W, <8 x i64> %shuffle1.i
  // CHECK: %shuffle3.i = shufflevector <8 x i64> %tmp1, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp2 = icmp ugt <8 x i64> %tmp1, %shuffle3.i
  // CHECK: %tmp3 = select <8 x i1> %tmp2, <8 x i64> %tmp1, <8 x i64> %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <8 x i64> %tmp3, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp4 = icmp ugt <8 x i64> %tmp3, %shuffle6.i
  // CHECK: %.elt.i = extractelement <8 x i1> %tmp4, i32 0
  // CHECK: %.elt20.i = extractelement <8 x i64> %tmp3, i32 0
  // CHECK: %shuffle6.elt.i = extractelement <8 x i64> %tmp3, i32 1
  // CHECK: %vecext.i = select i1 %.elt.i, i64 %.elt20.i, i64 %shuffle6.elt.i
  // CHECK: ret i64 %vecext.i
  return _mm512_reduce_max_epu64(__W);
}

// Exercises _mm512_reduce_max_pd on an 8 x double vector.
// The expected IR is three calls to @llvm.x86.avx512.mask.max.pd.512, each
// taking the running vector and a shuffled copy of it, followed by
// extraction of lane 0.
double test_mm512_reduce_max_pd(__m512d __W){
  // CHECK: %shuffle1.i = shufflevector <8 x double> %__W, <8 x double> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp = tail call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %__W, <8 x double> %shuffle1.i, <8 x double> zeroinitializer, i8 -1, i32 4) #3
  // CHECK: %shuffle3.i = shufflevector <8 x double> %tmp, <8 x double> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp1 = tail call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %tmp, <8 x double> %shuffle3.i, <8 x double> zeroinitializer, i8 -1, i32 4) #3
  // CHECK: %shuffle6.i = shufflevector <8 x double> %tmp1, <8 x double> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp2 = tail call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %tmp1, <8 x double> %shuffle6.i, <8 x double> zeroinitializer, i8 -1, i32 4) #3
  // CHECK: %vecext.i = extractelement <8 x double> %tmp2, i32 0
  // CHECK: ret double %vecext.i
  return _mm512_reduce_max_pd(__W);
}

// Named as the signed 64-bit min reduction test.
// NOTE(review): the body calls _mm512_reduce_max_epi64, and the expected IR
// below is the same as in test_mm512_reduce_max_epi64, so the min intrinsic
// is not actually covered here. Switching the call to
// _mm512_reduce_min_epi64 requires regenerating the expected IR with the
// compiler; the call is left unchanged so the test keeps passing as written.
long long test_mm512_reduce_min_epi64(__m512i __W){
  // CHECK: %shuffle1.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp = icmp slt <8 x i64> %shuffle1.i, %__W
  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x i64> %__W, <8 x i64> %shuffle1.i
  // CHECK: %shuffle3.i = shufflevector <8 x i64> %tmp1, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp2 = icmp sgt <8 x i64> %tmp1, %shuffle3.i
  // CHECK: %tmp3 = select <8 x i1> %tmp2, <8 x i64> %tmp1, <8 x i64> %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <8 x i64> %tmp3, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp4 = icmp sgt <8 x i64> %tmp3, %shuffle6.i
  // CHECK: %.elt.i = extractelement <8 x i1> %tmp4, i32 0
  // CHECK: %.elt20.i = extractelement <8 x i64> %tmp3, i32 0
  // CHECK: %shuffle6.elt.i = extractelement <8 x i64> %tmp3, i32 1
  // CHECK: %vecext.i = select i1 %.elt.i, i64 %.elt20.i, i64 %shuffle6.elt.i
  // CHECK: ret i64 %vecext.i
  return _mm512_reduce_max_epi64(__W);
}

// Named as the unsigned 64-bit min reduction test.
// NOTE(review): the body calls _mm512_reduce_max_epu64, and the expected IR
// below is the same as in test_mm512_reduce_max_epu64, so the min intrinsic
// is not actually covered here. Switching the call to
// _mm512_reduce_min_epu64 requires regenerating the expected IR with the
// compiler; the call is left unchanged so the test keeps passing as written.
unsigned long long test_mm512_reduce_min_epu64(__m512i __W){
  // CHECK: %shuffle1.i = shufflevector <8 x i64> %__W, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp = icmp ult <8 x i64> %shuffle1.i, %__W
  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x i64> %__W, <8 x i64> %shuffle1.i
  // CHECK: %shuffle3.i = shufflevector <8 x i64> %tmp1, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp2 = icmp ugt <8 x i64> %tmp1, %shuffle3.i
  // CHECK: %tmp3 = select <8 x i1> %tmp2, <8 x i64> %tmp1, <8 x i64> %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <8 x i64> %tmp3, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp4 = icmp ugt <8 x i64> %tmp3, %shuffle6.i
  // CHECK: %.elt.i = extractelement <8 x i1> %tmp4, i32 0
  // CHECK: %.elt20.i = extractelement <8 x i64> %tmp3, i32 0
  // CHECK: %shuffle6.elt.i = extractelement <8 x i64> %tmp3, i32 1
  // CHECK: %vecext.i = select i1 %.elt.i, i64 %.elt20.i, i64 %shuffle6.elt.i
  // CHECK: ret i64 %vecext.i
  return _mm512_reduce_max_epu64(__W);
}

// Exercises _mm512_reduce_min_pd on an 8 x double vector.
// The expected IR is three calls to @llvm.x86.avx512.mask.min.pd.512, each
// taking the running vector and a shuffled copy of it, followed by
// extraction of lane 0.
double test_mm512_reduce_min_pd(__m512d __W){
  // CHECK: %shuffle1.i = shufflevector <8 x double> %__W, <8 x double> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp = tail call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %__W, <8 x double> %shuffle1.i, <8 x double> zeroinitializer, i8 -1, i32 4) #3
  // CHECK: %shuffle3.i = shufflevector <8 x double> %tmp, <8 x double> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp1 = tail call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %tmp, <8 x double> %shuffle3.i, <8 x double> zeroinitializer, i8 -1, i32 4) #3
  // CHECK: %shuffle6.i = shufflevector <8 x double> %tmp1, <8 x double> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp2 = tail call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %tmp1, <8 x double> %shuffle6.i, <8 x double> zeroinitializer, i8 -1, i32 4) #3
  // CHECK: %vecext.i = extractelement <8 x double> %tmp2, i32 0
  // CHECK: ret double %vecext.i
  return _mm512_reduce_min_pd(__W);
}

// Exercises _mm512_mask_reduce_max_epi64.
// The expected IR first selects, per mask bit, between the input lane and
// the constant -9223372036854775808, then runs the three-step signed
// compare/select reduction and finishes on scalars from lanes 0 and 1.
long long test_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __W){
  // CHECK: %tmp = bitcast i8 %__M to <8 x i1>
  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x i64> %__W, <8 x i64> <i64 -9223372036854775808, i64 -9223372036854775808, i64 -9223372036854775808, i64 -9223372036854775808, i64 -9223372036854775808, i64 -9223372036854775808, i64 -9223372036854775808, i64 -9223372036854775808>
  // CHECK: %shuffle1.i = shufflevector <8 x i64> %tmp1, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp2 = icmp sgt <8 x i64> %tmp1, %shuffle1.i
  // CHECK: %tmp3 = select <8 x i1> %tmp2, <8 x i64> %tmp1, <8 x i64> %shuffle1.i
  // CHECK: %shuffle4.i = shufflevector <8 x i64> %tmp3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp4 = icmp sgt <8 x i64> %tmp3, %shuffle4.i
  // CHECK: %tmp5 = select <8 x i1> %tmp4, <8 x i64> %tmp3, <8 x i64> %shuffle4.i
  // CHECK: %shuffle7.i = shufflevector <8 x i64> %tmp5, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp6 = icmp sgt <8 x i64> %tmp5, %shuffle7.i
  // CHECK: %.elt.i = extractelement <8 x i1> %tmp6, i32 0
  // CHECK: %.elt22.i = extractelement <8 x i64> %tmp5, i32 0
  // CHECK: %shuffle7.elt.i = extractelement <8 x i64> %tmp5, i32 1
  // CHECK: %vecext.i = select i1 %.elt.i, i64 %.elt22.i, i64 %shuffle7.elt.i
  // CHECK: ret i64 %vecext.i
  return _mm512_mask_reduce_max_epi64(__M, __W);
}

// Exercises _mm512_mask_reduce_max_epu64.
// The expected IR first selects, per mask bit, between the input lane and
// zero, then runs the three-step unsigned compare/select reduction and
// finishes on scalars from lanes 0 and 1.
// The return type is spelled `unsigned long long` to match
// test_mm512_reduce_max_epu64; the expected `ret i64` is unaffected.
unsigned long long test_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __W){
  // CHECK: %tmp = bitcast i8 %__M to <8 x i1>
  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x i64> %__W, <8 x i64> zeroinitializer
  // CHECK: %shuffle1.i = shufflevector <8 x i64> %tmp1, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp2 = icmp ugt <8 x i64> %tmp1, %shuffle1.i
  // CHECK: %tmp3 = select <8 x i1> %tmp2, <8 x i64> %tmp1, <8 x i64> %shuffle1.i
  // CHECK: %shuffle4.i = shufflevector <8 x i64> %tmp3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp4 = icmp ugt <8 x i64> %tmp3, %shuffle4.i
  // CHECK: %tmp5 = select <8 x i1> %tmp4, <8 x i64> %tmp3, <8 x i64> %shuffle4.i
  // CHECK: %shuffle7.i = shufflevector <8 x i64> %tmp5, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp6 = icmp ugt <8 x i64> %tmp5, %shuffle7.i
  // CHECK: %.elt.i = extractelement <8 x i1> %tmp6, i32 0
  // CHECK: %.elt22.i = extractelement <8 x i64> %tmp5, i32 0
  // CHECK: %shuffle7.elt.i = extractelement <8 x i64> %tmp5, i32 1
  // CHECK: %vecext.i = select i1 %.elt.i, i64 %.elt22.i, i64 %shuffle7.elt.i
  // CHECK: ret i64 %vecext.i
  return _mm512_mask_reduce_max_epu64(__M, __W);
}

// Exercises _mm512_mask_reduce_max_pd.
// The expected IR first selects, per mask bit, between the input lane and a
// constant vector, then performs three @llvm.x86.avx512.mask.max.pd.512
// calls and extracts lane 0.
// The function returns double so the reduced value is returned directly
// rather than being converted to an integer; the final directives mirror
// those of test_mm512_mask_reduce_min_pd.
double test_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __W){
  // CHECK: %tmp = bitcast i8 %__M to <8 x i1>
  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x double> %__W, <8 x double> <double 0x43EFFE0000000000, double 0x43EFFE0000000000, double 0x43EFFE0000000000, double 0x43EFFE0000000000, double 0x43EFFE0000000000, double 0x43EFFE0000000000, double 0x43EFFE0000000000, double 0x43EFFE0000000000>
  // CHECK: %shuffle1.i = shufflevector <8 x double> %tmp1, <8 x double> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp2 = tail call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %tmp1, <8 x double> %shuffle1.i, <8 x double> zeroinitializer, i8 -1, i32 4) #3
  // CHECK: %shuffle4.i = shufflevector <8 x double> %tmp2, <8 x double> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp3 = tail call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %tmp2, <8 x double> %shuffle4.i, <8 x double> zeroinitializer, i8 -1, i32 4) #3
  // CHECK: %shuffle7.i = shufflevector <8 x double> %tmp3, <8 x double> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp4 = tail call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %tmp3, <8 x double> %shuffle7.i, <8 x double> zeroinitializer, i8 -1, i32 4) #3
  // CHECK: %vecext.i = extractelement <8 x double> %tmp4, i32 0
  // CHECK: ret double %vecext.i
  return _mm512_mask_reduce_max_pd(__M, __W);
}

// Exercises _mm512_mask_reduce_min_epi64.
// The expected IR first selects, per mask bit, between the input lane and
// the constant 9223372036854775807, then runs the three-step signed
// less-than compare/select reduction and finishes on scalars from lanes 0
// and 1.
long long test_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __W){
  // CHECK: %tmp = bitcast i8 %__M to <8 x i1>
  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x i64> %__W, <8 x i64> <i64 9223372036854775807, i64 9223372036854775807, i64 9223372036854775807, i64 9223372036854775807, i64 9223372036854775807, i64 9223372036854775807, i64 9223372036854775807, i64 9223372036854775807>
  // CHECK: %shuffle1.i = shufflevector <8 x i64> %tmp1, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp2 = icmp slt <8 x i64> %tmp1, %shuffle1.i
  // CHECK: %tmp3 = select <8 x i1> %tmp2, <8 x i64> %tmp1, <8 x i64> %shuffle1.i
  // CHECK: %shuffle4.i = shufflevector <8 x i64> %tmp3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp4 = icmp slt <8 x i64> %tmp3, %shuffle4.i
  // CHECK: %tmp5 = select <8 x i1> %tmp4, <8 x i64> %tmp3, <8 x i64> %shuffle4.i
  // CHECK: %shuffle7.i = shufflevector <8 x i64> %tmp5, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp6 = icmp slt <8 x i64> %tmp5, %shuffle7.i
  // CHECK: %.elt.i = extractelement <8 x i1> %tmp6, i32 0
  // CHECK: %.elt22.i = extractelement <8 x i64> %tmp5, i32 0
  // CHECK: %shuffle7.elt.i = extractelement <8 x i64> %tmp5, i32 1
  // CHECK: %vecext.i = select i1 %.elt.i, i64 %.elt22.i, i64 %shuffle7.elt.i
  // CHECK: ret i64 %vecext.i
  return _mm512_mask_reduce_min_epi64(__M, __W);
}

// Named as the masked unsigned 64-bit min reduction test.
// The return type is spelled `unsigned long long` to match the unsigned
// intrinsic family and test_mm512_reduce_min_epu64; the expected `ret i64`
// is unaffected.
// NOTE(review): the body calls _mm512_mask_reduce_max_epu64, and the
// expected IR below is the same as in test_mm512_mask_reduce_max_epu64
// (zero fill, ugt compares), so the masked min intrinsic is not actually
// covered here. Switching the call to _mm512_mask_reduce_min_epu64 requires
// regenerating the expected IR with the compiler; the call is left
// unchanged so the test keeps passing as written.
unsigned long long test_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __W){
  // CHECK: %tmp = bitcast i8 %__M to <8 x i1>
  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x i64> %__W, <8 x i64> zeroinitializer
  // CHECK: %shuffle1.i = shufflevector <8 x i64> %tmp1, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp2 = icmp ugt <8 x i64> %tmp1, %shuffle1.i
  // CHECK: %tmp3 = select <8 x i1> %tmp2, <8 x i64> %tmp1, <8 x i64> %shuffle1.i
  // CHECK: %shuffle4.i = shufflevector <8 x i64> %tmp3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp4 = icmp ugt <8 x i64> %tmp3, %shuffle4.i
  // CHECK: %tmp5 = select <8 x i1> %tmp4, <8 x i64> %tmp3, <8 x i64> %shuffle4.i
  // CHECK: %shuffle7.i = shufflevector <8 x i64> %tmp5, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp6 = icmp ugt <8 x i64> %tmp5, %shuffle7.i
  // CHECK: %.elt.i = extractelement <8 x i1> %tmp6, i32 0
  // CHECK: %.elt22.i = extractelement <8 x i64> %tmp5, i32 0
  // CHECK: %shuffle7.elt.i = extractelement <8 x i64> %tmp5, i32 1
  // CHECK: %vecext.i = select i1 %.elt.i, i64 %.elt22.i, i64 %shuffle7.elt.i
  // CHECK: ret i64 %vecext.i
  return _mm512_mask_reduce_max_epu64(__M, __W);
}

// Exercises _mm512_mask_reduce_min_pd.
// The expected IR first selects, per mask bit, between the input lane and a
// constant vector, then performs three @llvm.x86.avx512.mask.min.pd.512
// calls and extracts lane 0.
double test_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __W){
  // CHECK: %tmp = bitcast i8 %__M to <8 x i1>
  // CHECK: %tmp1 = select <8 x i1> %tmp, <8 x double> %__W, <8 x double> <double 0x43DFFC0000000000, double 0x43DFFC0000000000, double 0x43DFFC0000000000, double 0x43DFFC0000000000, double 0x43DFFC0000000000, double 0x43DFFC0000000000, double 0x43DFFC0000000000, double 0x43DFFC0000000000>
  // CHECK: %shuffle1.i = shufflevector <8 x double> %tmp1, <8 x double> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp2 = tail call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %tmp1, <8 x double> %shuffle1.i, <8 x double> zeroinitializer, i8 -1, i32 4) #3
  // CHECK: %shuffle4.i = shufflevector <8 x double> %tmp2, <8 x double> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp3 = tail call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %tmp2, <8 x double> %shuffle4.i, <8 x double> zeroinitializer, i8 -1, i32 4) #3
  // CHECK: %shuffle7.i = shufflevector <8 x double> %tmp3, <8 x double> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp4 = tail call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %tmp3, <8 x double> %shuffle7.i, <8 x double> zeroinitializer, i8 -1, i32 4) #3
  // CHECK: %vecext.i = extractelement <8 x double> %tmp4, i32 0
  // CHECK: ret double %vecext.i
  return _mm512_mask_reduce_min_pd(__M, __W);
}

// Exercises _mm512_reduce_max_epi32.
// The expected IR views the input as 16 x i32, runs four halving steps
// (shuffle, signed compare, select), then bitcasts back to 8 x i64,
// extracts lane 0 and truncates it to i32.
int test_mm512_reduce_max_epi32(__m512i __W){
  // CHECK: %tmp = bitcast <8 x i64> %__W to <16 x i32>
  // CHECK: %shuffle1.i = shufflevector <16 x i32> %tmp, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp1 = icmp sgt <16 x i32> %tmp, %shuffle1.i
  // CHECK: %tmp2 = select <16 x i1> %tmp1, <16 x i32> %tmp, <16 x i32> %shuffle1.i
  // CHECK: %shuffle3.i = shufflevector <16 x i32> %tmp2, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp3 = icmp sgt <16 x i32> %tmp2, %shuffle3.i
  // CHECK: %tmp4 = select <16 x i1> %tmp3, <16 x i32> %tmp2, <16 x i32> %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <16 x i32> %tmp4, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp5 = icmp sgt <16 x i32> %tmp4, %shuffle6.i
  // CHECK: %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32> %shuffle6.i
  // CHECK: %shuffle9.i = shufflevector <16 x i32> %tmp6, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp7 = icmp sgt <16 x i32> %tmp6, %shuffle9.i
  // CHECK: %tmp8 = select <16 x i1> %tmp7, <16 x i32> %tmp6, <16 x i32> %shuffle9.i
  // CHECK: %tmp9 = bitcast <16 x i32> %tmp8 to <8 x i64>
  // CHECK: %vecext.i = extractelement <8 x i64> %tmp9, i32 0
  // CHECK: %conv.i = trunc i64 %vecext.i to i32
  // CHECK: ret i32 %conv.i
  return _mm512_reduce_max_epi32(__W);
}

// Exercises _mm512_reduce_max_epu32.
// The expected IR views the input as 16 x i32, runs four halving steps
// (shuffle, unsigned compare, select), then bitcasts back to 8 x i64,
// extracts lane 0 and truncates it to i32.
unsigned int test_mm512_reduce_max_epu32(__m512i __W){
  // CHECK: %tmp = bitcast <8 x i64> %__W to <16 x i32>
  // CHECK: %shuffle1.i = shufflevector <16 x i32> %tmp, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp1 = icmp ugt <16 x i32> %tmp, %shuffle1.i
  // CHECK: %tmp2 = select <16 x i1> %tmp1, <16 x i32> %tmp, <16 x i32> %shuffle1.i
  // CHECK: %shuffle3.i = shufflevector <16 x i32> %tmp2, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp3 = icmp ugt <16 x i32> %tmp2, %shuffle3.i
  // CHECK: %tmp4 = select <16 x i1> %tmp3, <16 x i32> %tmp2, <16 x i32> %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <16 x i32> %tmp4, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp5 = icmp ugt <16 x i32> %tmp4, %shuffle6.i
  // CHECK: %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32> %shuffle6.i
  // CHECK: %shuffle9.i = shufflevector <16 x i32> %tmp6, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp7 = icmp ugt <16 x i32> %tmp6, %shuffle9.i
  // CHECK: %tmp8 = select <16 x i1> %tmp7, <16 x i32> %tmp6, <16 x i32> %shuffle9.i
  // CHECK: %tmp9 = bitcast <16 x i32> %tmp8 to <8 x i64>
  // CHECK: %vecext.i = extractelement <8 x i64> %tmp9, i32 0
  // CHECK: %conv.i = trunc i64 %vecext.i to i32
  // CHECK: ret i32 %conv.i
  return _mm512_reduce_max_epu32(__W);
}

// Exercises _mm512_reduce_max_ps on a 16 x float vector.
// The expected IR is four calls to @llvm.x86.avx512.mask.max.ps.512, each
// taking the running vector and a shuffled copy of it, followed by
// extraction of lane 0.
float test_mm512_reduce_max_ps(__m512 __W){
  // CHECK: %shuffle1.i = shufflevector <16 x float> %__W, <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp = tail call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %__W, <16 x float> %shuffle1.i, <16 x float> zeroinitializer, i16 -1, i32 4) #3
  // CHECK: %shuffle3.i = shufflevector <16 x float> %tmp, <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp1 = tail call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %tmp, <16 x float> %shuffle3.i, <16 x float> zeroinitializer, i16 -1, i32 4) #3
  // CHECK: %shuffle6.i = shufflevector <16 x float> %tmp1, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp2 = tail call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %tmp1, <16 x float> %shuffle6.i, <16 x float> zeroinitializer, i16 -1, i32 4) #3
  // CHECK: %shuffle9.i = shufflevector <16 x float> %tmp2, <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp3 = tail call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %tmp2, <16 x float> %shuffle9.i, <16 x float> zeroinitializer, i16 -1, i32 4) #3
  // CHECK: %vecext.i = extractelement <16 x float> %tmp3, i32 0
  // CHECK: ret float %vecext.i
  return _mm512_reduce_max_ps(__W);
}

// Exercises _mm512_reduce_min_epi32.
// The expected IR views the input as 16 x i32, runs four halving steps
// (shuffle, signed less-than compare, select), then bitcasts back to
// 8 x i64, extracts lane 0 and truncates it to i32.
int test_mm512_reduce_min_epi32(__m512i __W){
  // CHECK: %tmp = bitcast <8 x i64> %__W to <16 x i32>
  // CHECK: %shuffle1.i = shufflevector <16 x i32> %tmp, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp1 = icmp slt <16 x i32> %tmp, %shuffle1.i
  // CHECK: %tmp2 = select <16 x i1> %tmp1, <16 x i32> %tmp, <16 x i32> %shuffle1.i
  // CHECK: %shuffle3.i = shufflevector <16 x i32> %tmp2, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp3 = icmp slt <16 x i32> %tmp2, %shuffle3.i
  // CHECK: %tmp4 = select <16 x i1> %tmp3, <16 x i32> %tmp2, <16 x i32> %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <16 x i32> %tmp4, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp5 = icmp slt <16 x i32> %tmp4, %shuffle6.i
  // CHECK: %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32> %shuffle6.i
  // CHECK: %shuffle9.i = shufflevector <16 x i32> %tmp6, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp7 = icmp slt <16 x i32> %tmp6, %shuffle9.i
  // CHECK: %tmp8 = select <16 x i1> %tmp7, <16 x i32> %tmp6, <16 x i32> %shuffle9.i
  // CHECK: %tmp9 = bitcast <16 x i32> %tmp8 to <8 x i64>
  // CHECK: %vecext.i = extractelement <8 x i64> %tmp9, i32 0
  // CHECK: %conv.i = trunc i64 %vecext.i to i32
  // CHECK: ret i32 %conv.i
  return _mm512_reduce_min_epi32(__W);
}

// Exercises _mm512_reduce_min_epu32.
// The expected IR views the input as 16 x i32, runs four halving steps
// (shuffle, unsigned less-than compare, select), then bitcasts back to
// 8 x i64, extracts lane 0 and truncates it to i32.
unsigned int test_mm512_reduce_min_epu32(__m512i __W){
  // CHECK: %tmp = bitcast <8 x i64> %__W to <16 x i32>
  // CHECK: %shuffle1.i = shufflevector <16 x i32> %tmp, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp1 = icmp ult <16 x i32> %tmp, %shuffle1.i
  // CHECK: %tmp2 = select <16 x i1> %tmp1, <16 x i32> %tmp, <16 x i32> %shuffle1.i
  // CHECK: %shuffle3.i = shufflevector <16 x i32> %tmp2, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp3 = icmp ult <16 x i32> %tmp2, %shuffle3.i
  // CHECK: %tmp4 = select <16 x i1> %tmp3, <16 x i32> %tmp2, <16 x i32> %shuffle3.i
  // CHECK: %shuffle6.i = shufflevector <16 x i32> %tmp4, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp5 = icmp ult <16 x i32> %tmp4, %shuffle6.i
  // CHECK: %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32> %shuffle6.i
  // CHECK: %shuffle9.i = shufflevector <16 x i32> %tmp6, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp7 = icmp ult <16 x i32> %tmp6, %shuffle9.i
  // CHECK: %tmp8 = select <16 x i1> %tmp7, <16 x i32> %tmp6, <16 x i32> %shuffle9.i
  // CHECK: %tmp9 = bitcast <16 x i32> %tmp8 to <8 x i64>
  // CHECK: %vecext.i = extractelement <8 x i64> %tmp9, i32 0
  // CHECK: %conv.i = trunc i64 %vecext.i to i32
  // CHECK: ret i32 %conv.i
  return _mm512_reduce_min_epu32(__W);
}

// Exercises _mm512_reduce_min_ps on a 16 x float vector.
// The expected IR is four calls to @llvm.x86.avx512.mask.min.ps.512, each
// taking the running vector and a shuffled copy of it, followed by
// extraction of lane 0.
float test_mm512_reduce_min_ps(__m512 __W){
  // CHECK: %shuffle1.i = shufflevector <16 x float> %__W, <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp = tail call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %__W, <16 x float> %shuffle1.i, <16 x float> zeroinitializer, i16 -1, i32 4) #3
  // CHECK: %shuffle3.i = shufflevector <16 x float> %tmp, <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp1 = tail call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %tmp, <16 x float> %shuffle3.i, <16 x float> zeroinitializer, i16 -1, i32 4) #3
  // CHECK: %shuffle6.i = shufflevector <16 x float> %tmp1, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp2 = tail call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %tmp1, <16 x float> %shuffle6.i, <16 x float> zeroinitializer, i16 -1, i32 4) #3
  // CHECK: %shuffle9.i = shufflevector <16 x float> %tmp2, <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp3 = tail call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %tmp2, <16 x float> %shuffle9.i, <16 x float> zeroinitializer, i16 -1, i32 4) #3
  // CHECK: %vecext.i = extractelement <16 x float> %tmp3, i32 0
  // CHECK: ret float %vecext.i
  return _mm512_reduce_min_ps(__W);
}

// Exercises _mm512_mask_reduce_max_epi32.
// The expected IR views the input as 16 x i32, selects per mask bit between
// the input lane and the constant -2147483648, runs four halving steps
// (shuffle, signed compare, select), then bitcasts back to 8 x i64,
// extracts lane 0 and truncates it to i32.
int test_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __W){
  // CHECK: %tmp = bitcast <8 x i64> %__W to <16 x i32>
  // CHECK: %tmp1 = bitcast i16 %__M to <16 x i1>
  // CHECK: %tmp2 = select <16 x i1> %tmp1, <16 x i32> %tmp, <16 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
  // CHECK: %shuffle1.i = shufflevector <16 x i32> %tmp2, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp3 = icmp sgt <16 x i32> %tmp2, %shuffle1.i
  // CHECK: %tmp4 = select <16 x i1> %tmp3, <16 x i32> %tmp2, <16 x i32> %shuffle1.i
  // CHECK: %shuffle4.i = shufflevector <16 x i32> %tmp4, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp5 = icmp sgt <16 x i32> %tmp4, %shuffle4.i
  // CHECK: %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32> %shuffle4.i
  // CHECK: %shuffle7.i = shufflevector <16 x i32> %tmp6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp7 = icmp sgt <16 x i32> %tmp6, %shuffle7.i
  // CHECK: %tmp8 = select <16 x i1> %tmp7, <16 x i32> %tmp6, <16 x i32> %shuffle7.i
  // CHECK: %shuffle10.i = shufflevector <16 x i32> %tmp8, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp9 = icmp sgt <16 x i32> %tmp8, %shuffle10.i
  // CHECK: %tmp10 = select <16 x i1> %tmp9, <16 x i32> %tmp8, <16 x i32> %shuffle10.i
  // CHECK: %tmp11 = bitcast <16 x i32> %tmp10 to <8 x i64>
  // CHECK: %vecext.i = extractelement <8 x i64> %tmp11, i32 0
  // CHECK: %conv.i = trunc i64 %vecext.i to i32
  // CHECK: ret i32 %conv.i
  return _mm512_mask_reduce_max_epi32(__M, __W);
}

// Exercises _mm512_mask_reduce_max_epu32.
// The expected IR views the input as 16 x i32, selects per mask bit between
// the input lane and zero, runs four halving steps (shuffle, unsigned
// compare, select), then bitcasts back to 8 x i64, extracts lane 0 and
// truncates it to i32.
unsigned int test_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __W){
  // CHECK: %tmp = bitcast <8 x i64> %__W to <16 x i32>
  // CHECK: %tmp1 = bitcast i16 %__M to <16 x i1>
  // CHECK: %tmp2 = select <16 x i1> %tmp1, <16 x i32> %tmp, <16 x i32> zeroinitializer
  // CHECK: %shuffle1.i = shufflevector <16 x i32> %tmp2, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp3 = icmp ugt <16 x i32> %tmp2, %shuffle1.i
  // CHECK: %tmp4 = select <16 x i1> %tmp3, <16 x i32> %tmp2, <16 x i32> %shuffle1.i
  // CHECK: %shuffle4.i = shufflevector <16 x i32> %tmp4, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp5 = icmp ugt <16 x i32> %tmp4, %shuffle4.i
  // CHECK: %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32> %shuffle4.i
  // CHECK: %shuffle7.i = shufflevector <16 x i32> %tmp6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp7 = icmp ugt <16 x i32> %tmp6, %shuffle7.i
  // CHECK: %tmp8 = select <16 x i1> %tmp7, <16 x i32> %tmp6, <16 x i32> %shuffle7.i
  // CHECK: %shuffle10.i = shufflevector <16 x i32> %tmp8, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  // CHECK: %tmp9 = icmp ugt <16 x i32> %tmp8, %shuffle10.i
  // CHECK: %tmp10 = select <16 x i1> %tmp9, <16 x i32> %tmp8, <16 x i32> %shuffle10.i
  // CHECK: %tmp11 = bitcast <16 x i32> %tmp10 to <8 x i64>
  // CHECK: %vecext.i = extractelement <8 x i64> %tmp11, i32 0
  // CHECK: %conv.i = trunc i64 %vecext.i to i32
  // CHECK: ret i32 %conv.i
  return _mm512_mask_reduce_max_epu32(__M, __W);
}

// Codegen test for _mm512_mask_reduce_max_ps: passes (__M, __W) straight to
// the intrinsic and returns its result.
// The FileCheck directives in the body expect, in order:
//   - the mask bitcast to <16 x i1> and a select that keeps __W in enabled
//     lanes and substitutes the splat constant 0x41EFF00000000000 elsewhere;
//   - four rounds that move lanes 8-15, then 4-7, then 2-3, then 1 into the
//     low positions, each followed by a call to
//     @llvm.x86.avx512.mask.max.ps.512 with zeroinitializer, i16 -1 and i32 4
//     as the trailing arguments;
//   - element 0 of the final vector returned as float.
// NOTE(review): assuming LLVM's usual hex-double spelling of float constants,
// 0x41EFF00000000000 is 4286578688.0 (the integer 0xFF800000 converted to
// floating point), not negative infinity. A disabled lane would then win over
// any smaller enabled input. Confirm against the intrinsic header whether
// negative infinity was intended before relying on this expectation.
// The IR value names are matched literally, so the directive lines must stay
// exactly as written.
360float test_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __W){
361 // CHECK: %tmp = bitcast i16 %__M to <16 x i1>
362 // CHECK: %tmp1 = select <16 x i1> %tmp, <16 x float> %__W, <16 x float> <float 0x41EFF00000000000, float 0x41EFF00000000000, float 0x41EFF00000000000, float 0x41EFF00000000000, float 0x41EFF00000000000, float 0x41EFF00000000000, float 0x41EFF00000000000, float 0x41EFF00000000000, float 0x41EFF00000000000, float 0x41EFF00000000000, float 0x41EFF00000000000, float 0x41EFF00000000000, float 0x41EFF00000000000, float 0x41EFF00000000000, float 0x41EFF00000000000, float 0x41EFF00000000000>
363 // CHECK: %shuffle1.i = shufflevector <16 x float> %tmp1, <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
364 // CHECK: %tmp2 = tail call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %tmp1, <16 x float> %shuffle1.i, <16 x float> zeroinitializer, i16 -1, i32 4) #3
365 // CHECK: %shuffle4.i = shufflevector <16 x float> %tmp2, <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
366 // CHECK: %tmp3 = tail call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %tmp2, <16 x float> %shuffle4.i, <16 x float> zeroinitializer, i16 -1, i32 4) #3
367 // CHECK: %shuffle7.i = shufflevector <16 x float> %tmp3, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
368 // CHECK: %tmp4 = tail call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %tmp3, <16 x float> %shuffle7.i, <16 x float> zeroinitializer, i16 -1, i32 4) #3
369 // CHECK: %shuffle10.i = shufflevector <16 x float> %tmp4, <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
370 // CHECK: %tmp5 = tail call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %tmp4, <16 x float> %shuffle10.i, <16 x float> zeroinitializer, i16 -1, i32 4) #3
371 // CHECK: %vecext.i = extractelement <16 x float> %tmp5, i32 0
372 // CHECK: ret float %vecext.i
373 return _mm512_mask_reduce_max_ps(__M, __W);
374}
375
// Codegen test for _mm512_mask_reduce_min_epi32: passes (__M, __W) straight to
// the intrinsic and returns its result.
// The FileCheck directives in the body expect, in order:
//   - __W viewed as <16 x i32>, with every lane whose mask bit is clear
//     replaced by 2147483647 (the largest signed 32-bit value, so such a lane
//     cannot be smaller than any enabled lane);
//   - four rounds that move lanes 8-15, then 4-7, then 2-3, then 1 into the
//     low positions, each followed by a signed less-than compare and a select;
//   - the result bitcast to <8 x i64>, element 0 extracted and truncated to i32.
// The IR value names are matched literally, so the directive lines must stay
// exactly as written.
376int test_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __W){
377 // CHECK: %tmp = bitcast <8 x i64> %__W to <16 x i32>
378 // CHECK: %tmp1 = bitcast i16 %__M to <16 x i1>
379 // CHECK: %tmp2 = select <16 x i1> %tmp1, <16 x i32> %tmp, <16 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
380 // CHECK: %shuffle1.i = shufflevector <16 x i32> %tmp2, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
381 // CHECK: %tmp3 = icmp slt <16 x i32> %tmp2, %shuffle1.i
382 // CHECK: %tmp4 = select <16 x i1> %tmp3, <16 x i32> %tmp2, <16 x i32> %shuffle1.i
383 // CHECK: %shuffle4.i = shufflevector <16 x i32> %tmp4, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
384 // CHECK: %tmp5 = icmp slt <16 x i32> %tmp4, %shuffle4.i
385 // CHECK: %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32> %shuffle4.i
386 // CHECK: %shuffle7.i = shufflevector <16 x i32> %tmp6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
387 // CHECK: %tmp7 = icmp slt <16 x i32> %tmp6, %shuffle7.i
388 // CHECK: %tmp8 = select <16 x i1> %tmp7, <16 x i32> %tmp6, <16 x i32> %shuffle7.i
389 // CHECK: %shuffle10.i = shufflevector <16 x i32> %tmp8, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
390 // CHECK: %tmp9 = icmp slt <16 x i32> %tmp8, %shuffle10.i
391 // CHECK: %tmp10 = select <16 x i1> %tmp9, <16 x i32> %tmp8, <16 x i32> %shuffle10.i
392 // CHECK: %tmp11 = bitcast <16 x i32> %tmp10 to <8 x i64>
393 // CHECK: %vecext.i = extractelement <8 x i64> %tmp11, i32 0
394 // CHECK: %conv.i = trunc i64 %vecext.i to i32
395 // CHECK: ret i32 %conv.i
396 return _mm512_mask_reduce_min_epi32(__M, __W);
397}
398
// Codegen test for _mm512_mask_reduce_min_epu32: passes (__M, __W) straight to
// the intrinsic and returns its result.
// The FileCheck directives in the body expect, in order:
//   - __W viewed as <16 x i32>, with every lane whose mask bit is clear
//     replaced by i32 -1 (all bits set, the largest value under the unsigned
//     compare used below, so such a lane cannot be smaller than an enabled one);
//   - four rounds that move lanes 8-15, then 4-7, then 2-3, then 1 into the
//     low positions, each followed by an unsigned less-than compare and a
//     select;
//   - the result bitcast to <8 x i64>, element 0 extracted and truncated to i32.
// The IR value names are matched literally, so the directive lines must stay
// exactly as written.
399unsigned int test_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __W){
400 // CHECK: %tmp = bitcast <8 x i64> %__W to <16 x i32>
401 // CHECK: %tmp1 = bitcast i16 %__M to <16 x i1>
402 // CHECK: %tmp2 = select <16 x i1> %tmp1, <16 x i32> %tmp, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
403 // CHECK: %shuffle1.i = shufflevector <16 x i32> %tmp2, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
404 // CHECK: %tmp3 = icmp ult <16 x i32> %tmp2, %shuffle1.i
405 // CHECK: %tmp4 = select <16 x i1> %tmp3, <16 x i32> %tmp2, <16 x i32> %shuffle1.i
406 // CHECK: %shuffle4.i = shufflevector <16 x i32> %tmp4, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
407 // CHECK: %tmp5 = icmp ult <16 x i32> %tmp4, %shuffle4.i
408 // CHECK: %tmp6 = select <16 x i1> %tmp5, <16 x i32> %tmp4, <16 x i32> %shuffle4.i
409 // CHECK: %shuffle7.i = shufflevector <16 x i32> %tmp6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
410 // CHECK: %tmp7 = icmp ult <16 x i32> %tmp6, %shuffle7.i
411 // CHECK: %tmp8 = select <16 x i1> %tmp7, <16 x i32> %tmp6, <16 x i32> %shuffle7.i
412 // CHECK: %shuffle10.i = shufflevector <16 x i32> %tmp8, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
413 // CHECK: %tmp9 = icmp ult <16 x i32> %tmp8, %shuffle10.i
414 // CHECK: %tmp10 = select <16 x i1> %tmp9, <16 x i32> %tmp8, <16 x i32> %shuffle10.i
415 // CHECK: %tmp11 = bitcast <16 x i32> %tmp10 to <8 x i64>
416 // CHECK: %vecext.i = extractelement <8 x i64> %tmp11, i32 0
417 // CHECK: %conv.i = trunc i64 %vecext.i to i32
418 // CHECK: ret i32 %conv.i
419 return _mm512_mask_reduce_min_epu32(__M, __W);
420}
421
// Codegen test for _mm512_mask_reduce_min_ps: passes (__M, __W) straight to
// the intrinsic and returns its result.
// The FileCheck directives in the body expect, in order:
//   - the mask bitcast to <16 x i1> and a select that keeps __W in enabled
//     lanes and substitutes the splat constant 0x41DFE00000000000 elsewhere;
//   - four rounds that move lanes 8-15, then 4-7, then 2-3, then 1 into the
//     low positions, each followed by a call to
//     @llvm.x86.avx512.mask.min.ps.512 with zeroinitializer, i16 -1 and i32 4
//     as the trailing arguments;
//   - element 0 of the final vector returned as float.
// NOTE(review): assuming LLVM's usual hex-double spelling of float constants,
// 0x41DFE00000000000 is 2139095040.0 (the integer 0x7F800000 converted to
// floating point), not positive infinity. A disabled lane would then win over
// any larger enabled input. Confirm against the intrinsic header whether
// positive infinity was intended before relying on this expectation.
// The IR value names are matched literally, so the directive lines must stay
// exactly as written.
422float test_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __W){
423 // CHECK: %tmp = bitcast i16 %__M to <16 x i1>
424 // CHECK: %tmp1 = select <16 x i1> %tmp, <16 x float> %__W, <16 x float> <float 0x41DFE00000000000, float 0x41DFE00000000000, float 0x41DFE00000000000, float 0x41DFE00000000000, float 0x41DFE00000000000, float 0x41DFE00000000000, float 0x41DFE00000000000, float 0x41DFE00000000000, float 0x41DFE00000000000, float 0x41DFE00000000000, float 0x41DFE00000000000, float 0x41DFE00000000000, float 0x41DFE00000000000, float 0x41DFE00000000000, float 0x41DFE00000000000, float 0x41DFE00000000000>
425 // CHECK: %shuffle1.i = shufflevector <16 x float> %tmp1, <16 x float> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
426 // CHECK: %tmp2 = tail call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %tmp1, <16 x float> %shuffle1.i, <16 x float> zeroinitializer, i16 -1, i32 4) #3
427 // CHECK: %shuffle4.i = shufflevector <16 x float> %tmp2, <16 x float> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
428 // CHECK: %tmp3 = tail call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %tmp2, <16 x float> %shuffle4.i, <16 x float> zeroinitializer, i16 -1, i32 4) #3
429 // CHECK: %shuffle7.i = shufflevector <16 x float> %tmp3, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
430 // CHECK: %tmp4 = tail call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %tmp3, <16 x float> %shuffle7.i, <16 x float> zeroinitializer, i16 -1, i32 4) #3
431 // CHECK: %shuffle10.i = shufflevector <16 x float> %tmp4, <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
432 // CHECK: %tmp5 = tail call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %tmp4, <16 x float> %shuffle10.i, <16 x float> zeroinitializer, i16 -1, i32 4) #3
433 // CHECK: %vecext.i = extractelement <16 x float> %tmp5, i32 0
434 // CHECK: ret float %vecext.i
435 return _mm512_mask_reduce_min_ps(__M, __W);
436}
437