blob: 2081cef754606c311d56a0f41816d80293fb7353 [file] [log] [blame]
Sanjay Patel1acd2cf2017-09-25 19:56:57 +00001// RUN: %clang_cc1 -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s
Michael Zuckerman25eb4202016-10-29 10:29:20 +00002
3#include <immintrin.h>
4
Sanjay Patel1acd2cf2017-09-25 19:56:57 +00005// CHECK-LABEL: define i64 @test_mm512_reduce_max_epi64(<8 x i64> %__W) #0 {
6// CHECK: [[_COMPOUNDLITERAL_I_I11_I:%.*]] = alloca <8 x i64>, align 64
7// CHECK: [[__A_ADDR_I12_I:%.*]] = alloca <8 x i64>, align 64
8// CHECK: [[__B_ADDR_I13_I:%.*]] = alloca <8 x i64>, align 64
9// CHECK: [[_COMPOUNDLITERAL_I_I8_I:%.*]] = alloca <8 x i64>, align 64
10// CHECK: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64
11// CHECK: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64
12// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64
13// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
14// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
15// CHECK: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
16// CHECK: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
17// CHECK: store <8 x i64> %__W, <8 x i64>* [[__W_ADDR]], align 64
18// CHECK: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
19// CHECK: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64
20// CHECK: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
21// CHECK: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
22// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
23// CHECK: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
24// CHECK: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
25// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> [[TMP4]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
26// CHECK: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__A_ADDR_I_I]], align 64
27// CHECK: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__B_ADDR_I_I]], align 64
28// CHECK: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
29// CHECK: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64
30// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
31// CHECK: [[TMP7:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
32// CHECK: [[TMP8:%.*]] = icmp sgt <8 x i64> [[TMP5]], [[TMP6]]
33// CHECK: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]]
34// CHECK: store <8 x i64> [[TMP9]], <8 x i64>* [[__V_ADDR_I]], align 64
35// CHECK: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
36// CHECK: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
37// CHECK: [[SHUFFLE2_I:%.*]] = shufflevector <8 x i64> [[TMP10]], <8 x i64> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
38// CHECK: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
39// CHECK: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
40// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP12]], <8 x i64> [[TMP13]], <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
41// CHECK: store <8 x i64> [[SHUFFLE2_I]], <8 x i64>* [[__A_ADDR_I12_I]], align 64
42// CHECK: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__B_ADDR_I13_I]], align 64
43// CHECK: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I12_I]], align 64
44// CHECK: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I13_I]], align 64
45// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
46// CHECK: [[TMP16:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
47// CHECK: [[TMP17:%.*]] = icmp sgt <8 x i64> [[TMP14]], [[TMP15]]
48// CHECK: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i64> [[TMP14]], <8 x i64> [[TMP15]]
49// CHECK: store <8 x i64> [[TMP18]], <8 x i64>* [[__V_ADDR_I]], align 64
50// CHECK: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
51// CHECK: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
52// CHECK: [[SHUFFLE5_I:%.*]] = shufflevector <8 x i64> [[TMP19]], <8 x i64> [[TMP20]], <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
53// CHECK: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
54// CHECK: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
55// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <8 x i64> [[TMP21]], <8 x i64> [[TMP22]], <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
56// CHECK: store <8 x i64> [[SHUFFLE5_I]], <8 x i64>* [[__A_ADDR_I9_I]], align 64
57// CHECK: store <8 x i64> [[SHUFFLE6_I]], <8 x i64>* [[__B_ADDR_I10_I]], align 64
58// CHECK: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64
59// CHECK: [[TMP24:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64
60// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I8_I]], align 64
61// CHECK: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I8_I]], align 64
62// CHECK: [[TMP26:%.*]] = icmp sgt <8 x i64> [[TMP23]], [[TMP24]]
63// CHECK: [[TMP27:%.*]] = select <8 x i1> [[TMP26]], <8 x i64> [[TMP23]], <8 x i64> [[TMP24]]
64// CHECK: store <8 x i64> [[TMP27]], <8 x i64>* [[__V_ADDR_I]], align 64
65// CHECK: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
66// CHECK: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP28]], i32 0
67// CHECK: ret i64 [[VECEXT_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +000068long long test_mm512_reduce_max_epi64(__m512i __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +000069 return _mm512_reduce_max_epi64(__W);
70}
71
Sanjay Patel1acd2cf2017-09-25 19:56:57 +000072// CHECK-LABEL: define i64 @test_mm512_reduce_max_epu64(<8 x i64> %__W) #0 {
73// CHECK: [[_COMPOUNDLITERAL_I_I11_I:%.*]] = alloca <8 x i64>, align 64
74// CHECK: [[__A_ADDR_I12_I:%.*]] = alloca <8 x i64>, align 64
75// CHECK: [[__B_ADDR_I13_I:%.*]] = alloca <8 x i64>, align 64
76// CHECK: [[_COMPOUNDLITERAL_I_I8_I:%.*]] = alloca <8 x i64>, align 64
77// CHECK: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64
78// CHECK: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64
79// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64
80// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
81// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
82// CHECK: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
83// CHECK: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
84// CHECK: store <8 x i64> %__W, <8 x i64>* [[__W_ADDR]], align 64
85// CHECK: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
86// CHECK: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64
87// CHECK: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
88// CHECK: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
89// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
90// CHECK: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
91// CHECK: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
92// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> [[TMP4]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
93// CHECK: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__A_ADDR_I_I]], align 64
94// CHECK: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__B_ADDR_I_I]], align 64
95// CHECK: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
96// CHECK: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64
97// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
98// CHECK: [[TMP7:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
99// CHECK: [[TMP8:%.*]] = icmp ugt <8 x i64> [[TMP5]], [[TMP6]]
100// CHECK: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]]
101// CHECK: store <8 x i64> [[TMP9]], <8 x i64>* [[__V_ADDR_I]], align 64
102// CHECK: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
103// CHECK: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
104// CHECK: [[SHUFFLE2_I:%.*]] = shufflevector <8 x i64> [[TMP10]], <8 x i64> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
105// CHECK: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
106// CHECK: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
107// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP12]], <8 x i64> [[TMP13]], <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
108// CHECK: store <8 x i64> [[SHUFFLE2_I]], <8 x i64>* [[__A_ADDR_I12_I]], align 64
109// CHECK: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__B_ADDR_I13_I]], align 64
110// CHECK: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I12_I]], align 64
111// CHECK: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I13_I]], align 64
112// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
113// CHECK: [[TMP16:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
114// CHECK: [[TMP17:%.*]] = icmp ugt <8 x i64> [[TMP14]], [[TMP15]]
115// CHECK: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i64> [[TMP14]], <8 x i64> [[TMP15]]
116// CHECK: store <8 x i64> [[TMP18]], <8 x i64>* [[__V_ADDR_I]], align 64
117// CHECK: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
118// CHECK: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
119// CHECK: [[SHUFFLE5_I:%.*]] = shufflevector <8 x i64> [[TMP19]], <8 x i64> [[TMP20]], <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
120// CHECK: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
121// CHECK: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
122// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <8 x i64> [[TMP21]], <8 x i64> [[TMP22]], <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
123// CHECK: store <8 x i64> [[SHUFFLE5_I]], <8 x i64>* [[__A_ADDR_I9_I]], align 64
124// CHECK: store <8 x i64> [[SHUFFLE6_I]], <8 x i64>* [[__B_ADDR_I10_I]], align 64
125// CHECK: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64
126// CHECK: [[TMP24:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64
127// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I8_I]], align 64
128// CHECK: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I8_I]], align 64
129// CHECK: [[TMP26:%.*]] = icmp ugt <8 x i64> [[TMP23]], [[TMP24]]
130// CHECK: [[TMP27:%.*]] = select <8 x i1> [[TMP26]], <8 x i64> [[TMP23]], <8 x i64> [[TMP24]]
131// CHECK: store <8 x i64> [[TMP27]], <8 x i64>* [[__V_ADDR_I]], align 64
132// CHECK: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
133// CHECK: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP28]], i32 0
134// CHECK: ret i64 [[VECEXT_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000135unsigned long long test_mm512_reduce_max_epu64(__m512i __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000136 return _mm512_reduce_max_epu64(__W);
137}
138
Sanjay Patel1acd2cf2017-09-25 19:56:57 +0000139// CHECK-LABEL: define double @test_mm512_reduce_max_pd(<8 x double> %__W) #0 {
140// CHECK: [[_COMPOUNDLITERAL_I_I11_I:%.*]] = alloca <8 x double>, align 64
141// CHECK: [[__A_ADDR_I12_I:%.*]] = alloca <8 x double>, align 64
142// CHECK: [[__B_ADDR_I13_I:%.*]] = alloca <8 x double>, align 64
143// CHECK: [[_COMPOUNDLITERAL_I_I8_I:%.*]] = alloca <8 x double>, align 64
144// CHECK: [[__A_ADDR_I9_I:%.*]] = alloca <8 x double>, align 64
145// CHECK: [[__B_ADDR_I10_I:%.*]] = alloca <8 x double>, align 64
146// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x double>, align 64
147// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <8 x double>, align 64
148// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <8 x double>, align 64
149// CHECK: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64
150// CHECK: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64
151// CHECK: store <8 x double> %__W, <8 x double>* [[__W_ADDR]], align 64
152// CHECK: [[TMP0:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64
153// CHECK: store <8 x double> [[TMP0]], <8 x double>* [[__V_ADDR_I]], align 64
154// CHECK: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
155// CHECK: [[TMP2:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
156// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
157// CHECK: [[TMP3:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
158// CHECK: [[TMP4:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
159// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <8 x double> [[TMP3]], <8 x double> [[TMP4]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
160// CHECK: store <8 x double> [[SHUFFLE_I]], <8 x double>* [[__A_ADDR_I_I]], align 64
161// CHECK: store <8 x double> [[SHUFFLE1_I]], <8 x double>* [[__B_ADDR_I_I]], align 64
162// CHECK: [[TMP5:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I_I]], align 64
163// CHECK: [[TMP6:%.*]] = load <8 x double>, <8 x double>* [[__B_ADDR_I_I]], align 64
164// CHECK: store <8 x double> zeroinitializer, <8 x double>* [[_COMPOUNDLITERAL_I_I_I]], align 64
165// CHECK: [[TMP7:%.*]] = load <8 x double>, <8 x double>* [[_COMPOUNDLITERAL_I_I_I]], align 64
166// CHECK: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> [[TMP5]], <8 x double> [[TMP6]], <8 x double> [[TMP7]], i8 -1, i32 4) #2
167// CHECK: store <8 x double> [[TMP8]], <8 x double>* [[__V_ADDR_I]], align 64
168// CHECK: [[TMP9:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
169// CHECK: [[TMP10:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
170// CHECK: [[SHUFFLE2_I:%.*]] = shufflevector <8 x double> [[TMP9]], <8 x double> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
171// CHECK: [[TMP11:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
172// CHECK: [[TMP12:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
173// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> [[TMP12]], <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
174// CHECK: store <8 x double> [[SHUFFLE2_I]], <8 x double>* [[__A_ADDR_I12_I]], align 64
175// CHECK: store <8 x double> [[SHUFFLE3_I]], <8 x double>* [[__B_ADDR_I13_I]], align 64
176// CHECK: [[TMP13:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I12_I]], align 64
177// CHECK: [[TMP14:%.*]] = load <8 x double>, <8 x double>* [[__B_ADDR_I13_I]], align 64
178// CHECK: store <8 x double> zeroinitializer, <8 x double>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
179// CHECK: [[TMP15:%.*]] = load <8 x double>, <8 x double>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
180// CHECK: [[TMP16:%.*]] = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> [[TMP13]], <8 x double> [[TMP14]], <8 x double> [[TMP15]], i8 -1, i32 4) #2
181// CHECK: store <8 x double> [[TMP16]], <8 x double>* [[__V_ADDR_I]], align 64
182// CHECK: [[TMP17:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
183// CHECK: [[TMP18:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
184// CHECK: [[SHUFFLE5_I:%.*]] = shufflevector <8 x double> [[TMP17]], <8 x double> [[TMP18]], <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
185// CHECK: [[TMP19:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
186// CHECK: [[TMP20:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
187// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <8 x double> [[TMP19]], <8 x double> [[TMP20]], <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
188// CHECK: store <8 x double> [[SHUFFLE5_I]], <8 x double>* [[__A_ADDR_I9_I]], align 64
189// CHECK: store <8 x double> [[SHUFFLE6_I]], <8 x double>* [[__B_ADDR_I10_I]], align 64
190// CHECK: [[TMP21:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I9_I]], align 64
191// CHECK: [[TMP22:%.*]] = load <8 x double>, <8 x double>* [[__B_ADDR_I10_I]], align 64
192// CHECK: store <8 x double> zeroinitializer, <8 x double>* [[_COMPOUNDLITERAL_I_I8_I]], align 64
193// CHECK: [[TMP23:%.*]] = load <8 x double>, <8 x double>* [[_COMPOUNDLITERAL_I_I8_I]], align 64
194// CHECK: [[TMP24:%.*]] = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> [[TMP21]], <8 x double> [[TMP22]], <8 x double> [[TMP23]], i8 -1, i32 4) #2
195// CHECK: store <8 x double> [[TMP24]], <8 x double>* [[__V_ADDR_I]], align 64
196// CHECK: [[TMP25:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
197// CHECK: [[VECEXT_I:%.*]] = extractelement <8 x double> [[TMP25]], i32 0
198// CHECK: ret double [[VECEXT_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000199double test_mm512_reduce_max_pd(__m512d __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000200 return _mm512_reduce_max_pd(__W);
201}
202
Sanjay Patel1acd2cf2017-09-25 19:56:57 +0000203// CHECK-LABEL: define i64 @test_mm512_reduce_min_epi64(<8 x i64> %__W) #0 {
204// CHECK: [[_COMPOUNDLITERAL_I_I11_I:%.*]] = alloca <8 x i64>, align 64
205// CHECK: [[__A_ADDR_I12_I:%.*]] = alloca <8 x i64>, align 64
206// CHECK: [[__B_ADDR_I13_I:%.*]] = alloca <8 x i64>, align 64
207// CHECK: [[_COMPOUNDLITERAL_I_I8_I:%.*]] = alloca <8 x i64>, align 64
208// CHECK: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64
209// CHECK: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64
210// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64
211// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
212// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
213// CHECK: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
214// CHECK: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
215// CHECK: store <8 x i64> %__W, <8 x i64>* [[__W_ADDR]], align 64
216// CHECK: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
217// CHECK: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64
218// CHECK: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
219// CHECK: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
220// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
221// CHECK: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
222// CHECK: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
223// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> [[TMP4]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
224// CHECK: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__A_ADDR_I_I]], align 64
225// CHECK: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__B_ADDR_I_I]], align 64
226// CHECK: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
227// CHECK: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64
228// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
229// CHECK: [[TMP7:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
230// CHECK: [[TMP8:%.*]] = icmp sgt <8 x i64> [[TMP5]], [[TMP6]]
231// CHECK: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]]
232// CHECK: store <8 x i64> [[TMP9]], <8 x i64>* [[__V_ADDR_I]], align 64
233// CHECK: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
234// CHECK: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
235// CHECK: [[SHUFFLE2_I:%.*]] = shufflevector <8 x i64> [[TMP10]], <8 x i64> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
236// CHECK: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
237// CHECK: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
238// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP12]], <8 x i64> [[TMP13]], <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
239// CHECK: store <8 x i64> [[SHUFFLE2_I]], <8 x i64>* [[__A_ADDR_I12_I]], align 64
240// CHECK: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__B_ADDR_I13_I]], align 64
241// CHECK: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I12_I]], align 64
242// CHECK: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I13_I]], align 64
243// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
244// CHECK: [[TMP16:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
245// CHECK: [[TMP17:%.*]] = icmp sgt <8 x i64> [[TMP14]], [[TMP15]]
246// CHECK: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i64> [[TMP14]], <8 x i64> [[TMP15]]
247// CHECK: store <8 x i64> [[TMP18]], <8 x i64>* [[__V_ADDR_I]], align 64
248// CHECK: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
249// CHECK: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
250// CHECK: [[SHUFFLE5_I:%.*]] = shufflevector <8 x i64> [[TMP19]], <8 x i64> [[TMP20]], <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
251// CHECK: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
252// CHECK: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
253// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <8 x i64> [[TMP21]], <8 x i64> [[TMP22]], <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
254// CHECK: store <8 x i64> [[SHUFFLE5_I]], <8 x i64>* [[__A_ADDR_I9_I]], align 64
255// CHECK: store <8 x i64> [[SHUFFLE6_I]], <8 x i64>* [[__B_ADDR_I10_I]], align 64
256// CHECK: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64
257// CHECK: [[TMP24:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64
258// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I8_I]], align 64
259// CHECK: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I8_I]], align 64
260// CHECK: [[TMP26:%.*]] = icmp sgt <8 x i64> [[TMP23]], [[TMP24]]
261// CHECK: [[TMP27:%.*]] = select <8 x i1> [[TMP26]], <8 x i64> [[TMP23]], <8 x i64> [[TMP24]]
262// CHECK: store <8 x i64> [[TMP27]], <8 x i64>* [[__V_ADDR_I]], align 64
263// CHECK: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
264// CHECK: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP28]], i32 0
265// CHECK: ret i64 [[VECEXT_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000266long long test_mm512_reduce_min_epi64(__m512i __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000267 return _mm512_reduce_max_epi64(__W);
268}
269
Sanjay Patel1acd2cf2017-09-25 19:56:57 +0000270// CHECK-LABEL: define i64 @test_mm512_reduce_min_epu64(<8 x i64> %__W) #0 {
271// CHECK: [[_COMPOUNDLITERAL_I_I11_I:%.*]] = alloca <8 x i64>, align 64
272// CHECK: [[__A_ADDR_I12_I:%.*]] = alloca <8 x i64>, align 64
273// CHECK: [[__B_ADDR_I13_I:%.*]] = alloca <8 x i64>, align 64
274// CHECK: [[_COMPOUNDLITERAL_I_I8_I:%.*]] = alloca <8 x i64>, align 64
275// CHECK: [[__A_ADDR_I9_I:%.*]] = alloca <8 x i64>, align 64
276// CHECK: [[__B_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64
277// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64
278// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
279// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
280// CHECK: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
281// CHECK: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
282// CHECK: store <8 x i64> %__W, <8 x i64>* [[__W_ADDR]], align 64
283// CHECK: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
284// CHECK: store <8 x i64> [[TMP0]], <8 x i64>* [[__V_ADDR_I]], align 64
285// CHECK: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
286// CHECK: [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
287// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP1]], <8 x i64> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
288// CHECK: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
289// CHECK: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
290// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <8 x i64> [[TMP3]], <8 x i64> [[TMP4]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
291// CHECK: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__A_ADDR_I_I]], align 64
292// CHECK: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__B_ADDR_I_I]], align 64
293// CHECK: [[TMP5:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
294// CHECK: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64
295// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
296// CHECK: [[TMP7:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
297// CHECK: [[TMP8:%.*]] = icmp ugt <8 x i64> [[TMP5]], [[TMP6]]
298// CHECK: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP5]], <8 x i64> [[TMP6]]
299// CHECK: store <8 x i64> [[TMP9]], <8 x i64>* [[__V_ADDR_I]], align 64
300// CHECK: [[TMP10:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
301// CHECK: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
302// CHECK: [[SHUFFLE2_I:%.*]] = shufflevector <8 x i64> [[TMP10]], <8 x i64> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
303// CHECK: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
304// CHECK: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
305// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP12]], <8 x i64> [[TMP13]], <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
306// CHECK: store <8 x i64> [[SHUFFLE2_I]], <8 x i64>* [[__A_ADDR_I12_I]], align 64
307// CHECK: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__B_ADDR_I13_I]], align 64
308// CHECK: [[TMP14:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I12_I]], align 64
309// CHECK: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I13_I]], align 64
310// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
311// CHECK: [[TMP16:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
312// CHECK: [[TMP17:%.*]] = icmp ugt <8 x i64> [[TMP14]], [[TMP15]]
313// CHECK: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i64> [[TMP14]], <8 x i64> [[TMP15]]
314// CHECK: store <8 x i64> [[TMP18]], <8 x i64>* [[__V_ADDR_I]], align 64
315// CHECK: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
316// CHECK: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
317// CHECK: [[SHUFFLE5_I:%.*]] = shufflevector <8 x i64> [[TMP19]], <8 x i64> [[TMP20]], <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
318// CHECK: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
319// CHECK: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
320// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <8 x i64> [[TMP21]], <8 x i64> [[TMP22]], <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
321// CHECK: store <8 x i64> [[SHUFFLE5_I]], <8 x i64>* [[__A_ADDR_I9_I]], align 64
322// CHECK: store <8 x i64> [[SHUFFLE6_I]], <8 x i64>* [[__B_ADDR_I10_I]], align 64
323// CHECK: [[TMP23:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I9_I]], align 64
324// CHECK: [[TMP24:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I10_I]], align 64
325// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I8_I]], align 64
326// CHECK: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I8_I]], align 64
327// CHECK: [[TMP26:%.*]] = icmp ugt <8 x i64> [[TMP23]], [[TMP24]]
328// CHECK: [[TMP27:%.*]] = select <8 x i1> [[TMP26]], <8 x i64> [[TMP23]], <8 x i64> [[TMP24]]
329// CHECK: store <8 x i64> [[TMP27]], <8 x i64>* [[__V_ADDR_I]], align 64
330// CHECK: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
331// CHECK: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP28]], i32 0
332// CHECK: ret i64 [[VECEXT_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000333unsigned long long test_mm512_reduce_min_epu64(__m512i __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000334 return _mm512_reduce_max_epu64(__W);
335}
336
Sanjay Patel1acd2cf2017-09-25 19:56:57 +0000337// CHECK-LABEL: define double @test_mm512_reduce_min_pd(<8 x double> %__W) #0 {
338// CHECK: [[_COMPOUNDLITERAL_I_I11_I:%.*]] = alloca <8 x double>, align 64
339// CHECK: [[__A_ADDR_I12_I:%.*]] = alloca <8 x double>, align 64
340// CHECK: [[__B_ADDR_I13_I:%.*]] = alloca <8 x double>, align 64
341// CHECK: [[_COMPOUNDLITERAL_I_I8_I:%.*]] = alloca <8 x double>, align 64
342// CHECK: [[__A_ADDR_I9_I:%.*]] = alloca <8 x double>, align 64
343// CHECK: [[__B_ADDR_I10_I:%.*]] = alloca <8 x double>, align 64
344// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x double>, align 64
345// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <8 x double>, align 64
346// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <8 x double>, align 64
347// CHECK: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64
348// CHECK: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64
349// CHECK: store <8 x double> %__W, <8 x double>* [[__W_ADDR]], align 64
350// CHECK: [[TMP0:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64
351// CHECK: store <8 x double> [[TMP0]], <8 x double>* [[__V_ADDR_I]], align 64
352// CHECK: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
353// CHECK: [[TMP2:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
354// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x double> [[TMP1]], <8 x double> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
355// CHECK: [[TMP3:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
356// CHECK: [[TMP4:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
357// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <8 x double> [[TMP3]], <8 x double> [[TMP4]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
358// CHECK: store <8 x double> [[SHUFFLE_I]], <8 x double>* [[__A_ADDR_I_I]], align 64
359// CHECK: store <8 x double> [[SHUFFLE1_I]], <8 x double>* [[__B_ADDR_I_I]], align 64
360// CHECK: [[TMP5:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I_I]], align 64
361// CHECK: [[TMP6:%.*]] = load <8 x double>, <8 x double>* [[__B_ADDR_I_I]], align 64
362// CHECK: store <8 x double> zeroinitializer, <8 x double>* [[_COMPOUNDLITERAL_I_I_I]], align 64
363// CHECK: [[TMP7:%.*]] = load <8 x double>, <8 x double>* [[_COMPOUNDLITERAL_I_I_I]], align 64
364// CHECK: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> [[TMP5]], <8 x double> [[TMP6]], <8 x double> [[TMP7]], i8 -1, i32 4) #2
365// CHECK: store <8 x double> [[TMP8]], <8 x double>* [[__V_ADDR_I]], align 64
366// CHECK: [[TMP9:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
367// CHECK: [[TMP10:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
368// CHECK: [[SHUFFLE2_I:%.*]] = shufflevector <8 x double> [[TMP9]], <8 x double> [[TMP10]], <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
369// CHECK: [[TMP11:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
370// CHECK: [[TMP12:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
371// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <8 x double> [[TMP11]], <8 x double> [[TMP12]], <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
372// CHECK: store <8 x double> [[SHUFFLE2_I]], <8 x double>* [[__A_ADDR_I12_I]], align 64
373// CHECK: store <8 x double> [[SHUFFLE3_I]], <8 x double>* [[__B_ADDR_I13_I]], align 64
374// CHECK: [[TMP13:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I12_I]], align 64
375// CHECK: [[TMP14:%.*]] = load <8 x double>, <8 x double>* [[__B_ADDR_I13_I]], align 64
376// CHECK: store <8 x double> zeroinitializer, <8 x double>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
377// CHECK: [[TMP15:%.*]] = load <8 x double>, <8 x double>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
378// CHECK: [[TMP16:%.*]] = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> [[TMP13]], <8 x double> [[TMP14]], <8 x double> [[TMP15]], i8 -1, i32 4) #2
379// CHECK: store <8 x double> [[TMP16]], <8 x double>* [[__V_ADDR_I]], align 64
380// CHECK: [[TMP17:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
381// CHECK: [[TMP18:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
382// CHECK: [[SHUFFLE5_I:%.*]] = shufflevector <8 x double> [[TMP17]], <8 x double> [[TMP18]], <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
383// CHECK: [[TMP19:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
384// CHECK: [[TMP20:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
385// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <8 x double> [[TMP19]], <8 x double> [[TMP20]], <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
386// CHECK: store <8 x double> [[SHUFFLE5_I]], <8 x double>* [[__A_ADDR_I9_I]], align 64
387// CHECK: store <8 x double> [[SHUFFLE6_I]], <8 x double>* [[__B_ADDR_I10_I]], align 64
388// CHECK: [[TMP21:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I9_I]], align 64
389// CHECK: [[TMP22:%.*]] = load <8 x double>, <8 x double>* [[__B_ADDR_I10_I]], align 64
390// CHECK: store <8 x double> zeroinitializer, <8 x double>* [[_COMPOUNDLITERAL_I_I8_I]], align 64
391// CHECK: [[TMP23:%.*]] = load <8 x double>, <8 x double>* [[_COMPOUNDLITERAL_I_I8_I]], align 64
392// CHECK: [[TMP24:%.*]] = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> [[TMP21]], <8 x double> [[TMP22]], <8 x double> [[TMP23]], i8 -1, i32 4) #2
393// CHECK: store <8 x double> [[TMP24]], <8 x double>* [[__V_ADDR_I]], align 64
394// CHECK: [[TMP25:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
395// CHECK: [[VECEXT_I:%.*]] = extractelement <8 x double> [[TMP25]], i32 0
396// CHECK: ret double [[VECEXT_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000397double test_mm512_reduce_min_pd(__m512d __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000398 return _mm512_reduce_min_pd(__W);
399}
400
Sanjay Patel1acd2cf2017-09-25 19:56:57 +0000401// CHECK-LABEL: define i64 @test_mm512_mask_reduce_max_epi64(i8 zeroext %__M, <8 x i64> %__W) #0 {
402// CHECK: [[_COMPOUNDLITERAL_I_I12_I:%.*]] = alloca <8 x i64>, align 64
403// CHECK: [[__A_ADDR_I13_I:%.*]] = alloca <8 x i64>, align 64
404// CHECK: [[__B_ADDR_I14_I:%.*]] = alloca <8 x i64>, align 64
405// CHECK: [[_COMPOUNDLITERAL_I_I9_I:%.*]] = alloca <8 x i64>, align 64
406// CHECK: [[__A_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64
407// CHECK: [[__B_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64
408// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64
409// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
410// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
411// CHECK: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8
412// CHECK: [[_COMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64
413// CHECK: [[__M_ADDR_I:%.*]] = alloca i8, align 1
414// CHECK: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
415// CHECK: [[__M_ADDR:%.*]] = alloca i8, align 1
416// CHECK: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
417// CHECK: store i8 %__M, i8* [[__M_ADDR]], align 1
418// CHECK: store <8 x i64> %__W, <8 x i64>* [[__W_ADDR]], align 64
419// CHECK: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1
420// CHECK: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
421// CHECK: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1
422// CHECK: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64
423// CHECK: [[TMP2:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1
424// CHECK: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
425// CHECK: store i64 -9223372036854775808, i64* [[__D_ADDR_I_I]], align 8
426// CHECK: [[TMP4:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
427// CHECK: [[VECINIT_I_I:%.*]] = insertelement <8 x i64> undef, i64 [[TMP4]], i32 0
428// CHECK: [[TMP5:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
429// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <8 x i64> [[VECINIT_I_I]], i64 [[TMP5]], i32 1
430// CHECK: [[TMP6:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
431// CHECK: [[VECINIT2_I_I:%.*]] = insertelement <8 x i64> [[VECINIT1_I_I]], i64 [[TMP6]], i32 2
432// CHECK: [[TMP7:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
433// CHECK: [[VECINIT3_I_I:%.*]] = insertelement <8 x i64> [[VECINIT2_I_I]], i64 [[TMP7]], i32 3
434// CHECK: [[TMP8:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
435// CHECK: [[VECINIT4_I_I:%.*]] = insertelement <8 x i64> [[VECINIT3_I_I]], i64 [[TMP8]], i32 4
436// CHECK: [[TMP9:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
437// CHECK: [[VECINIT5_I_I:%.*]] = insertelement <8 x i64> [[VECINIT4_I_I]], i64 [[TMP9]], i32 5
438// CHECK: [[TMP10:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
439// CHECK: [[VECINIT6_I_I:%.*]] = insertelement <8 x i64> [[VECINIT5_I_I]], i64 [[TMP10]], i32 6
440// CHECK: [[TMP11:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
441// CHECK: [[VECINIT7_I_I:%.*]] = insertelement <8 x i64> [[VECINIT6_I_I]], i64 [[TMP11]], i32 7
442// CHECK: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* [[_COMPOUNDLITERAL_I_I]], align 64
443// CHECK: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I]], align 64
444// CHECK: [[TMP13:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
445// CHECK: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> [[TMP3]], <8 x i64> [[TMP12]]
446// CHECK: store <8 x i64> [[TMP14]], <8 x i64>* [[__V_ADDR_I]], align 64
447// CHECK: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
448// CHECK: [[TMP16:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
449// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP15]], <8 x i64> [[TMP16]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
450// CHECK: [[TMP17:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
451// CHECK: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
452// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <8 x i64> [[TMP17]], <8 x i64> [[TMP18]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
453// CHECK: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__A_ADDR_I13_I]], align 64
454// CHECK: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__B_ADDR_I14_I]], align 64
455// CHECK: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I13_I]], align 64
456// CHECK: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I14_I]], align 64
457// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
458// CHECK: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
459// CHECK: [[TMP22:%.*]] = icmp sgt <8 x i64> [[TMP19]], [[TMP20]]
460// CHECK: [[TMP23:%.*]] = select <8 x i1> [[TMP22]], <8 x i64> [[TMP19]], <8 x i64> [[TMP20]]
461// CHECK: store <8 x i64> [[TMP23]], <8 x i64>* [[__V_ADDR_I]], align 64
462// CHECK: [[TMP24:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
463// CHECK: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
464// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP24]], <8 x i64> [[TMP25]], <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
465// CHECK: [[TMP26:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
466// CHECK: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
467// CHECK: [[SHUFFLE4_I:%.*]] = shufflevector <8 x i64> [[TMP26]], <8 x i64> [[TMP27]], <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
468// CHECK: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__A_ADDR_I10_I]], align 64
469// CHECK: store <8 x i64> [[SHUFFLE4_I]], <8 x i64>* [[__B_ADDR_I11_I]], align 64
470// CHECK: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I10_I]], align 64
471// CHECK: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I11_I]], align 64
472// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I9_I]], align 64
473// CHECK: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I9_I]], align 64
474// CHECK: [[TMP31:%.*]] = icmp sgt <8 x i64> [[TMP28]], [[TMP29]]
475// CHECK: [[TMP32:%.*]] = select <8 x i1> [[TMP31]], <8 x i64> [[TMP28]], <8 x i64> [[TMP29]]
476// CHECK: store <8 x i64> [[TMP32]], <8 x i64>* [[__V_ADDR_I]], align 64
477// CHECK: [[TMP33:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
478// CHECK: [[TMP34:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
479// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <8 x i64> [[TMP33]], <8 x i64> [[TMP34]], <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
480// CHECK: [[TMP35:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
481// CHECK: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
482// CHECK: [[SHUFFLE7_I:%.*]] = shufflevector <8 x i64> [[TMP35]], <8 x i64> [[TMP36]], <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
483// CHECK: store <8 x i64> [[SHUFFLE6_I]], <8 x i64>* [[__A_ADDR_I_I]], align 64
484// CHECK: store <8 x i64> [[SHUFFLE7_I]], <8 x i64>* [[__B_ADDR_I_I]], align 64
485// CHECK: [[TMP37:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
486// CHECK: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64
487// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
488// CHECK: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
489// CHECK: [[TMP40:%.*]] = icmp sgt <8 x i64> [[TMP37]], [[TMP38]]
490// CHECK: [[TMP41:%.*]] = select <8 x i1> [[TMP40]], <8 x i64> [[TMP37]], <8 x i64> [[TMP38]]
491// CHECK: store <8 x i64> [[TMP41]], <8 x i64>* [[__V_ADDR_I]], align 64
492// CHECK: [[TMP42:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
493// CHECK: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP42]], i32 0
494// CHECK: ret i64 [[VECEXT_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000495long long test_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000496 return _mm512_mask_reduce_max_epi64(__M, __W);
497}
498
Sanjay Patel1acd2cf2017-09-25 19:56:57 +0000499// CHECK-LABEL: define i64 @test_mm512_mask_reduce_max_epu64(i8 zeroext %__M, <8 x i64> %__W) #0 {
500// CHECK: [[_COMPOUNDLITERAL_I_I12_I:%.*]] = alloca <8 x i64>, align 64
501// CHECK: [[__A_ADDR_I13_I:%.*]] = alloca <8 x i64>, align 64
502// CHECK: [[__B_ADDR_I14_I:%.*]] = alloca <8 x i64>, align 64
503// CHECK: [[_COMPOUNDLITERAL_I_I9_I:%.*]] = alloca <8 x i64>, align 64
504// CHECK: [[__A_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64
505// CHECK: [[__B_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64
506// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64
507// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
508// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
509// CHECK: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8
510// CHECK: [[_COMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64
511// CHECK: [[__M_ADDR_I:%.*]] = alloca i8, align 1
512// CHECK: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
513// CHECK: [[__M_ADDR:%.*]] = alloca i8, align 1
514// CHECK: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
515// CHECK: store i8 %__M, i8* [[__M_ADDR]], align 1
516// CHECK: store <8 x i64> %__W, <8 x i64>* [[__W_ADDR]], align 64
517// CHECK: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1
518// CHECK: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
519// CHECK: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1
520// CHECK: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64
521// CHECK: [[TMP2:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1
522// CHECK: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
523// CHECK: store i64 0, i64* [[__D_ADDR_I_I]], align 8
524// CHECK: [[TMP4:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
525// CHECK: [[VECINIT_I_I:%.*]] = insertelement <8 x i64> undef, i64 [[TMP4]], i32 0
526// CHECK: [[TMP5:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
527// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <8 x i64> [[VECINIT_I_I]], i64 [[TMP5]], i32 1
528// CHECK: [[TMP6:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
529// CHECK: [[VECINIT2_I_I:%.*]] = insertelement <8 x i64> [[VECINIT1_I_I]], i64 [[TMP6]], i32 2
530// CHECK: [[TMP7:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
531// CHECK: [[VECINIT3_I_I:%.*]] = insertelement <8 x i64> [[VECINIT2_I_I]], i64 [[TMP7]], i32 3
532// CHECK: [[TMP8:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
533// CHECK: [[VECINIT4_I_I:%.*]] = insertelement <8 x i64> [[VECINIT3_I_I]], i64 [[TMP8]], i32 4
534// CHECK: [[TMP9:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
535// CHECK: [[VECINIT5_I_I:%.*]] = insertelement <8 x i64> [[VECINIT4_I_I]], i64 [[TMP9]], i32 5
536// CHECK: [[TMP10:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
537// CHECK: [[VECINIT6_I_I:%.*]] = insertelement <8 x i64> [[VECINIT5_I_I]], i64 [[TMP10]], i32 6
538// CHECK: [[TMP11:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
539// CHECK: [[VECINIT7_I_I:%.*]] = insertelement <8 x i64> [[VECINIT6_I_I]], i64 [[TMP11]], i32 7
540// CHECK: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* [[_COMPOUNDLITERAL_I_I]], align 64
541// CHECK: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I]], align 64
542// CHECK: [[TMP13:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
543// CHECK: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> [[TMP3]], <8 x i64> [[TMP12]]
544// CHECK: store <8 x i64> [[TMP14]], <8 x i64>* [[__V_ADDR_I]], align 64
545// CHECK: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
546// CHECK: [[TMP16:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
547// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP15]], <8 x i64> [[TMP16]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
548// CHECK: [[TMP17:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
549// CHECK: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
550// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <8 x i64> [[TMP17]], <8 x i64> [[TMP18]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
551// CHECK: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__A_ADDR_I13_I]], align 64
552// CHECK: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__B_ADDR_I14_I]], align 64
553// CHECK: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I13_I]], align 64
554// CHECK: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I14_I]], align 64
555// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
556// CHECK: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
557// CHECK: [[TMP22:%.*]] = icmp ugt <8 x i64> [[TMP19]], [[TMP20]]
558// CHECK: [[TMP23:%.*]] = select <8 x i1> [[TMP22]], <8 x i64> [[TMP19]], <8 x i64> [[TMP20]]
559// CHECK: store <8 x i64> [[TMP23]], <8 x i64>* [[__V_ADDR_I]], align 64
560// CHECK: [[TMP24:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
561// CHECK: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
562// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP24]], <8 x i64> [[TMP25]], <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
563// CHECK: [[TMP26:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
564// CHECK: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
565// CHECK: [[SHUFFLE4_I:%.*]] = shufflevector <8 x i64> [[TMP26]], <8 x i64> [[TMP27]], <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
566// CHECK: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__A_ADDR_I10_I]], align 64
567// CHECK: store <8 x i64> [[SHUFFLE4_I]], <8 x i64>* [[__B_ADDR_I11_I]], align 64
568// CHECK: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I10_I]], align 64
569// CHECK: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I11_I]], align 64
570// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I9_I]], align 64
571// CHECK: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I9_I]], align 64
572// CHECK: [[TMP31:%.*]] = icmp ugt <8 x i64> [[TMP28]], [[TMP29]]
573// CHECK: [[TMP32:%.*]] = select <8 x i1> [[TMP31]], <8 x i64> [[TMP28]], <8 x i64> [[TMP29]]
574// CHECK: store <8 x i64> [[TMP32]], <8 x i64>* [[__V_ADDR_I]], align 64
575// CHECK: [[TMP33:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
576// CHECK: [[TMP34:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
577// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <8 x i64> [[TMP33]], <8 x i64> [[TMP34]], <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
578// CHECK: [[TMP35:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
579// CHECK: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
580// CHECK: [[SHUFFLE7_I:%.*]] = shufflevector <8 x i64> [[TMP35]], <8 x i64> [[TMP36]], <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
581// CHECK: store <8 x i64> [[SHUFFLE6_I]], <8 x i64>* [[__A_ADDR_I_I]], align 64
582// CHECK: store <8 x i64> [[SHUFFLE7_I]], <8 x i64>* [[__B_ADDR_I_I]], align 64
583// CHECK: [[TMP37:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
584// CHECK: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64
585// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
586// CHECK: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
587// CHECK: [[TMP40:%.*]] = icmp ugt <8 x i64> [[TMP37]], [[TMP38]]
588// CHECK: [[TMP41:%.*]] = select <8 x i1> [[TMP40]], <8 x i64> [[TMP37]], <8 x i64> [[TMP38]]
589// CHECK: store <8 x i64> [[TMP41]], <8 x i64>* [[__V_ADDR_I]], align 64
590// CHECK: [[TMP42:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
591// CHECK: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP42]], i32 0
592// CHECK: ret i64 [[VECEXT_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000593unsigned long test_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000594 return _mm512_mask_reduce_max_epu64(__M, __W);
595}
596
Sanjay Patel1acd2cf2017-09-25 19:56:57 +0000597// CHECK-LABEL: define i64 @test_mm512_mask_reduce_max_pd(i8 zeroext %__M, <8 x double> %__W) #0 {
598// CHECK: [[_COMPOUNDLITERAL_I_I12_I:%.*]] = alloca <8 x double>, align 64
599// CHECK: [[__A_ADDR_I13_I:%.*]] = alloca <8 x double>, align 64
600// CHECK: [[__B_ADDR_I14_I:%.*]] = alloca <8 x double>, align 64
601// CHECK: [[_COMPOUNDLITERAL_I_I9_I:%.*]] = alloca <8 x double>, align 64
602// CHECK: [[__A_ADDR_I10_I:%.*]] = alloca <8 x double>, align 64
603// CHECK: [[__B_ADDR_I11_I:%.*]] = alloca <8 x double>, align 64
604// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x double>, align 64
605// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <8 x double>, align 64
606// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <8 x double>, align 64
607// CHECK: [[__W_ADDR_I_I:%.*]] = alloca double, align 8
608// CHECK: [[_COMPOUNDLITERAL_I_I:%.*]] = alloca <8 x double>, align 64
609// CHECK: [[__M_ADDR_I:%.*]] = alloca i8, align 1
610// CHECK: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64
611// CHECK: [[__M_ADDR:%.*]] = alloca i8, align 1
612// CHECK: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64
613// CHECK: store i8 %__M, i8* [[__M_ADDR]], align 1
614// CHECK: store <8 x double> %__W, <8 x double>* [[__W_ADDR]], align 64
615// CHECK: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1
616// CHECK: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64
617// CHECK: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1
618// CHECK: store <8 x double> [[TMP1]], <8 x double>* [[__V_ADDR_I]], align 64
619// CHECK: [[TMP2:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1
620// CHECK: [[TMP3:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
621// CHECK: store double 0x7FF0000000000000, double* [[__W_ADDR_I_I]], align 8
622// CHECK: [[TMP4:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
623// CHECK: [[VECINIT_I_I:%.*]] = insertelement <8 x double> undef, double [[TMP4]], i32 0
624// CHECK: [[TMP5:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
625// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <8 x double> [[VECINIT_I_I]], double [[TMP5]], i32 1
626// CHECK: [[TMP6:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
627// CHECK: [[VECINIT2_I_I:%.*]] = insertelement <8 x double> [[VECINIT1_I_I]], double [[TMP6]], i32 2
628// CHECK: [[TMP7:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
629// CHECK: [[VECINIT3_I_I:%.*]] = insertelement <8 x double> [[VECINIT2_I_I]], double [[TMP7]], i32 3
630// CHECK: [[TMP8:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
631// CHECK: [[VECINIT4_I_I:%.*]] = insertelement <8 x double> [[VECINIT3_I_I]], double [[TMP8]], i32 4
632// CHECK: [[TMP9:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
633// CHECK: [[VECINIT5_I_I:%.*]] = insertelement <8 x double> [[VECINIT4_I_I]], double [[TMP9]], i32 5
634// CHECK: [[TMP10:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
635// CHECK: [[VECINIT6_I_I:%.*]] = insertelement <8 x double> [[VECINIT5_I_I]], double [[TMP10]], i32 6
636// CHECK: [[TMP11:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
637// CHECK: [[VECINIT7_I_I:%.*]] = insertelement <8 x double> [[VECINIT6_I_I]], double [[TMP11]], i32 7
638// CHECK: store <8 x double> [[VECINIT7_I_I]], <8 x double>* [[_COMPOUNDLITERAL_I_I]], align 64
639// CHECK: [[TMP12:%.*]] = load <8 x double>, <8 x double>* [[_COMPOUNDLITERAL_I_I]], align 64
640// CHECK: [[SUB_I:%.*]] = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, [[TMP12]]
641// CHECK: [[TMP13:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
642// CHECK: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x double> [[TMP3]], <8 x double> [[SUB_I]]
643// CHECK: store <8 x double> [[TMP14]], <8 x double>* [[__V_ADDR_I]], align 64
644// CHECK: [[TMP15:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
645// CHECK: [[TMP16:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
646// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x double> [[TMP15]], <8 x double> [[TMP16]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
647// CHECK: [[TMP17:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
648// CHECK: [[TMP18:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
649// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <8 x double> [[TMP17]], <8 x double> [[TMP18]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
650// CHECK: store <8 x double> [[SHUFFLE_I]], <8 x double>* [[__A_ADDR_I13_I]], align 64
651// CHECK: store <8 x double> [[SHUFFLE1_I]], <8 x double>* [[__B_ADDR_I14_I]], align 64
652// CHECK: [[TMP19:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I13_I]], align 64
653// CHECK: [[TMP20:%.*]] = load <8 x double>, <8 x double>* [[__B_ADDR_I14_I]], align 64
654// CHECK: store <8 x double> zeroinitializer, <8 x double>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
655// CHECK: [[TMP21:%.*]] = load <8 x double>, <8 x double>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
656// CHECK: [[TMP22:%.*]] = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> [[TMP19]], <8 x double> [[TMP20]], <8 x double> [[TMP21]], i8 -1, i32 4) #2
657// CHECK: store <8 x double> [[TMP22]], <8 x double>* [[__V_ADDR_I]], align 64
658// CHECK: [[TMP23:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
659// CHECK: [[TMP24:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
660// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <8 x double> [[TMP23]], <8 x double> [[TMP24]], <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
661// CHECK: [[TMP25:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
662// CHECK: [[TMP26:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
663// CHECK: [[SHUFFLE4_I:%.*]] = shufflevector <8 x double> [[TMP25]], <8 x double> [[TMP26]], <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
664// CHECK: store <8 x double> [[SHUFFLE3_I]], <8 x double>* [[__A_ADDR_I10_I]], align 64
665// CHECK: store <8 x double> [[SHUFFLE4_I]], <8 x double>* [[__B_ADDR_I11_I]], align 64
666// CHECK: [[TMP27:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I10_I]], align 64
667// CHECK: [[TMP28:%.*]] = load <8 x double>, <8 x double>* [[__B_ADDR_I11_I]], align 64
668// CHECK: store <8 x double> zeroinitializer, <8 x double>* [[_COMPOUNDLITERAL_I_I9_I]], align 64
669// CHECK: [[TMP29:%.*]] = load <8 x double>, <8 x double>* [[_COMPOUNDLITERAL_I_I9_I]], align 64
670// CHECK: [[TMP30:%.*]] = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> [[TMP27]], <8 x double> [[TMP28]], <8 x double> [[TMP29]], i8 -1, i32 4) #2
671// CHECK: store <8 x double> [[TMP30]], <8 x double>* [[__V_ADDR_I]], align 64
672// CHECK: [[TMP31:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
673// CHECK: [[TMP32:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
674// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <8 x double> [[TMP31]], <8 x double> [[TMP32]], <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
675// CHECK: [[TMP33:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
676// CHECK: [[TMP34:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
677// CHECK: [[SHUFFLE7_I:%.*]] = shufflevector <8 x double> [[TMP33]], <8 x double> [[TMP34]], <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
678// CHECK: store <8 x double> [[SHUFFLE6_I]], <8 x double>* [[__A_ADDR_I_I]], align 64
679// CHECK: store <8 x double> [[SHUFFLE7_I]], <8 x double>* [[__B_ADDR_I_I]], align 64
680// CHECK: [[TMP35:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I_I]], align 64
681// CHECK: [[TMP36:%.*]] = load <8 x double>, <8 x double>* [[__B_ADDR_I_I]], align 64
682// CHECK: store <8 x double> zeroinitializer, <8 x double>* [[_COMPOUNDLITERAL_I_I_I]], align 64
683// CHECK: [[TMP37:%.*]] = load <8 x double>, <8 x double>* [[_COMPOUNDLITERAL_I_I_I]], align 64
684// CHECK: [[TMP38:%.*]] = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> [[TMP35]], <8 x double> [[TMP36]], <8 x double> [[TMP37]], i8 -1, i32 4) #2
685// CHECK: store <8 x double> [[TMP38]], <8 x double>* [[__V_ADDR_I]], align 64
686// CHECK: [[TMP39:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
687// CHECK: [[VECEXT_I:%.*]] = extractelement <8 x double> [[TMP39]], i32 0
688// CHECK: [[CONV:%.*]] = fptosi double [[VECEXT_I]] to i64
689// CHECK: ret i64 [[CONV]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000690long long test_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000691 return _mm512_mask_reduce_max_pd(__M, __W);
692}
693
Sanjay Patel1acd2cf2017-09-25 19:56:57 +0000694// CHECK-LABEL: define i64 @test_mm512_mask_reduce_min_epi64(i8 zeroext %__M, <8 x i64> %__W) #0 {
695// CHECK: [[_COMPOUNDLITERAL_I_I12_I:%.*]] = alloca <8 x i64>, align 64
696// CHECK: [[__A_ADDR_I13_I:%.*]] = alloca <8 x i64>, align 64
697// CHECK: [[__B_ADDR_I14_I:%.*]] = alloca <8 x i64>, align 64
698// CHECK: [[_COMPOUNDLITERAL_I_I9_I:%.*]] = alloca <8 x i64>, align 64
699// CHECK: [[__A_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64
700// CHECK: [[__B_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64
701// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64
702// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
703// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
704// CHECK: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8
705// CHECK: [[_COMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64
706// CHECK: [[__M_ADDR_I:%.*]] = alloca i8, align 1
707// CHECK: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
708// CHECK: [[__M_ADDR:%.*]] = alloca i8, align 1
709// CHECK: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
710// CHECK: store i8 %__M, i8* [[__M_ADDR]], align 1
711// CHECK: store <8 x i64> %__W, <8 x i64>* [[__W_ADDR]], align 64
712// CHECK: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1
713// CHECK: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
714// CHECK: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1
715// CHECK: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64
716// CHECK: [[TMP2:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1
717// CHECK: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
718// CHECK: store i64 9223372036854775807, i64* [[__D_ADDR_I_I]], align 8
719// CHECK: [[TMP4:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
720// CHECK: [[VECINIT_I_I:%.*]] = insertelement <8 x i64> undef, i64 [[TMP4]], i32 0
721// CHECK: [[TMP5:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
722// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <8 x i64> [[VECINIT_I_I]], i64 [[TMP5]], i32 1
723// CHECK: [[TMP6:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
724// CHECK: [[VECINIT2_I_I:%.*]] = insertelement <8 x i64> [[VECINIT1_I_I]], i64 [[TMP6]], i32 2
725// CHECK: [[TMP7:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
726// CHECK: [[VECINIT3_I_I:%.*]] = insertelement <8 x i64> [[VECINIT2_I_I]], i64 [[TMP7]], i32 3
727// CHECK: [[TMP8:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
728// CHECK: [[VECINIT4_I_I:%.*]] = insertelement <8 x i64> [[VECINIT3_I_I]], i64 [[TMP8]], i32 4
729// CHECK: [[TMP9:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
730// CHECK: [[VECINIT5_I_I:%.*]] = insertelement <8 x i64> [[VECINIT4_I_I]], i64 [[TMP9]], i32 5
731// CHECK: [[TMP10:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
732// CHECK: [[VECINIT6_I_I:%.*]] = insertelement <8 x i64> [[VECINIT5_I_I]], i64 [[TMP10]], i32 6
733// CHECK: [[TMP11:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
734// CHECK: [[VECINIT7_I_I:%.*]] = insertelement <8 x i64> [[VECINIT6_I_I]], i64 [[TMP11]], i32 7
735// CHECK: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* [[_COMPOUNDLITERAL_I_I]], align 64
736// CHECK: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I]], align 64
737// CHECK: [[TMP13:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
738// CHECK: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> [[TMP3]], <8 x i64> [[TMP12]]
739// CHECK: store <8 x i64> [[TMP14]], <8 x i64>* [[__V_ADDR_I]], align 64
740// CHECK: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
741// CHECK: [[TMP16:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
742// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP15]], <8 x i64> [[TMP16]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
743// CHECK: [[TMP17:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
744// CHECK: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
745// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <8 x i64> [[TMP17]], <8 x i64> [[TMP18]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
746// CHECK: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__A_ADDR_I13_I]], align 64
747// CHECK: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__B_ADDR_I14_I]], align 64
748// CHECK: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I13_I]], align 64
749// CHECK: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I14_I]], align 64
750// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
751// CHECK: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
752// CHECK: [[TMP22:%.*]] = icmp slt <8 x i64> [[TMP19]], [[TMP20]]
753// CHECK: [[TMP23:%.*]] = select <8 x i1> [[TMP22]], <8 x i64> [[TMP19]], <8 x i64> [[TMP20]]
754// CHECK: store <8 x i64> [[TMP23]], <8 x i64>* [[__V_ADDR_I]], align 64
755// CHECK: [[TMP24:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
756// CHECK: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
757// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP24]], <8 x i64> [[TMP25]], <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
758// CHECK: [[TMP26:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
759// CHECK: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
760// CHECK: [[SHUFFLE4_I:%.*]] = shufflevector <8 x i64> [[TMP26]], <8 x i64> [[TMP27]], <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
761// CHECK: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__A_ADDR_I10_I]], align 64
762// CHECK: store <8 x i64> [[SHUFFLE4_I]], <8 x i64>* [[__B_ADDR_I11_I]], align 64
763// CHECK: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I10_I]], align 64
764// CHECK: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I11_I]], align 64
765// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I9_I]], align 64
766// CHECK: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I9_I]], align 64
767// CHECK: [[TMP31:%.*]] = icmp slt <8 x i64> [[TMP28]], [[TMP29]]
768// CHECK: [[TMP32:%.*]] = select <8 x i1> [[TMP31]], <8 x i64> [[TMP28]], <8 x i64> [[TMP29]]
769// CHECK: store <8 x i64> [[TMP32]], <8 x i64>* [[__V_ADDR_I]], align 64
770// CHECK: [[TMP33:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
771// CHECK: [[TMP34:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
772// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <8 x i64> [[TMP33]], <8 x i64> [[TMP34]], <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
773// CHECK: [[TMP35:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
774// CHECK: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
775// CHECK: [[SHUFFLE7_I:%.*]] = shufflevector <8 x i64> [[TMP35]], <8 x i64> [[TMP36]], <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
776// CHECK: store <8 x i64> [[SHUFFLE6_I]], <8 x i64>* [[__A_ADDR_I_I]], align 64
777// CHECK: store <8 x i64> [[SHUFFLE7_I]], <8 x i64>* [[__B_ADDR_I_I]], align 64
778// CHECK: [[TMP37:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
779// CHECK: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64
780// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
781// CHECK: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
782// CHECK: [[TMP40:%.*]] = icmp slt <8 x i64> [[TMP37]], [[TMP38]]
783// CHECK: [[TMP41:%.*]] = select <8 x i1> [[TMP40]], <8 x i64> [[TMP37]], <8 x i64> [[TMP38]]
784// CHECK: store <8 x i64> [[TMP41]], <8 x i64>* [[__V_ADDR_I]], align 64
785// CHECK: [[TMP42:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
786// CHECK: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP42]], i32 0
787// CHECK: ret i64 [[VECEXT_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000788long long test_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000789 return _mm512_mask_reduce_min_epi64(__M, __W);
790}
791
Sanjay Patel1acd2cf2017-09-25 19:56:57 +0000792// CHECK-LABEL: define i64 @test_mm512_mask_reduce_min_epu64(i8 zeroext %__M, <8 x i64> %__W) #0 {
793// CHECK: [[_COMPOUNDLITERAL_I_I12_I:%.*]] = alloca <8 x i64>, align 64
794// CHECK: [[__A_ADDR_I13_I:%.*]] = alloca <8 x i64>, align 64
795// CHECK: [[__B_ADDR_I14_I:%.*]] = alloca <8 x i64>, align 64
796// CHECK: [[_COMPOUNDLITERAL_I_I9_I:%.*]] = alloca <8 x i64>, align 64
797// CHECK: [[__A_ADDR_I10_I:%.*]] = alloca <8 x i64>, align 64
798// CHECK: [[__B_ADDR_I11_I:%.*]] = alloca <8 x i64>, align 64
799// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64
800// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
801// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
802// CHECK: [[__D_ADDR_I_I:%.*]] = alloca i64, align 8
803// CHECK: [[_COMPOUNDLITERAL_I_I:%.*]] = alloca <8 x i64>, align 64
804// CHECK: [[__M_ADDR_I:%.*]] = alloca i8, align 1
805// CHECK: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
806// CHECK: [[__M_ADDR:%.*]] = alloca i8, align 1
807// CHECK: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
808// CHECK: store i8 %__M, i8* [[__M_ADDR]], align 1
809// CHECK: store <8 x i64> %__W, <8 x i64>* [[__W_ADDR]], align 64
810// CHECK: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1
811// CHECK: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
812// CHECK: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1
813// CHECK: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64
814// CHECK: [[TMP2:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1
815// CHECK: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
816// CHECK: store i64 0, i64* [[__D_ADDR_I_I]], align 8
817// CHECK: [[TMP4:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
818// CHECK: [[VECINIT_I_I:%.*]] = insertelement <8 x i64> undef, i64 [[TMP4]], i32 0
819// CHECK: [[TMP5:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
820// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <8 x i64> [[VECINIT_I_I]], i64 [[TMP5]], i32 1
821// CHECK: [[TMP6:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
822// CHECK: [[VECINIT2_I_I:%.*]] = insertelement <8 x i64> [[VECINIT1_I_I]], i64 [[TMP6]], i32 2
823// CHECK: [[TMP7:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
824// CHECK: [[VECINIT3_I_I:%.*]] = insertelement <8 x i64> [[VECINIT2_I_I]], i64 [[TMP7]], i32 3
825// CHECK: [[TMP8:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
826// CHECK: [[VECINIT4_I_I:%.*]] = insertelement <8 x i64> [[VECINIT3_I_I]], i64 [[TMP8]], i32 4
827// CHECK: [[TMP9:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
828// CHECK: [[VECINIT5_I_I:%.*]] = insertelement <8 x i64> [[VECINIT4_I_I]], i64 [[TMP9]], i32 5
829// CHECK: [[TMP10:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
830// CHECK: [[VECINIT6_I_I:%.*]] = insertelement <8 x i64> [[VECINIT5_I_I]], i64 [[TMP10]], i32 6
831// CHECK: [[TMP11:%.*]] = load i64, i64* [[__D_ADDR_I_I]], align 8
832// CHECK: [[VECINIT7_I_I:%.*]] = insertelement <8 x i64> [[VECINIT6_I_I]], i64 [[TMP11]], i32 7
833// CHECK: store <8 x i64> [[VECINIT7_I_I]], <8 x i64>* [[_COMPOUNDLITERAL_I_I]], align 64
834// CHECK: [[TMP12:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I]], align 64
835// CHECK: [[TMP13:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
836// CHECK: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> [[TMP3]], <8 x i64> [[TMP12]]
837// CHECK: store <8 x i64> [[TMP14]], <8 x i64>* [[__V_ADDR_I]], align 64
838// CHECK: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
839// CHECK: [[TMP16:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
840// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i64> [[TMP15]], <8 x i64> [[TMP16]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
841// CHECK: [[TMP17:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
842// CHECK: [[TMP18:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
843// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <8 x i64> [[TMP17]], <8 x i64> [[TMP18]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
844// CHECK: store <8 x i64> [[SHUFFLE_I]], <8 x i64>* [[__A_ADDR_I13_I]], align 64
845// CHECK: store <8 x i64> [[SHUFFLE1_I]], <8 x i64>* [[__B_ADDR_I14_I]], align 64
846// CHECK: [[TMP19:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I13_I]], align 64
847// CHECK: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I14_I]], align 64
848// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
849// CHECK: [[TMP21:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
850// CHECK: [[TMP22:%.*]] = icmp ugt <8 x i64> [[TMP19]], [[TMP20]]
851// CHECK: [[TMP23:%.*]] = select <8 x i1> [[TMP22]], <8 x i64> [[TMP19]], <8 x i64> [[TMP20]]
852// CHECK: store <8 x i64> [[TMP23]], <8 x i64>* [[__V_ADDR_I]], align 64
853// CHECK: [[TMP24:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
854// CHECK: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
855// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <8 x i64> [[TMP24]], <8 x i64> [[TMP25]], <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
856// CHECK: [[TMP26:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
857// CHECK: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
858// CHECK: [[SHUFFLE4_I:%.*]] = shufflevector <8 x i64> [[TMP26]], <8 x i64> [[TMP27]], <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
859// CHECK: store <8 x i64> [[SHUFFLE3_I]], <8 x i64>* [[__A_ADDR_I10_I]], align 64
860// CHECK: store <8 x i64> [[SHUFFLE4_I]], <8 x i64>* [[__B_ADDR_I11_I]], align 64
861// CHECK: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I10_I]], align 64
862// CHECK: [[TMP29:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I11_I]], align 64
863// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I9_I]], align 64
864// CHECK: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I9_I]], align 64
865// CHECK: [[TMP31:%.*]] = icmp ugt <8 x i64> [[TMP28]], [[TMP29]]
866// CHECK: [[TMP32:%.*]] = select <8 x i1> [[TMP31]], <8 x i64> [[TMP28]], <8 x i64> [[TMP29]]
867// CHECK: store <8 x i64> [[TMP32]], <8 x i64>* [[__V_ADDR_I]], align 64
868// CHECK: [[TMP33:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
869// CHECK: [[TMP34:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
870// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <8 x i64> [[TMP33]], <8 x i64> [[TMP34]], <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
871// CHECK: [[TMP35:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
872// CHECK: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
873// CHECK: [[SHUFFLE7_I:%.*]] = shufflevector <8 x i64> [[TMP35]], <8 x i64> [[TMP36]], <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
874// CHECK: store <8 x i64> [[SHUFFLE6_I]], <8 x i64>* [[__A_ADDR_I_I]], align 64
875// CHECK: store <8 x i64> [[SHUFFLE7_I]], <8 x i64>* [[__B_ADDR_I_I]], align 64
876// CHECK: [[TMP37:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
877// CHECK: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64
878// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
879// CHECK: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
880// CHECK: [[TMP40:%.*]] = icmp ugt <8 x i64> [[TMP37]], [[TMP38]]
881// CHECK: [[TMP41:%.*]] = select <8 x i1> [[TMP40]], <8 x i64> [[TMP37]], <8 x i64> [[TMP38]]
882// CHECK: store <8 x i64> [[TMP41]], <8 x i64>* [[__V_ADDR_I]], align 64
883// CHECK: [[TMP42:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
884// CHECK: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP42]], i32 0
885// CHECK: ret i64 [[VECEXT_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000886long long test_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000887 return _mm512_mask_reduce_max_epu64(__M, __W);
888}
889
Sanjay Patel1acd2cf2017-09-25 19:56:57 +0000890// CHECK-LABEL: define double @test_mm512_mask_reduce_min_pd(i8 zeroext %__M, <8 x double> %__W) #0 {
891// CHECK: [[_COMPOUNDLITERAL_I_I12_I:%.*]] = alloca <8 x double>, align 64
892// CHECK: [[__A_ADDR_I13_I:%.*]] = alloca <8 x double>, align 64
893// CHECK: [[__B_ADDR_I14_I:%.*]] = alloca <8 x double>, align 64
894// CHECK: [[_COMPOUNDLITERAL_I_I9_I:%.*]] = alloca <8 x double>, align 64
895// CHECK: [[__A_ADDR_I10_I:%.*]] = alloca <8 x double>, align 64
896// CHECK: [[__B_ADDR_I11_I:%.*]] = alloca <8 x double>, align 64
897// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x double>, align 64
898// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <8 x double>, align 64
899// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <8 x double>, align 64
900// CHECK: [[__W_ADDR_I_I:%.*]] = alloca double, align 8
901// CHECK: [[_COMPOUNDLITERAL_I_I:%.*]] = alloca <8 x double>, align 64
902// CHECK: [[__M_ADDR_I:%.*]] = alloca i8, align 1
903// CHECK: [[__V_ADDR_I:%.*]] = alloca <8 x double>, align 64
904// CHECK: [[__M_ADDR:%.*]] = alloca i8, align 1
905// CHECK: [[__W_ADDR:%.*]] = alloca <8 x double>, align 64
906// CHECK: store i8 %__M, i8* [[__M_ADDR]], align 1
907// CHECK: store <8 x double> %__W, <8 x double>* [[__W_ADDR]], align 64
908// CHECK: [[TMP0:%.*]] = load i8, i8* [[__M_ADDR]], align 1
909// CHECK: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[__W_ADDR]], align 64
910// CHECK: store i8 [[TMP0]], i8* [[__M_ADDR_I]], align 1
911// CHECK: store <8 x double> [[TMP1]], <8 x double>* [[__V_ADDR_I]], align 64
912// CHECK: [[TMP2:%.*]] = load i8, i8* [[__M_ADDR_I]], align 1
913// CHECK: [[TMP3:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
914// CHECK: store double 0x7FF0000000000000, double* [[__W_ADDR_I_I]], align 8
915// CHECK: [[TMP4:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
916// CHECK: [[VECINIT_I_I:%.*]] = insertelement <8 x double> undef, double [[TMP4]], i32 0
917// CHECK: [[TMP5:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
918// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <8 x double> [[VECINIT_I_I]], double [[TMP5]], i32 1
919// CHECK: [[TMP6:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
920// CHECK: [[VECINIT2_I_I:%.*]] = insertelement <8 x double> [[VECINIT1_I_I]], double [[TMP6]], i32 2
921// CHECK: [[TMP7:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
922// CHECK: [[VECINIT3_I_I:%.*]] = insertelement <8 x double> [[VECINIT2_I_I]], double [[TMP7]], i32 3
923// CHECK: [[TMP8:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
924// CHECK: [[VECINIT4_I_I:%.*]] = insertelement <8 x double> [[VECINIT3_I_I]], double [[TMP8]], i32 4
925// CHECK: [[TMP9:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
926// CHECK: [[VECINIT5_I_I:%.*]] = insertelement <8 x double> [[VECINIT4_I_I]], double [[TMP9]], i32 5
927// CHECK: [[TMP10:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
928// CHECK: [[VECINIT6_I_I:%.*]] = insertelement <8 x double> [[VECINIT5_I_I]], double [[TMP10]], i32 6
929// CHECK: [[TMP11:%.*]] = load double, double* [[__W_ADDR_I_I]], align 8
930// CHECK: [[VECINIT7_I_I:%.*]] = insertelement <8 x double> [[VECINIT6_I_I]], double [[TMP11]], i32 7
931// CHECK: store <8 x double> [[VECINIT7_I_I]], <8 x double>* [[_COMPOUNDLITERAL_I_I]], align 64
932// CHECK: [[TMP12:%.*]] = load <8 x double>, <8 x double>* [[_COMPOUNDLITERAL_I_I]], align 64
933// CHECK: [[TMP13:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
934// CHECK: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x double> [[TMP3]], <8 x double> [[TMP12]]
935// CHECK: store <8 x double> [[TMP14]], <8 x double>* [[__V_ADDR_I]], align 64
936// CHECK: [[TMP15:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
937// CHECK: [[TMP16:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
938// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x double> [[TMP15]], <8 x double> [[TMP16]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
939// CHECK: [[TMP17:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
940// CHECK: [[TMP18:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
941// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <8 x double> [[TMP17]], <8 x double> [[TMP18]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
942// CHECK: store <8 x double> [[SHUFFLE_I]], <8 x double>* [[__A_ADDR_I13_I]], align 64
943// CHECK: store <8 x double> [[SHUFFLE1_I]], <8 x double>* [[__B_ADDR_I14_I]], align 64
944// CHECK: [[TMP19:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I13_I]], align 64
945// CHECK: [[TMP20:%.*]] = load <8 x double>, <8 x double>* [[__B_ADDR_I14_I]], align 64
946// CHECK: store <8 x double> zeroinitializer, <8 x double>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
947// CHECK: [[TMP21:%.*]] = load <8 x double>, <8 x double>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
948// CHECK: [[TMP22:%.*]] = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> [[TMP19]], <8 x double> [[TMP20]], <8 x double> [[TMP21]], i8 -1, i32 4) #2
949// CHECK: store <8 x double> [[TMP22]], <8 x double>* [[__V_ADDR_I]], align 64
950// CHECK: [[TMP23:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
951// CHECK: [[TMP24:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
952// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <8 x double> [[TMP23]], <8 x double> [[TMP24]], <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
953// CHECK: [[TMP25:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
954// CHECK: [[TMP26:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
955// CHECK: [[SHUFFLE4_I:%.*]] = shufflevector <8 x double> [[TMP25]], <8 x double> [[TMP26]], <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
956// CHECK: store <8 x double> [[SHUFFLE3_I]], <8 x double>* [[__A_ADDR_I10_I]], align 64
957// CHECK: store <8 x double> [[SHUFFLE4_I]], <8 x double>* [[__B_ADDR_I11_I]], align 64
958// CHECK: [[TMP27:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I10_I]], align 64
959// CHECK: [[TMP28:%.*]] = load <8 x double>, <8 x double>* [[__B_ADDR_I11_I]], align 64
960// CHECK: store <8 x double> zeroinitializer, <8 x double>* [[_COMPOUNDLITERAL_I_I9_I]], align 64
961// CHECK: [[TMP29:%.*]] = load <8 x double>, <8 x double>* [[_COMPOUNDLITERAL_I_I9_I]], align 64
962// CHECK: [[TMP30:%.*]] = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> [[TMP27]], <8 x double> [[TMP28]], <8 x double> [[TMP29]], i8 -1, i32 4) #2
963// CHECK: store <8 x double> [[TMP30]], <8 x double>* [[__V_ADDR_I]], align 64
964// CHECK: [[TMP31:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
965// CHECK: [[TMP32:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
966// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <8 x double> [[TMP31]], <8 x double> [[TMP32]], <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
967// CHECK: [[TMP33:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
968// CHECK: [[TMP34:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
969// CHECK: [[SHUFFLE7_I:%.*]] = shufflevector <8 x double> [[TMP33]], <8 x double> [[TMP34]], <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
970// CHECK: store <8 x double> [[SHUFFLE6_I]], <8 x double>* [[__A_ADDR_I_I]], align 64
971// CHECK: store <8 x double> [[SHUFFLE7_I]], <8 x double>* [[__B_ADDR_I_I]], align 64
972// CHECK: [[TMP35:%.*]] = load <8 x double>, <8 x double>* [[__A_ADDR_I_I]], align 64
973// CHECK: [[TMP36:%.*]] = load <8 x double>, <8 x double>* [[__B_ADDR_I_I]], align 64
974// CHECK: store <8 x double> zeroinitializer, <8 x double>* [[_COMPOUNDLITERAL_I_I_I]], align 64
975// CHECK: [[TMP37:%.*]] = load <8 x double>, <8 x double>* [[_COMPOUNDLITERAL_I_I_I]], align 64
976// CHECK: [[TMP38:%.*]] = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> [[TMP35]], <8 x double> [[TMP36]], <8 x double> [[TMP37]], i8 -1, i32 4) #2
977// CHECK: store <8 x double> [[TMP38]], <8 x double>* [[__V_ADDR_I]], align 64
978// CHECK: [[TMP39:%.*]] = load <8 x double>, <8 x double>* [[__V_ADDR_I]], align 64
979// CHECK: [[VECEXT_I:%.*]] = extractelement <8 x double> [[TMP39]], i32 0
980// CHECK: ret double [[VECEXT_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000981double test_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +0000982 return _mm512_mask_reduce_min_pd(__M, __W);
983}
984
Sanjay Patel1acd2cf2017-09-25 19:56:57 +0000985// CHECK-LABEL: define i32 @test_mm512_reduce_max_epi32(<8 x i64> %__W) #0 {
986// CHECK: [[_COMPOUNDLITERAL_I_I17_I:%.*]] = alloca <8 x i64>, align 64
987// CHECK: [[__A_ADDR_I18_I:%.*]] = alloca <8 x i64>, align 64
988// CHECK: [[__B_ADDR_I19_I:%.*]] = alloca <8 x i64>, align 64
989// CHECK: [[_COMPOUNDLITERAL_I_I14_I:%.*]] = alloca <8 x i64>, align 64
990// CHECK: [[__A_ADDR_I15_I:%.*]] = alloca <8 x i64>, align 64
991// CHECK: [[__B_ADDR_I16_I:%.*]] = alloca <8 x i64>, align 64
992// CHECK: [[_COMPOUNDLITERAL_I_I11_I:%.*]] = alloca <8 x i64>, align 64
993// CHECK: [[__A_ADDR_I12_I:%.*]] = alloca <8 x i64>, align 64
994// CHECK: [[__B_ADDR_I13_I:%.*]] = alloca <8 x i64>, align 64
995// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64
996// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
997// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
998// CHECK: [[A_ADDR_I:%.*]] = alloca <8 x i64>, align 64
999// CHECK: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
1000// CHECK: store <8 x i64> %__W, <8 x i64>* [[__W_ADDR]], align 64
1001// CHECK: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
1002// CHECK: store <8 x i64> [[TMP0]], <8 x i64>* [[A_ADDR_I]], align 64
1003// CHECK: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1004// CHECK: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to <16 x i32>
1005// CHECK: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1006// CHECK: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP3]] to <16 x i32>
1007// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1008// CHECK: [[TMP5:%.*]] = bitcast <16 x i32> [[SHUFFLE_I]] to <8 x i64>
1009// CHECK: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1010// CHECK: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP6]] to <16 x i32>
1011// CHECK: [[TMP8:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1012// CHECK: [[TMP9:%.*]] = bitcast <8 x i64> [[TMP8]] to <16 x i32>
1013// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <16 x i32> [[TMP7]], <16 x i32> [[TMP9]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1014// CHECK: [[TMP10:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I]] to <8 x i64>
1015// CHECK: store <8 x i64> [[TMP5]], <8 x i64>* [[__A_ADDR_I_I]], align 64
1016// CHECK: store <8 x i64> [[TMP10]], <8 x i64>* [[__B_ADDR_I_I]], align 64
1017// CHECK: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
1018// CHECK: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP11]] to <16 x i32>
1019// CHECK: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64
1020// CHECK: [[TMP14:%.*]] = bitcast <8 x i64> [[TMP13]] to <16 x i32>
1021// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
1022// CHECK: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
1023// CHECK: [[TMP16:%.*]] = bitcast <8 x i64> [[TMP15]] to <16 x i32>
1024// CHECK: [[TMP17:%.*]] = icmp sgt <16 x i32> [[TMP12]], [[TMP14]]
1025// CHECK: [[TMP18:%.*]] = select <16 x i1> [[TMP17]], <16 x i32> [[TMP12]], <16 x i32> [[TMP14]]
1026// CHECK: [[TMP19:%.*]] = bitcast <16 x i32> [[TMP18]] to <8 x i64>
1027// CHECK: store <8 x i64> [[TMP19]], <8 x i64>* [[A_ADDR_I]], align 64
1028// CHECK: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1029// CHECK: [[TMP21:%.*]] = bitcast <8 x i64> [[TMP20]] to <16 x i32>
1030// CHECK: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1031// CHECK: [[TMP23:%.*]] = bitcast <8 x i64> [[TMP22]] to <16 x i32>
1032// CHECK: [[SHUFFLE2_I:%.*]] = shufflevector <16 x i32> [[TMP21]], <16 x i32> [[TMP23]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1033// CHECK: [[TMP24:%.*]] = bitcast <16 x i32> [[SHUFFLE2_I]] to <8 x i64>
1034// CHECK: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1035// CHECK: [[TMP26:%.*]] = bitcast <8 x i64> [[TMP25]] to <16 x i32>
1036// CHECK: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1037// CHECK: [[TMP28:%.*]] = bitcast <8 x i64> [[TMP27]] to <16 x i32>
1038// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <16 x i32> [[TMP26]], <16 x i32> [[TMP28]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1039// CHECK: [[TMP29:%.*]] = bitcast <16 x i32> [[SHUFFLE3_I]] to <8 x i64>
1040// CHECK: store <8 x i64> [[TMP24]], <8 x i64>* [[__A_ADDR_I18_I]], align 64
1041// CHECK: store <8 x i64> [[TMP29]], <8 x i64>* [[__B_ADDR_I19_I]], align 64
1042// CHECK: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I18_I]], align 64
1043// CHECK: [[TMP31:%.*]] = bitcast <8 x i64> [[TMP30]] to <16 x i32>
1044// CHECK: [[TMP32:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I19_I]], align 64
1045// CHECK: [[TMP33:%.*]] = bitcast <8 x i64> [[TMP32]] to <16 x i32>
1046// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I17_I]], align 64
1047// CHECK: [[TMP34:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I17_I]], align 64
1048// CHECK: [[TMP35:%.*]] = bitcast <8 x i64> [[TMP34]] to <16 x i32>
1049// CHECK: [[TMP36:%.*]] = icmp sgt <16 x i32> [[TMP31]], [[TMP33]]
1050// CHECK: [[TMP37:%.*]] = select <16 x i1> [[TMP36]], <16 x i32> [[TMP31]], <16 x i32> [[TMP33]]
1051// CHECK: [[TMP38:%.*]] = bitcast <16 x i32> [[TMP37]] to <8 x i64>
1052// CHECK: store <8 x i64> [[TMP38]], <8 x i64>* [[A_ADDR_I]], align 64
1053// CHECK: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1054// CHECK: [[TMP40:%.*]] = bitcast <8 x i64> [[TMP39]] to <16 x i32>
1055// CHECK: [[TMP41:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1056// CHECK: [[TMP42:%.*]] = bitcast <8 x i64> [[TMP41]] to <16 x i32>
1057// CHECK: [[SHUFFLE5_I:%.*]] = shufflevector <16 x i32> [[TMP40]], <16 x i32> [[TMP42]], <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1058// CHECK: [[TMP43:%.*]] = bitcast <16 x i32> [[SHUFFLE5_I]] to <8 x i64>
1059// CHECK: [[TMP44:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1060// CHECK: [[TMP45:%.*]] = bitcast <8 x i64> [[TMP44]] to <16 x i32>
1061// CHECK: [[TMP46:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1062// CHECK: [[TMP47:%.*]] = bitcast <8 x i64> [[TMP46]] to <16 x i32>
1063// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <16 x i32> [[TMP45]], <16 x i32> [[TMP47]], <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1064// CHECK: [[TMP48:%.*]] = bitcast <16 x i32> [[SHUFFLE6_I]] to <8 x i64>
1065// CHECK: store <8 x i64> [[TMP43]], <8 x i64>* [[__A_ADDR_I15_I]], align 64
1066// CHECK: store <8 x i64> [[TMP48]], <8 x i64>* [[__B_ADDR_I16_I]], align 64
1067// CHECK: [[TMP49:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I15_I]], align 64
1068// CHECK: [[TMP50:%.*]] = bitcast <8 x i64> [[TMP49]] to <16 x i32>
1069// CHECK: [[TMP51:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I16_I]], align 64
1070// CHECK: [[TMP52:%.*]] = bitcast <8 x i64> [[TMP51]] to <16 x i32>
1071// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I14_I]], align 64
1072// CHECK: [[TMP53:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I14_I]], align 64
1073// CHECK: [[TMP54:%.*]] = bitcast <8 x i64> [[TMP53]] to <16 x i32>
1074// CHECK: [[TMP55:%.*]] = icmp sgt <16 x i32> [[TMP50]], [[TMP52]]
1075// CHECK: [[TMP56:%.*]] = select <16 x i1> [[TMP55]], <16 x i32> [[TMP50]], <16 x i32> [[TMP52]]
1076// CHECK: [[TMP57:%.*]] = bitcast <16 x i32> [[TMP56]] to <8 x i64>
1077// CHECK: store <8 x i64> [[TMP57]], <8 x i64>* [[A_ADDR_I]], align 64
1078// CHECK: [[TMP58:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1079// CHECK: [[TMP59:%.*]] = bitcast <8 x i64> [[TMP58]] to <16 x i32>
1080// CHECK: [[TMP60:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1081// CHECK: [[TMP61:%.*]] = bitcast <8 x i64> [[TMP60]] to <16 x i32>
1082// CHECK: [[SHUFFLE8_I:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> [[TMP61]], <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1083// CHECK: [[TMP62:%.*]] = bitcast <16 x i32> [[SHUFFLE8_I]] to <8 x i64>
1084// CHECK: [[TMP63:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1085// CHECK: [[TMP64:%.*]] = bitcast <8 x i64> [[TMP63]] to <16 x i32>
1086// CHECK: [[TMP65:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1087// CHECK: [[TMP66:%.*]] = bitcast <8 x i64> [[TMP65]] to <16 x i32>
1088// CHECK: [[SHUFFLE9_I:%.*]] = shufflevector <16 x i32> [[TMP64]], <16 x i32> [[TMP66]], <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1089// CHECK: [[TMP67:%.*]] = bitcast <16 x i32> [[SHUFFLE9_I]] to <8 x i64>
1090// CHECK: store <8 x i64> [[TMP62]], <8 x i64>* [[__A_ADDR_I12_I]], align 64
1091// CHECK: store <8 x i64> [[TMP67]], <8 x i64>* [[__B_ADDR_I13_I]], align 64
1092// CHECK: [[TMP68:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I12_I]], align 64
1093// CHECK: [[TMP69:%.*]] = bitcast <8 x i64> [[TMP68]] to <16 x i32>
1094// CHECK: [[TMP70:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I13_I]], align 64
1095// CHECK: [[TMP71:%.*]] = bitcast <8 x i64> [[TMP70]] to <16 x i32>
1096// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
1097// CHECK: [[TMP72:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
1098// CHECK: [[TMP73:%.*]] = bitcast <8 x i64> [[TMP72]] to <16 x i32>
1099// CHECK: [[TMP74:%.*]] = icmp sgt <16 x i32> [[TMP69]], [[TMP71]]
1100// CHECK: [[TMP75:%.*]] = select <16 x i1> [[TMP74]], <16 x i32> [[TMP69]], <16 x i32> [[TMP71]]
1101// CHECK: [[TMP76:%.*]] = bitcast <16 x i32> [[TMP75]] to <8 x i64>
1102// CHECK: store <8 x i64> [[TMP76]], <8 x i64>* [[A_ADDR_I]], align 64
1103// CHECK: [[TMP77:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1104// CHECK: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP77]], i32 0
1105// CHECK: [[CONV_I:%.*]] = trunc i64 [[VECEXT_I]] to i32
1106// CHECK: ret i32 [[CONV_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +00001107int test_mm512_reduce_max_epi32(__m512i __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +00001108 return _mm512_reduce_max_epi32(__W);
1109}
1110
Sanjay Patel1acd2cf2017-09-25 19:56:57 +00001111// CHECK-LABEL: define i32 @test_mm512_reduce_max_epu32(<8 x i64> %__W) #0 {
1112// CHECK: [[_COMPOUNDLITERAL_I_I17_I:%.*]] = alloca <8 x i64>, align 64
1113// CHECK: [[__A_ADDR_I18_I:%.*]] = alloca <8 x i64>, align 64
1114// CHECK: [[__B_ADDR_I19_I:%.*]] = alloca <8 x i64>, align 64
1115// CHECK: [[_COMPOUNDLITERAL_I_I14_I:%.*]] = alloca <8 x i64>, align 64
1116// CHECK: [[__A_ADDR_I15_I:%.*]] = alloca <8 x i64>, align 64
1117// CHECK: [[__B_ADDR_I16_I:%.*]] = alloca <8 x i64>, align 64
1118// CHECK: [[_COMPOUNDLITERAL_I_I11_I:%.*]] = alloca <8 x i64>, align 64
1119// CHECK: [[__A_ADDR_I12_I:%.*]] = alloca <8 x i64>, align 64
1120// CHECK: [[__B_ADDR_I13_I:%.*]] = alloca <8 x i64>, align 64
1121// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64
1122// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
1123// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
1124// CHECK: [[A_ADDR_I:%.*]] = alloca <8 x i64>, align 64
1125// CHECK: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
1126// CHECK: store <8 x i64> %__W, <8 x i64>* [[__W_ADDR]], align 64
1127// CHECK: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
1128// CHECK: store <8 x i64> [[TMP0]], <8 x i64>* [[A_ADDR_I]], align 64
1129// CHECK: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1130// CHECK: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to <16 x i32>
1131// CHECK: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1132// CHECK: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP3]] to <16 x i32>
1133// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1134// CHECK: [[TMP5:%.*]] = bitcast <16 x i32> [[SHUFFLE_I]] to <8 x i64>
1135// CHECK: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1136// CHECK: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP6]] to <16 x i32>
1137// CHECK: [[TMP8:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1138// CHECK: [[TMP9:%.*]] = bitcast <8 x i64> [[TMP8]] to <16 x i32>
1139// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <16 x i32> [[TMP7]], <16 x i32> [[TMP9]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1140// CHECK: [[TMP10:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I]] to <8 x i64>
1141// CHECK: store <8 x i64> [[TMP5]], <8 x i64>* [[__A_ADDR_I_I]], align 64
1142// CHECK: store <8 x i64> [[TMP10]], <8 x i64>* [[__B_ADDR_I_I]], align 64
1143// CHECK: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
1144// CHECK: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP11]] to <16 x i32>
1145// CHECK: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64
1146// CHECK: [[TMP14:%.*]] = bitcast <8 x i64> [[TMP13]] to <16 x i32>
1147// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
1148// CHECK: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
1149// CHECK: [[TMP16:%.*]] = bitcast <8 x i64> [[TMP15]] to <16 x i32>
1150// CHECK: [[TMP17:%.*]] = icmp ugt <16 x i32> [[TMP12]], [[TMP14]]
1151// CHECK: [[TMP18:%.*]] = select <16 x i1> [[TMP17]], <16 x i32> [[TMP12]], <16 x i32> [[TMP14]]
1152// CHECK: [[TMP19:%.*]] = bitcast <16 x i32> [[TMP18]] to <8 x i64>
1153// CHECK: store <8 x i64> [[TMP19]], <8 x i64>* [[A_ADDR_I]], align 64
1154// CHECK: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1155// CHECK: [[TMP21:%.*]] = bitcast <8 x i64> [[TMP20]] to <16 x i32>
1156// CHECK: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1157// CHECK: [[TMP23:%.*]] = bitcast <8 x i64> [[TMP22]] to <16 x i32>
1158// CHECK: [[SHUFFLE2_I:%.*]] = shufflevector <16 x i32> [[TMP21]], <16 x i32> [[TMP23]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1159// CHECK: [[TMP24:%.*]] = bitcast <16 x i32> [[SHUFFLE2_I]] to <8 x i64>
1160// CHECK: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1161// CHECK: [[TMP26:%.*]] = bitcast <8 x i64> [[TMP25]] to <16 x i32>
1162// CHECK: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1163// CHECK: [[TMP28:%.*]] = bitcast <8 x i64> [[TMP27]] to <16 x i32>
1164// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <16 x i32> [[TMP26]], <16 x i32> [[TMP28]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1165// CHECK: [[TMP29:%.*]] = bitcast <16 x i32> [[SHUFFLE3_I]] to <8 x i64>
1166// CHECK: store <8 x i64> [[TMP24]], <8 x i64>* [[__A_ADDR_I18_I]], align 64
1167// CHECK: store <8 x i64> [[TMP29]], <8 x i64>* [[__B_ADDR_I19_I]], align 64
1168// CHECK: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I18_I]], align 64
1169// CHECK: [[TMP31:%.*]] = bitcast <8 x i64> [[TMP30]] to <16 x i32>
1170// CHECK: [[TMP32:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I19_I]], align 64
1171// CHECK: [[TMP33:%.*]] = bitcast <8 x i64> [[TMP32]] to <16 x i32>
1172// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I17_I]], align 64
1173// CHECK: [[TMP34:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I17_I]], align 64
1174// CHECK: [[TMP35:%.*]] = bitcast <8 x i64> [[TMP34]] to <16 x i32>
1175// CHECK: [[TMP36:%.*]] = icmp ugt <16 x i32> [[TMP31]], [[TMP33]]
1176// CHECK: [[TMP37:%.*]] = select <16 x i1> [[TMP36]], <16 x i32> [[TMP31]], <16 x i32> [[TMP33]]
1177// CHECK: [[TMP38:%.*]] = bitcast <16 x i32> [[TMP37]] to <8 x i64>
1178// CHECK: store <8 x i64> [[TMP38]], <8 x i64>* [[A_ADDR_I]], align 64
1179// CHECK: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1180// CHECK: [[TMP40:%.*]] = bitcast <8 x i64> [[TMP39]] to <16 x i32>
1181// CHECK: [[TMP41:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1182// CHECK: [[TMP42:%.*]] = bitcast <8 x i64> [[TMP41]] to <16 x i32>
1183// CHECK: [[SHUFFLE5_I:%.*]] = shufflevector <16 x i32> [[TMP40]], <16 x i32> [[TMP42]], <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1184// CHECK: [[TMP43:%.*]] = bitcast <16 x i32> [[SHUFFLE5_I]] to <8 x i64>
1185// CHECK: [[TMP44:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1186// CHECK: [[TMP45:%.*]] = bitcast <8 x i64> [[TMP44]] to <16 x i32>
1187// CHECK: [[TMP46:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1188// CHECK: [[TMP47:%.*]] = bitcast <8 x i64> [[TMP46]] to <16 x i32>
1189// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <16 x i32> [[TMP45]], <16 x i32> [[TMP47]], <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1190// CHECK: [[TMP48:%.*]] = bitcast <16 x i32> [[SHUFFLE6_I]] to <8 x i64>
1191// CHECK: store <8 x i64> [[TMP43]], <8 x i64>* [[__A_ADDR_I15_I]], align 64
1192// CHECK: store <8 x i64> [[TMP48]], <8 x i64>* [[__B_ADDR_I16_I]], align 64
1193// CHECK: [[TMP49:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I15_I]], align 64
1194// CHECK: [[TMP50:%.*]] = bitcast <8 x i64> [[TMP49]] to <16 x i32>
1195// CHECK: [[TMP51:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I16_I]], align 64
1196// CHECK: [[TMP52:%.*]] = bitcast <8 x i64> [[TMP51]] to <16 x i32>
1197// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I14_I]], align 64
1198// CHECK: [[TMP53:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I14_I]], align 64
1199// CHECK: [[TMP54:%.*]] = bitcast <8 x i64> [[TMP53]] to <16 x i32>
1200// CHECK: [[TMP55:%.*]] = icmp ugt <16 x i32> [[TMP50]], [[TMP52]]
1201// CHECK: [[TMP56:%.*]] = select <16 x i1> [[TMP55]], <16 x i32> [[TMP50]], <16 x i32> [[TMP52]]
1202// CHECK: [[TMP57:%.*]] = bitcast <16 x i32> [[TMP56]] to <8 x i64>
1203// CHECK: store <8 x i64> [[TMP57]], <8 x i64>* [[A_ADDR_I]], align 64
1204// CHECK: [[TMP58:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1205// CHECK: [[TMP59:%.*]] = bitcast <8 x i64> [[TMP58]] to <16 x i32>
1206// CHECK: [[TMP60:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1207// CHECK: [[TMP61:%.*]] = bitcast <8 x i64> [[TMP60]] to <16 x i32>
1208// CHECK: [[SHUFFLE8_I:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> [[TMP61]], <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1209// CHECK: [[TMP62:%.*]] = bitcast <16 x i32> [[SHUFFLE8_I]] to <8 x i64>
1210// CHECK: [[TMP63:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1211// CHECK: [[TMP64:%.*]] = bitcast <8 x i64> [[TMP63]] to <16 x i32>
1212// CHECK: [[TMP65:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1213// CHECK: [[TMP66:%.*]] = bitcast <8 x i64> [[TMP65]] to <16 x i32>
1214// CHECK: [[SHUFFLE9_I:%.*]] = shufflevector <16 x i32> [[TMP64]], <16 x i32> [[TMP66]], <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1215// CHECK: [[TMP67:%.*]] = bitcast <16 x i32> [[SHUFFLE9_I]] to <8 x i64>
1216// CHECK: store <8 x i64> [[TMP62]], <8 x i64>* [[__A_ADDR_I12_I]], align 64
1217// CHECK: store <8 x i64> [[TMP67]], <8 x i64>* [[__B_ADDR_I13_I]], align 64
1218// CHECK: [[TMP68:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I12_I]], align 64
1219// CHECK: [[TMP69:%.*]] = bitcast <8 x i64> [[TMP68]] to <16 x i32>
1220// CHECK: [[TMP70:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I13_I]], align 64
1221// CHECK: [[TMP71:%.*]] = bitcast <8 x i64> [[TMP70]] to <16 x i32>
1222// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
1223// CHECK: [[TMP72:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
1224// CHECK: [[TMP73:%.*]] = bitcast <8 x i64> [[TMP72]] to <16 x i32>
1225// CHECK: [[TMP74:%.*]] = icmp ugt <16 x i32> [[TMP69]], [[TMP71]]
1226// CHECK: [[TMP75:%.*]] = select <16 x i1> [[TMP74]], <16 x i32> [[TMP69]], <16 x i32> [[TMP71]]
1227// CHECK: [[TMP76:%.*]] = bitcast <16 x i32> [[TMP75]] to <8 x i64>
1228// CHECK: store <8 x i64> [[TMP76]], <8 x i64>* [[A_ADDR_I]], align 64
1229// CHECK: [[TMP77:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1230// CHECK: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP77]], i32 0
1231// CHECK: [[CONV_I:%.*]] = trunc i64 [[VECEXT_I]] to i32
1232// CHECK: ret i32 [[CONV_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +00001233unsigned int test_mm512_reduce_max_epu32(__m512i __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +00001234 return _mm512_reduce_max_epu32(__W);
1235}
1236
Sanjay Patel1acd2cf2017-09-25 19:56:57 +00001237// CHECK-LABEL: define float @test_mm512_reduce_max_ps(<16 x float> %__W) #0 {
1238// CHECK: [[_COMPOUNDLITERAL_I_I17_I:%.*]] = alloca <16 x float>, align 64
1239// CHECK: [[__A_ADDR_I18_I:%.*]] = alloca <16 x float>, align 64
1240// CHECK: [[__B_ADDR_I19_I:%.*]] = alloca <16 x float>, align 64
1241// CHECK: [[_COMPOUNDLITERAL_I_I14_I:%.*]] = alloca <16 x float>, align 64
1242// CHECK: [[__A_ADDR_I15_I:%.*]] = alloca <16 x float>, align 64
1243// CHECK: [[__B_ADDR_I16_I:%.*]] = alloca <16 x float>, align 64
1244// CHECK: [[_COMPOUNDLITERAL_I_I11_I:%.*]] = alloca <16 x float>, align 64
1245// CHECK: [[__A_ADDR_I12_I:%.*]] = alloca <16 x float>, align 64
1246// CHECK: [[__B_ADDR_I13_I:%.*]] = alloca <16 x float>, align 64
1247// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <16 x float>, align 64
1248// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <16 x float>, align 64
1249// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <16 x float>, align 64
1250// CHECK: [[A_ADDR_I:%.*]] = alloca <16 x float>, align 64
1251// CHECK: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64
1252// CHECK: store <16 x float> %__W, <16 x float>* [[__W_ADDR]], align 64
1253// CHECK: [[TMP0:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64
1254// CHECK: store <16 x float> [[TMP0]], <16 x float>* [[A_ADDR_I]], align 64
1255// CHECK: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1256// CHECK: [[TMP2:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1257// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1258// CHECK: [[TMP3:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1259// CHECK: [[TMP4:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1260// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> [[TMP4]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1261// CHECK: store <16 x float> [[SHUFFLE_I]], <16 x float>* [[__A_ADDR_I_I]], align 64
1262// CHECK: store <16 x float> [[SHUFFLE1_I]], <16 x float>* [[__B_ADDR_I_I]], align 64
1263// CHECK: [[TMP5:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I_I]], align 64
1264// CHECK: [[TMP6:%.*]] = load <16 x float>, <16 x float>* [[__B_ADDR_I_I]], align 64
1265// CHECK: store <16 x float> zeroinitializer, <16 x float>* [[_COMPOUNDLITERAL_I_I_I]], align 64
1266// CHECK: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[_COMPOUNDLITERAL_I_I_I]], align 64
1267// CHECK: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> [[TMP5]], <16 x float> [[TMP6]], <16 x float> [[TMP7]], i16 -1, i32 4) #2
1268// CHECK: store <16 x float> [[TMP8]], <16 x float>* [[A_ADDR_I]], align 64
1269// CHECK: [[TMP9:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1270// CHECK: [[TMP10:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1271// CHECK: [[SHUFFLE2_I:%.*]] = shufflevector <16 x float> [[TMP9]], <16 x float> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1272// CHECK: [[TMP11:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1273// CHECK: [[TMP12:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1274// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> [[TMP12]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1275// CHECK: store <16 x float> [[SHUFFLE2_I]], <16 x float>* [[__A_ADDR_I18_I]], align 64
1276// CHECK: store <16 x float> [[SHUFFLE3_I]], <16 x float>* [[__B_ADDR_I19_I]], align 64
1277// CHECK: [[TMP13:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I18_I]], align 64
1278// CHECK: [[TMP14:%.*]] = load <16 x float>, <16 x float>* [[__B_ADDR_I19_I]], align 64
1279// CHECK: store <16 x float> zeroinitializer, <16 x float>* [[_COMPOUNDLITERAL_I_I17_I]], align 64
1280// CHECK: [[TMP15:%.*]] = load <16 x float>, <16 x float>* [[_COMPOUNDLITERAL_I_I17_I]], align 64
1281// CHECK: [[TMP16:%.*]] = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> [[TMP13]], <16 x float> [[TMP14]], <16 x float> [[TMP15]], i16 -1, i32 4) #2
1282// CHECK: store <16 x float> [[TMP16]], <16 x float>* [[A_ADDR_I]], align 64
1283// CHECK: [[TMP17:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1284// CHECK: [[TMP18:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1285// CHECK: [[SHUFFLE5_I:%.*]] = shufflevector <16 x float> [[TMP17]], <16 x float> [[TMP18]], <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1286// CHECK: [[TMP19:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1287// CHECK: [[TMP20:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1288// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP20]], <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1289// CHECK: store <16 x float> [[SHUFFLE5_I]], <16 x float>* [[__A_ADDR_I15_I]], align 64
1290// CHECK: store <16 x float> [[SHUFFLE6_I]], <16 x float>* [[__B_ADDR_I16_I]], align 64
1291// CHECK: [[TMP21:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I15_I]], align 64
1292// CHECK: [[TMP22:%.*]] = load <16 x float>, <16 x float>* [[__B_ADDR_I16_I]], align 64
1293// CHECK: store <16 x float> zeroinitializer, <16 x float>* [[_COMPOUNDLITERAL_I_I14_I]], align 64
1294// CHECK: [[TMP23:%.*]] = load <16 x float>, <16 x float>* [[_COMPOUNDLITERAL_I_I14_I]], align 64
1295// CHECK: [[TMP24:%.*]] = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x float> [[TMP23]], i16 -1, i32 4) #2
1296// CHECK: store <16 x float> [[TMP24]], <16 x float>* [[A_ADDR_I]], align 64
1297// CHECK: [[TMP25:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1298// CHECK: [[TMP26:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1299// CHECK: [[SHUFFLE8_I:%.*]] = shufflevector <16 x float> [[TMP25]], <16 x float> [[TMP26]], <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1300// CHECK: [[TMP27:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1301// CHECK: [[TMP28:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1302// CHECK: [[SHUFFLE9_I:%.*]] = shufflevector <16 x float> [[TMP27]], <16 x float> [[TMP28]], <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1303// CHECK: store <16 x float> [[SHUFFLE8_I]], <16 x float>* [[__A_ADDR_I12_I]], align 64
1304// CHECK: store <16 x float> [[SHUFFLE9_I]], <16 x float>* [[__B_ADDR_I13_I]], align 64
1305// CHECK: [[TMP29:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I12_I]], align 64
1306// CHECK: [[TMP30:%.*]] = load <16 x float>, <16 x float>* [[__B_ADDR_I13_I]], align 64
1307// CHECK: store <16 x float> zeroinitializer, <16 x float>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
1308// CHECK: [[TMP31:%.*]] = load <16 x float>, <16 x float>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
1309// CHECK: [[TMP32:%.*]] = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> [[TMP29]], <16 x float> [[TMP30]], <16 x float> [[TMP31]], i16 -1, i32 4) #2
1310// CHECK: store <16 x float> [[TMP32]], <16 x float>* [[A_ADDR_I]], align 64
1311// CHECK: [[TMP33:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1312// CHECK: [[VECEXT_I:%.*]] = extractelement <16 x float> [[TMP33]], i32 0
1313// CHECK: ret float [[VECEXT_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +00001314float test_mm512_reduce_max_ps(__m512 __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +00001315 return _mm512_reduce_max_ps(__W);
1316}
1317
Sanjay Patel1acd2cf2017-09-25 19:56:57 +00001318// CHECK-LABEL: define i32 @test_mm512_reduce_min_epi32(<8 x i64> %__W) #0 {
1319// CHECK: [[_COMPOUNDLITERAL_I_I17_I:%.*]] = alloca <8 x i64>, align 64
1320// CHECK: [[__A_ADDR_I18_I:%.*]] = alloca <8 x i64>, align 64
1321// CHECK: [[__B_ADDR_I19_I:%.*]] = alloca <8 x i64>, align 64
1322// CHECK: [[_COMPOUNDLITERAL_I_I14_I:%.*]] = alloca <8 x i64>, align 64
1323// CHECK: [[__A_ADDR_I15_I:%.*]] = alloca <8 x i64>, align 64
1324// CHECK: [[__B_ADDR_I16_I:%.*]] = alloca <8 x i64>, align 64
1325// CHECK: [[_COMPOUNDLITERAL_I_I11_I:%.*]] = alloca <8 x i64>, align 64
1326// CHECK: [[__A_ADDR_I12_I:%.*]] = alloca <8 x i64>, align 64
1327// CHECK: [[__B_ADDR_I13_I:%.*]] = alloca <8 x i64>, align 64
1328// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64
1329// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
1330// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
1331// CHECK: [[A_ADDR_I:%.*]] = alloca <8 x i64>, align 64
1332// CHECK: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
1333// CHECK: store <8 x i64> %__W, <8 x i64>* [[__W_ADDR]], align 64
1334// CHECK: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
1335// CHECK: store <8 x i64> [[TMP0]], <8 x i64>* [[A_ADDR_I]], align 64
1336// CHECK: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1337// CHECK: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to <16 x i32>
1338// CHECK: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1339// CHECK: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP3]] to <16 x i32>
1340// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1341// CHECK: [[TMP5:%.*]] = bitcast <16 x i32> [[SHUFFLE_I]] to <8 x i64>
1342// CHECK: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1343// CHECK: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP6]] to <16 x i32>
1344// CHECK: [[TMP8:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1345// CHECK: [[TMP9:%.*]] = bitcast <8 x i64> [[TMP8]] to <16 x i32>
1346// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <16 x i32> [[TMP7]], <16 x i32> [[TMP9]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1347// CHECK: [[TMP10:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I]] to <8 x i64>
1348// CHECK: store <8 x i64> [[TMP5]], <8 x i64>* [[__A_ADDR_I_I]], align 64
1349// CHECK: store <8 x i64> [[TMP10]], <8 x i64>* [[__B_ADDR_I_I]], align 64
1350// CHECK: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
1351// CHECK: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP11]] to <16 x i32>
1352// CHECK: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64
1353// CHECK: [[TMP14:%.*]] = bitcast <8 x i64> [[TMP13]] to <16 x i32>
1354// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
1355// CHECK: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
1356// CHECK: [[TMP16:%.*]] = bitcast <8 x i64> [[TMP15]] to <16 x i32>
1357// CHECK: [[TMP17:%.*]] = icmp slt <16 x i32> [[TMP12]], [[TMP14]]
1358// CHECK: [[TMP18:%.*]] = select <16 x i1> [[TMP17]], <16 x i32> [[TMP12]], <16 x i32> [[TMP14]]
1359// CHECK: [[TMP19:%.*]] = bitcast <16 x i32> [[TMP18]] to <8 x i64>
1360// CHECK: store <8 x i64> [[TMP19]], <8 x i64>* [[A_ADDR_I]], align 64
1361// CHECK: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1362// CHECK: [[TMP21:%.*]] = bitcast <8 x i64> [[TMP20]] to <16 x i32>
1363// CHECK: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1364// CHECK: [[TMP23:%.*]] = bitcast <8 x i64> [[TMP22]] to <16 x i32>
1365// CHECK: [[SHUFFLE2_I:%.*]] = shufflevector <16 x i32> [[TMP21]], <16 x i32> [[TMP23]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1366// CHECK: [[TMP24:%.*]] = bitcast <16 x i32> [[SHUFFLE2_I]] to <8 x i64>
1367// CHECK: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1368// CHECK: [[TMP26:%.*]] = bitcast <8 x i64> [[TMP25]] to <16 x i32>
1369// CHECK: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1370// CHECK: [[TMP28:%.*]] = bitcast <8 x i64> [[TMP27]] to <16 x i32>
1371// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <16 x i32> [[TMP26]], <16 x i32> [[TMP28]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1372// CHECK: [[TMP29:%.*]] = bitcast <16 x i32> [[SHUFFLE3_I]] to <8 x i64>
1373// CHECK: store <8 x i64> [[TMP24]], <8 x i64>* [[__A_ADDR_I18_I]], align 64
1374// CHECK: store <8 x i64> [[TMP29]], <8 x i64>* [[__B_ADDR_I19_I]], align 64
1375// CHECK: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I18_I]], align 64
1376// CHECK: [[TMP31:%.*]] = bitcast <8 x i64> [[TMP30]] to <16 x i32>
1377// CHECK: [[TMP32:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I19_I]], align 64
1378// CHECK: [[TMP33:%.*]] = bitcast <8 x i64> [[TMP32]] to <16 x i32>
1379// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I17_I]], align 64
1380// CHECK: [[TMP34:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I17_I]], align 64
1381// CHECK: [[TMP35:%.*]] = bitcast <8 x i64> [[TMP34]] to <16 x i32>
1382// CHECK: [[TMP36:%.*]] = icmp slt <16 x i32> [[TMP31]], [[TMP33]]
1383// CHECK: [[TMP37:%.*]] = select <16 x i1> [[TMP36]], <16 x i32> [[TMP31]], <16 x i32> [[TMP33]]
1384// CHECK: [[TMP38:%.*]] = bitcast <16 x i32> [[TMP37]] to <8 x i64>
1385// CHECK: store <8 x i64> [[TMP38]], <8 x i64>* [[A_ADDR_I]], align 64
1386// CHECK: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1387// CHECK: [[TMP40:%.*]] = bitcast <8 x i64> [[TMP39]] to <16 x i32>
1388// CHECK: [[TMP41:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1389// CHECK: [[TMP42:%.*]] = bitcast <8 x i64> [[TMP41]] to <16 x i32>
1390// CHECK: [[SHUFFLE5_I:%.*]] = shufflevector <16 x i32> [[TMP40]], <16 x i32> [[TMP42]], <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1391// CHECK: [[TMP43:%.*]] = bitcast <16 x i32> [[SHUFFLE5_I]] to <8 x i64>
1392// CHECK: [[TMP44:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1393// CHECK: [[TMP45:%.*]] = bitcast <8 x i64> [[TMP44]] to <16 x i32>
1394// CHECK: [[TMP46:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1395// CHECK: [[TMP47:%.*]] = bitcast <8 x i64> [[TMP46]] to <16 x i32>
1396// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <16 x i32> [[TMP45]], <16 x i32> [[TMP47]], <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1397// CHECK: [[TMP48:%.*]] = bitcast <16 x i32> [[SHUFFLE6_I]] to <8 x i64>
1398// CHECK: store <8 x i64> [[TMP43]], <8 x i64>* [[__A_ADDR_I15_I]], align 64
1399// CHECK: store <8 x i64> [[TMP48]], <8 x i64>* [[__B_ADDR_I16_I]], align 64
1400// CHECK: [[TMP49:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I15_I]], align 64
1401// CHECK: [[TMP50:%.*]] = bitcast <8 x i64> [[TMP49]] to <16 x i32>
1402// CHECK: [[TMP51:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I16_I]], align 64
1403// CHECK: [[TMP52:%.*]] = bitcast <8 x i64> [[TMP51]] to <16 x i32>
1404// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I14_I]], align 64
1405// CHECK: [[TMP53:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I14_I]], align 64
1406// CHECK: [[TMP54:%.*]] = bitcast <8 x i64> [[TMP53]] to <16 x i32>
1407// CHECK: [[TMP55:%.*]] = icmp slt <16 x i32> [[TMP50]], [[TMP52]]
1408// CHECK: [[TMP56:%.*]] = select <16 x i1> [[TMP55]], <16 x i32> [[TMP50]], <16 x i32> [[TMP52]]
1409// CHECK: [[TMP57:%.*]] = bitcast <16 x i32> [[TMP56]] to <8 x i64>
1410// CHECK: store <8 x i64> [[TMP57]], <8 x i64>* [[A_ADDR_I]], align 64
1411// CHECK: [[TMP58:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1412// CHECK: [[TMP59:%.*]] = bitcast <8 x i64> [[TMP58]] to <16 x i32>
1413// CHECK: [[TMP60:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1414// CHECK: [[TMP61:%.*]] = bitcast <8 x i64> [[TMP60]] to <16 x i32>
1415// CHECK: [[SHUFFLE8_I:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> [[TMP61]], <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1416// CHECK: [[TMP62:%.*]] = bitcast <16 x i32> [[SHUFFLE8_I]] to <8 x i64>
1417// CHECK: [[TMP63:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1418// CHECK: [[TMP64:%.*]] = bitcast <8 x i64> [[TMP63]] to <16 x i32>
1419// CHECK: [[TMP65:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1420// CHECK: [[TMP66:%.*]] = bitcast <8 x i64> [[TMP65]] to <16 x i32>
1421// CHECK: [[SHUFFLE9_I:%.*]] = shufflevector <16 x i32> [[TMP64]], <16 x i32> [[TMP66]], <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1422// CHECK: [[TMP67:%.*]] = bitcast <16 x i32> [[SHUFFLE9_I]] to <8 x i64>
1423// CHECK: store <8 x i64> [[TMP62]], <8 x i64>* [[__A_ADDR_I12_I]], align 64
1424// CHECK: store <8 x i64> [[TMP67]], <8 x i64>* [[__B_ADDR_I13_I]], align 64
1425// CHECK: [[TMP68:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I12_I]], align 64
1426// CHECK: [[TMP69:%.*]] = bitcast <8 x i64> [[TMP68]] to <16 x i32>
1427// CHECK: [[TMP70:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I13_I]], align 64
1428// CHECK: [[TMP71:%.*]] = bitcast <8 x i64> [[TMP70]] to <16 x i32>
1429// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
1430// CHECK: [[TMP72:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
1431// CHECK: [[TMP73:%.*]] = bitcast <8 x i64> [[TMP72]] to <16 x i32>
1432// CHECK: [[TMP74:%.*]] = icmp slt <16 x i32> [[TMP69]], [[TMP71]]
1433// CHECK: [[TMP75:%.*]] = select <16 x i1> [[TMP74]], <16 x i32> [[TMP69]], <16 x i32> [[TMP71]]
1434// CHECK: [[TMP76:%.*]] = bitcast <16 x i32> [[TMP75]] to <8 x i64>
1435// CHECK: store <8 x i64> [[TMP76]], <8 x i64>* [[A_ADDR_I]], align 64
1436// CHECK: [[TMP77:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1437// CHECK: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP77]], i32 0
1438// CHECK: [[CONV_I:%.*]] = trunc i64 [[VECEXT_I]] to i32
1439// CHECK: ret i32 [[CONV_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +00001440int test_mm512_reduce_min_epi32(__m512i __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +00001441 return _mm512_reduce_min_epi32(__W);
1442}
1443
Sanjay Patel1acd2cf2017-09-25 19:56:57 +00001444// CHECK-LABEL: define i32 @test_mm512_reduce_min_epu32(<8 x i64> %__W) #0 {
1445// CHECK: [[_COMPOUNDLITERAL_I_I17_I:%.*]] = alloca <8 x i64>, align 64
1446// CHECK: [[__A_ADDR_I18_I:%.*]] = alloca <8 x i64>, align 64
1447// CHECK: [[__B_ADDR_I19_I:%.*]] = alloca <8 x i64>, align 64
1448// CHECK: [[_COMPOUNDLITERAL_I_I14_I:%.*]] = alloca <8 x i64>, align 64
1449// CHECK: [[__A_ADDR_I15_I:%.*]] = alloca <8 x i64>, align 64
1450// CHECK: [[__B_ADDR_I16_I:%.*]] = alloca <8 x i64>, align 64
1451// CHECK: [[_COMPOUNDLITERAL_I_I11_I:%.*]] = alloca <8 x i64>, align 64
1452// CHECK: [[__A_ADDR_I12_I:%.*]] = alloca <8 x i64>, align 64
1453// CHECK: [[__B_ADDR_I13_I:%.*]] = alloca <8 x i64>, align 64
1454// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64
1455// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
1456// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
1457// CHECK: [[A_ADDR_I:%.*]] = alloca <8 x i64>, align 64
1458// CHECK: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
1459// CHECK: store <8 x i64> %__W, <8 x i64>* [[__W_ADDR]], align 64
1460// CHECK: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
1461// CHECK: store <8 x i64> [[TMP0]], <8 x i64>* [[A_ADDR_I]], align 64
1462// CHECK: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1463// CHECK: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to <16 x i32>
1464// CHECK: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1465// CHECK: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP3]] to <16 x i32>
1466// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1467// CHECK: [[TMP5:%.*]] = bitcast <16 x i32> [[SHUFFLE_I]] to <8 x i64>
1468// CHECK: [[TMP6:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1469// CHECK: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP6]] to <16 x i32>
1470// CHECK: [[TMP8:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1471// CHECK: [[TMP9:%.*]] = bitcast <8 x i64> [[TMP8]] to <16 x i32>
1472// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <16 x i32> [[TMP7]], <16 x i32> [[TMP9]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1473// CHECK: [[TMP10:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I]] to <8 x i64>
1474// CHECK: store <8 x i64> [[TMP5]], <8 x i64>* [[__A_ADDR_I_I]], align 64
1475// CHECK: store <8 x i64> [[TMP10]], <8 x i64>* [[__B_ADDR_I_I]], align 64
1476// CHECK: [[TMP11:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
1477// CHECK: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP11]] to <16 x i32>
1478// CHECK: [[TMP13:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64
1479// CHECK: [[TMP14:%.*]] = bitcast <8 x i64> [[TMP13]] to <16 x i32>
1480// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
1481// CHECK: [[TMP15:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
1482// CHECK: [[TMP16:%.*]] = bitcast <8 x i64> [[TMP15]] to <16 x i32>
1483// CHECK: [[TMP17:%.*]] = icmp ult <16 x i32> [[TMP12]], [[TMP14]]
1484// CHECK: [[TMP18:%.*]] = select <16 x i1> [[TMP17]], <16 x i32> [[TMP12]], <16 x i32> [[TMP14]]
1485// CHECK: [[TMP19:%.*]] = bitcast <16 x i32> [[TMP18]] to <8 x i64>
1486// CHECK: store <8 x i64> [[TMP19]], <8 x i64>* [[A_ADDR_I]], align 64
1487// CHECK: [[TMP20:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1488// CHECK: [[TMP21:%.*]] = bitcast <8 x i64> [[TMP20]] to <16 x i32>
1489// CHECK: [[TMP22:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1490// CHECK: [[TMP23:%.*]] = bitcast <8 x i64> [[TMP22]] to <16 x i32>
1491// CHECK: [[SHUFFLE2_I:%.*]] = shufflevector <16 x i32> [[TMP21]], <16 x i32> [[TMP23]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1492// CHECK: [[TMP24:%.*]] = bitcast <16 x i32> [[SHUFFLE2_I]] to <8 x i64>
1493// CHECK: [[TMP25:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1494// CHECK: [[TMP26:%.*]] = bitcast <8 x i64> [[TMP25]] to <16 x i32>
1495// CHECK: [[TMP27:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1496// CHECK: [[TMP28:%.*]] = bitcast <8 x i64> [[TMP27]] to <16 x i32>
1497// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <16 x i32> [[TMP26]], <16 x i32> [[TMP28]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1498// CHECK: [[TMP29:%.*]] = bitcast <16 x i32> [[SHUFFLE3_I]] to <8 x i64>
1499// CHECK: store <8 x i64> [[TMP24]], <8 x i64>* [[__A_ADDR_I18_I]], align 64
1500// CHECK: store <8 x i64> [[TMP29]], <8 x i64>* [[__B_ADDR_I19_I]], align 64
1501// CHECK: [[TMP30:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I18_I]], align 64
1502// CHECK: [[TMP31:%.*]] = bitcast <8 x i64> [[TMP30]] to <16 x i32>
1503// CHECK: [[TMP32:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I19_I]], align 64
1504// CHECK: [[TMP33:%.*]] = bitcast <8 x i64> [[TMP32]] to <16 x i32>
1505// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I17_I]], align 64
1506// CHECK: [[TMP34:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I17_I]], align 64
1507// CHECK: [[TMP35:%.*]] = bitcast <8 x i64> [[TMP34]] to <16 x i32>
1508// CHECK: [[TMP36:%.*]] = icmp ult <16 x i32> [[TMP31]], [[TMP33]]
1509// CHECK: [[TMP37:%.*]] = select <16 x i1> [[TMP36]], <16 x i32> [[TMP31]], <16 x i32> [[TMP33]]
1510// CHECK: [[TMP38:%.*]] = bitcast <16 x i32> [[TMP37]] to <8 x i64>
1511// CHECK: store <8 x i64> [[TMP38]], <8 x i64>* [[A_ADDR_I]], align 64
1512// CHECK: [[TMP39:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1513// CHECK: [[TMP40:%.*]] = bitcast <8 x i64> [[TMP39]] to <16 x i32>
1514// CHECK: [[TMP41:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1515// CHECK: [[TMP42:%.*]] = bitcast <8 x i64> [[TMP41]] to <16 x i32>
1516// CHECK: [[SHUFFLE5_I:%.*]] = shufflevector <16 x i32> [[TMP40]], <16 x i32> [[TMP42]], <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1517// CHECK: [[TMP43:%.*]] = bitcast <16 x i32> [[SHUFFLE5_I]] to <8 x i64>
1518// CHECK: [[TMP44:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1519// CHECK: [[TMP45:%.*]] = bitcast <8 x i64> [[TMP44]] to <16 x i32>
1520// CHECK: [[TMP46:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1521// CHECK: [[TMP47:%.*]] = bitcast <8 x i64> [[TMP46]] to <16 x i32>
1522// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <16 x i32> [[TMP45]], <16 x i32> [[TMP47]], <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1523// CHECK: [[TMP48:%.*]] = bitcast <16 x i32> [[SHUFFLE6_I]] to <8 x i64>
1524// CHECK: store <8 x i64> [[TMP43]], <8 x i64>* [[__A_ADDR_I15_I]], align 64
1525// CHECK: store <8 x i64> [[TMP48]], <8 x i64>* [[__B_ADDR_I16_I]], align 64
1526// CHECK: [[TMP49:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I15_I]], align 64
1527// CHECK: [[TMP50:%.*]] = bitcast <8 x i64> [[TMP49]] to <16 x i32>
1528// CHECK: [[TMP51:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I16_I]], align 64
1529// CHECK: [[TMP52:%.*]] = bitcast <8 x i64> [[TMP51]] to <16 x i32>
1530// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I14_I]], align 64
1531// CHECK: [[TMP53:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I14_I]], align 64
1532// CHECK: [[TMP54:%.*]] = bitcast <8 x i64> [[TMP53]] to <16 x i32>
1533// CHECK: [[TMP55:%.*]] = icmp ult <16 x i32> [[TMP50]], [[TMP52]]
1534// CHECK: [[TMP56:%.*]] = select <16 x i1> [[TMP55]], <16 x i32> [[TMP50]], <16 x i32> [[TMP52]]
1535// CHECK: [[TMP57:%.*]] = bitcast <16 x i32> [[TMP56]] to <8 x i64>
1536// CHECK: store <8 x i64> [[TMP57]], <8 x i64>* [[A_ADDR_I]], align 64
1537// CHECK: [[TMP58:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1538// CHECK: [[TMP59:%.*]] = bitcast <8 x i64> [[TMP58]] to <16 x i32>
1539// CHECK: [[TMP60:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1540// CHECK: [[TMP61:%.*]] = bitcast <8 x i64> [[TMP60]] to <16 x i32>
1541// CHECK: [[SHUFFLE8_I:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> [[TMP61]], <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1542// CHECK: [[TMP62:%.*]] = bitcast <16 x i32> [[SHUFFLE8_I]] to <8 x i64>
1543// CHECK: [[TMP63:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1544// CHECK: [[TMP64:%.*]] = bitcast <8 x i64> [[TMP63]] to <16 x i32>
1545// CHECK: [[TMP65:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1546// CHECK: [[TMP66:%.*]] = bitcast <8 x i64> [[TMP65]] to <16 x i32>
1547// CHECK: [[SHUFFLE9_I:%.*]] = shufflevector <16 x i32> [[TMP64]], <16 x i32> [[TMP66]], <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1548// CHECK: [[TMP67:%.*]] = bitcast <16 x i32> [[SHUFFLE9_I]] to <8 x i64>
1549// CHECK: store <8 x i64> [[TMP62]], <8 x i64>* [[__A_ADDR_I12_I]], align 64
1550// CHECK: store <8 x i64> [[TMP67]], <8 x i64>* [[__B_ADDR_I13_I]], align 64
1551// CHECK: [[TMP68:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I12_I]], align 64
1552// CHECK: [[TMP69:%.*]] = bitcast <8 x i64> [[TMP68]] to <16 x i32>
1553// CHECK: [[TMP70:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I13_I]], align 64
1554// CHECK: [[TMP71:%.*]] = bitcast <8 x i64> [[TMP70]] to <16 x i32>
1555// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
1556// CHECK: [[TMP72:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
1557// CHECK: [[TMP73:%.*]] = bitcast <8 x i64> [[TMP72]] to <16 x i32>
1558// CHECK: [[TMP74:%.*]] = icmp ult <16 x i32> [[TMP69]], [[TMP71]]
1559// CHECK: [[TMP75:%.*]] = select <16 x i1> [[TMP74]], <16 x i32> [[TMP69]], <16 x i32> [[TMP71]]
1560// CHECK: [[TMP76:%.*]] = bitcast <16 x i32> [[TMP75]] to <8 x i64>
1561// CHECK: store <8 x i64> [[TMP76]], <8 x i64>* [[A_ADDR_I]], align 64
1562// CHECK: [[TMP77:%.*]] = load <8 x i64>, <8 x i64>* [[A_ADDR_I]], align 64
1563// CHECK: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP77]], i32 0
1564// CHECK: [[CONV_I:%.*]] = trunc i64 [[VECEXT_I]] to i32
1565// CHECK: ret i32 [[CONV_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +00001566unsigned int test_mm512_reduce_min_epu32(__m512i __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +00001567 return _mm512_reduce_min_epu32(__W);
1568}
1569
Sanjay Patel1acd2cf2017-09-25 19:56:57 +00001570// CHECK-LABEL: define float @test_mm512_reduce_min_ps(<16 x float> %__W) #0 {
1571// CHECK: [[_COMPOUNDLITERAL_I_I17_I:%.*]] = alloca <16 x float>, align 64
1572// CHECK: [[__A_ADDR_I18_I:%.*]] = alloca <16 x float>, align 64
1573// CHECK: [[__B_ADDR_I19_I:%.*]] = alloca <16 x float>, align 64
1574// CHECK: [[_COMPOUNDLITERAL_I_I14_I:%.*]] = alloca <16 x float>, align 64
1575// CHECK: [[__A_ADDR_I15_I:%.*]] = alloca <16 x float>, align 64
1576// CHECK: [[__B_ADDR_I16_I:%.*]] = alloca <16 x float>, align 64
1577// CHECK: [[_COMPOUNDLITERAL_I_I11_I:%.*]] = alloca <16 x float>, align 64
1578// CHECK: [[__A_ADDR_I12_I:%.*]] = alloca <16 x float>, align 64
1579// CHECK: [[__B_ADDR_I13_I:%.*]] = alloca <16 x float>, align 64
1580// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <16 x float>, align 64
1581// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <16 x float>, align 64
1582// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <16 x float>, align 64
1583// CHECK: [[A_ADDR_I:%.*]] = alloca <16 x float>, align 64
1584// CHECK: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64
1585// CHECK: store <16 x float> %__W, <16 x float>* [[__W_ADDR]], align 64
1586// CHECK: [[TMP0:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64
1587// CHECK: store <16 x float> [[TMP0]], <16 x float>* [[A_ADDR_I]], align 64
1588// CHECK: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1589// CHECK: [[TMP2:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1590// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1591// CHECK: [[TMP3:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1592// CHECK: [[TMP4:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1593// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <16 x float> [[TMP3]], <16 x float> [[TMP4]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1594// CHECK: store <16 x float> [[SHUFFLE_I]], <16 x float>* [[__A_ADDR_I_I]], align 64
1595// CHECK: store <16 x float> [[SHUFFLE1_I]], <16 x float>* [[__B_ADDR_I_I]], align 64
1596// CHECK: [[TMP5:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I_I]], align 64
1597// CHECK: [[TMP6:%.*]] = load <16 x float>, <16 x float>* [[__B_ADDR_I_I]], align 64
1598// CHECK: store <16 x float> zeroinitializer, <16 x float>* [[_COMPOUNDLITERAL_I_I_I]], align 64
1599// CHECK: [[TMP7:%.*]] = load <16 x float>, <16 x float>* [[_COMPOUNDLITERAL_I_I_I]], align 64
1600// CHECK: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> [[TMP5]], <16 x float> [[TMP6]], <16 x float> [[TMP7]], i16 -1, i32 4) #2
1601// CHECK: store <16 x float> [[TMP8]], <16 x float>* [[A_ADDR_I]], align 64
1602// CHECK: [[TMP9:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1603// CHECK: [[TMP10:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1604// CHECK: [[SHUFFLE2_I:%.*]] = shufflevector <16 x float> [[TMP9]], <16 x float> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1605// CHECK: [[TMP11:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1606// CHECK: [[TMP12:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1607// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> [[TMP12]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1608// CHECK: store <16 x float> [[SHUFFLE2_I]], <16 x float>* [[__A_ADDR_I18_I]], align 64
1609// CHECK: store <16 x float> [[SHUFFLE3_I]], <16 x float>* [[__B_ADDR_I19_I]], align 64
1610// CHECK: [[TMP13:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I18_I]], align 64
1611// CHECK: [[TMP14:%.*]] = load <16 x float>, <16 x float>* [[__B_ADDR_I19_I]], align 64
1612// CHECK: store <16 x float> zeroinitializer, <16 x float>* [[_COMPOUNDLITERAL_I_I17_I]], align 64
1613// CHECK: [[TMP15:%.*]] = load <16 x float>, <16 x float>* [[_COMPOUNDLITERAL_I_I17_I]], align 64
1614// CHECK: [[TMP16:%.*]] = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> [[TMP13]], <16 x float> [[TMP14]], <16 x float> [[TMP15]], i16 -1, i32 4) #2
1615// CHECK: store <16 x float> [[TMP16]], <16 x float>* [[A_ADDR_I]], align 64
1616// CHECK: [[TMP17:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1617// CHECK: [[TMP18:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1618// CHECK: [[SHUFFLE5_I:%.*]] = shufflevector <16 x float> [[TMP17]], <16 x float> [[TMP18]], <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1619// CHECK: [[TMP19:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1620// CHECK: [[TMP20:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1621// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP20]], <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1622// CHECK: store <16 x float> [[SHUFFLE5_I]], <16 x float>* [[__A_ADDR_I15_I]], align 64
1623// CHECK: store <16 x float> [[SHUFFLE6_I]], <16 x float>* [[__B_ADDR_I16_I]], align 64
1624// CHECK: [[TMP21:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I15_I]], align 64
1625// CHECK: [[TMP22:%.*]] = load <16 x float>, <16 x float>* [[__B_ADDR_I16_I]], align 64
1626// CHECK: store <16 x float> zeroinitializer, <16 x float>* [[_COMPOUNDLITERAL_I_I14_I]], align 64
1627// CHECK: [[TMP23:%.*]] = load <16 x float>, <16 x float>* [[_COMPOUNDLITERAL_I_I14_I]], align 64
1628// CHECK: [[TMP24:%.*]] = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x float> [[TMP23]], i16 -1, i32 4) #2
1629// CHECK: store <16 x float> [[TMP24]], <16 x float>* [[A_ADDR_I]], align 64
1630// CHECK: [[TMP25:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1631// CHECK: [[TMP26:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1632// CHECK: [[SHUFFLE8_I:%.*]] = shufflevector <16 x float> [[TMP25]], <16 x float> [[TMP26]], <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1633// CHECK: [[TMP27:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1634// CHECK: [[TMP28:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1635// CHECK: [[SHUFFLE9_I:%.*]] = shufflevector <16 x float> [[TMP27]], <16 x float> [[TMP28]], <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1636// CHECK: store <16 x float> [[SHUFFLE8_I]], <16 x float>* [[__A_ADDR_I12_I]], align 64
1637// CHECK: store <16 x float> [[SHUFFLE9_I]], <16 x float>* [[__B_ADDR_I13_I]], align 64
1638// CHECK: [[TMP29:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I12_I]], align 64
1639// CHECK: [[TMP30:%.*]] = load <16 x float>, <16 x float>* [[__B_ADDR_I13_I]], align 64
1640// CHECK: store <16 x float> zeroinitializer, <16 x float>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
1641// CHECK: [[TMP31:%.*]] = load <16 x float>, <16 x float>* [[_COMPOUNDLITERAL_I_I11_I]], align 64
1642// CHECK: [[TMP32:%.*]] = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> [[TMP29]], <16 x float> [[TMP30]], <16 x float> [[TMP31]], i16 -1, i32 4) #2
1643// CHECK: store <16 x float> [[TMP32]], <16 x float>* [[A_ADDR_I]], align 64
1644// CHECK: [[TMP33:%.*]] = load <16 x float>, <16 x float>* [[A_ADDR_I]], align 64
1645// CHECK: [[VECEXT_I:%.*]] = extractelement <16 x float> [[TMP33]], i32 0
1646// CHECK: ret float [[VECEXT_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +00001647float test_mm512_reduce_min_ps(__m512 __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +00001648 return _mm512_reduce_min_ps(__W);
1649}
1650
Sanjay Patel1acd2cf2017-09-25 19:56:57 +00001651// CHECK-LABEL: define i32 @test_mm512_mask_reduce_max_epi32(i16 zeroext %__M, <8 x i64> %__W) #0 {
1652// CHECK: [[_COMPOUNDLITERAL_I_I18_I:%.*]] = alloca <8 x i64>, align 64
1653// CHECK: [[__A_ADDR_I19_I:%.*]] = alloca <8 x i64>, align 64
1654// CHECK: [[__B_ADDR_I20_I:%.*]] = alloca <8 x i64>, align 64
1655// CHECK: [[_COMPOUNDLITERAL_I_I15_I:%.*]] = alloca <8 x i64>, align 64
1656// CHECK: [[__A_ADDR_I16_I:%.*]] = alloca <8 x i64>, align 64
1657// CHECK: [[__B_ADDR_I17_I:%.*]] = alloca <8 x i64>, align 64
1658// CHECK: [[_COMPOUNDLITERAL_I_I12_I:%.*]] = alloca <8 x i64>, align 64
1659// CHECK: [[__A_ADDR_I13_I:%.*]] = alloca <8 x i64>, align 64
1660// CHECK: [[__B_ADDR_I14_I:%.*]] = alloca <8 x i64>, align 64
1661// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64
1662// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
1663// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
1664// CHECK: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4
1665// CHECK: [[_COMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64
1666// CHECK: [[__M_ADDR_I:%.*]] = alloca i16, align 2
1667// CHECK: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
1668// CHECK: [[__M_ADDR:%.*]] = alloca i16, align 2
1669// CHECK: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
1670// CHECK: store i16 %__M, i16* [[__M_ADDR]], align 2
1671// CHECK: store <8 x i64> %__W, <8 x i64>* [[__W_ADDR]], align 64
1672// CHECK: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2
1673// CHECK: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
1674// CHECK: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2
1675// CHECK: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64
1676// CHECK: [[TMP2:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2
1677// CHECK: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1678// CHECK: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP3]] to <16 x i32>
1679// CHECK: store i32 -2147483648, i32* [[__S_ADDR_I_I]], align 4
1680// CHECK: [[TMP5:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1681// CHECK: [[VECINIT_I_I:%.*]] = insertelement <16 x i32> undef, i32 [[TMP5]], i32 0
1682// CHECK: [[TMP6:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1683// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <16 x i32> [[VECINIT_I_I]], i32 [[TMP6]], i32 1
1684// CHECK: [[TMP7:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1685// CHECK: [[VECINIT2_I_I:%.*]] = insertelement <16 x i32> [[VECINIT1_I_I]], i32 [[TMP7]], i32 2
1686// CHECK: [[TMP8:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1687// CHECK: [[VECINIT3_I_I:%.*]] = insertelement <16 x i32> [[VECINIT2_I_I]], i32 [[TMP8]], i32 3
1688// CHECK: [[TMP9:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1689// CHECK: [[VECINIT4_I_I:%.*]] = insertelement <16 x i32> [[VECINIT3_I_I]], i32 [[TMP9]], i32 4
1690// CHECK: [[TMP10:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1691// CHECK: [[VECINIT5_I_I:%.*]] = insertelement <16 x i32> [[VECINIT4_I_I]], i32 [[TMP10]], i32 5
1692// CHECK: [[TMP11:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1693// CHECK: [[VECINIT6_I_I:%.*]] = insertelement <16 x i32> [[VECINIT5_I_I]], i32 [[TMP11]], i32 6
1694// CHECK: [[TMP12:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1695// CHECK: [[VECINIT7_I_I:%.*]] = insertelement <16 x i32> [[VECINIT6_I_I]], i32 [[TMP12]], i32 7
1696// CHECK: [[TMP13:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1697// CHECK: [[VECINIT8_I_I:%.*]] = insertelement <16 x i32> [[VECINIT7_I_I]], i32 [[TMP13]], i32 8
1698// CHECK: [[TMP14:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1699// CHECK: [[VECINIT9_I_I:%.*]] = insertelement <16 x i32> [[VECINIT8_I_I]], i32 [[TMP14]], i32 9
1700// CHECK: [[TMP15:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1701// CHECK: [[VECINIT10_I_I:%.*]] = insertelement <16 x i32> [[VECINIT9_I_I]], i32 [[TMP15]], i32 10
1702// CHECK: [[TMP16:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1703// CHECK: [[VECINIT11_I_I:%.*]] = insertelement <16 x i32> [[VECINIT10_I_I]], i32 [[TMP16]], i32 11
1704// CHECK: [[TMP17:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1705// CHECK: [[VECINIT12_I_I:%.*]] = insertelement <16 x i32> [[VECINIT11_I_I]], i32 [[TMP17]], i32 12
1706// CHECK: [[TMP18:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1707// CHECK: [[VECINIT13_I_I:%.*]] = insertelement <16 x i32> [[VECINIT12_I_I]], i32 [[TMP18]], i32 13
1708// CHECK: [[TMP19:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1709// CHECK: [[VECINIT14_I_I:%.*]] = insertelement <16 x i32> [[VECINIT13_I_I]], i32 [[TMP19]], i32 14
1710// CHECK: [[TMP20:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1711// CHECK: [[VECINIT15_I_I:%.*]] = insertelement <16 x i32> [[VECINIT14_I_I]], i32 [[TMP20]], i32 15
1712// CHECK: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* [[_COMPOUNDLITERAL_I_I]], align 64
1713// CHECK: [[TMP21:%.*]] = load <16 x i32>, <16 x i32>* [[_COMPOUNDLITERAL_I_I]], align 64
1714// CHECK: [[TMP22:%.*]] = bitcast <16 x i32> [[TMP21]] to <8 x i64>
1715// CHECK: [[TMP23:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
1716// CHECK: [[TMP24:%.*]] = select <16 x i1> [[TMP23]], <16 x i32> [[TMP4]], <16 x i32> [[TMP21]]
1717// CHECK: [[TMP25:%.*]] = bitcast <16 x i32> [[TMP24]] to <8 x i64>
1718// CHECK: store <8 x i64> [[TMP25]], <8 x i64>* [[__V_ADDR_I]], align 64
1719// CHECK: [[TMP26:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1720// CHECK: [[TMP27:%.*]] = bitcast <8 x i64> [[TMP26]] to <16 x i32>
1721// CHECK: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1722// CHECK: [[TMP29:%.*]] = bitcast <8 x i64> [[TMP28]] to <16 x i32>
1723// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i32> [[TMP27]], <16 x i32> [[TMP29]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1724// CHECK: [[TMP30:%.*]] = bitcast <16 x i32> [[SHUFFLE_I]] to <8 x i64>
1725// CHECK: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1726// CHECK: [[TMP32:%.*]] = bitcast <8 x i64> [[TMP31]] to <16 x i32>
1727// CHECK: [[TMP33:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1728// CHECK: [[TMP34:%.*]] = bitcast <8 x i64> [[TMP33]] to <16 x i32>
1729// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <16 x i32> [[TMP32]], <16 x i32> [[TMP34]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1730// CHECK: [[TMP35:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I]] to <8 x i64>
1731// CHECK: store <8 x i64> [[TMP30]], <8 x i64>* [[__A_ADDR_I19_I]], align 64
1732// CHECK: store <8 x i64> [[TMP35]], <8 x i64>* [[__B_ADDR_I20_I]], align 64
1733// CHECK: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I19_I]], align 64
1734// CHECK: [[TMP37:%.*]] = bitcast <8 x i64> [[TMP36]] to <16 x i32>
1735// CHECK: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I20_I]], align 64
1736// CHECK: [[TMP39:%.*]] = bitcast <8 x i64> [[TMP38]] to <16 x i32>
1737// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I18_I]], align 64
1738// CHECK: [[TMP40:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I18_I]], align 64
1739// CHECK: [[TMP41:%.*]] = bitcast <8 x i64> [[TMP40]] to <16 x i32>
1740// CHECK: [[TMP42:%.*]] = icmp sgt <16 x i32> [[TMP37]], [[TMP39]]
1741// CHECK: [[TMP43:%.*]] = select <16 x i1> [[TMP42]], <16 x i32> [[TMP37]], <16 x i32> [[TMP39]]
1742// CHECK: [[TMP44:%.*]] = bitcast <16 x i32> [[TMP43]] to <8 x i64>
1743// CHECK: store <8 x i64> [[TMP44]], <8 x i64>* [[__V_ADDR_I]], align 64
1744// CHECK: [[TMP45:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1745// CHECK: [[TMP46:%.*]] = bitcast <8 x i64> [[TMP45]] to <16 x i32>
1746// CHECK: [[TMP47:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1747// CHECK: [[TMP48:%.*]] = bitcast <8 x i64> [[TMP47]] to <16 x i32>
1748// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <16 x i32> [[TMP46]], <16 x i32> [[TMP48]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1749// CHECK: [[TMP49:%.*]] = bitcast <16 x i32> [[SHUFFLE3_I]] to <8 x i64>
1750// CHECK: [[TMP50:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1751// CHECK: [[TMP51:%.*]] = bitcast <8 x i64> [[TMP50]] to <16 x i32>
1752// CHECK: [[TMP52:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1753// CHECK: [[TMP53:%.*]] = bitcast <8 x i64> [[TMP52]] to <16 x i32>
1754// CHECK: [[SHUFFLE4_I:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> [[TMP53]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1755// CHECK: [[TMP54:%.*]] = bitcast <16 x i32> [[SHUFFLE4_I]] to <8 x i64>
1756// CHECK: store <8 x i64> [[TMP49]], <8 x i64>* [[__A_ADDR_I16_I]], align 64
1757// CHECK: store <8 x i64> [[TMP54]], <8 x i64>* [[__B_ADDR_I17_I]], align 64
1758// CHECK: [[TMP55:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I16_I]], align 64
1759// CHECK: [[TMP56:%.*]] = bitcast <8 x i64> [[TMP55]] to <16 x i32>
1760// CHECK: [[TMP57:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I17_I]], align 64
1761// CHECK: [[TMP58:%.*]] = bitcast <8 x i64> [[TMP57]] to <16 x i32>
1762// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I15_I]], align 64
1763// CHECK: [[TMP59:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I15_I]], align 64
1764// CHECK: [[TMP60:%.*]] = bitcast <8 x i64> [[TMP59]] to <16 x i32>
1765// CHECK: [[TMP61:%.*]] = icmp sgt <16 x i32> [[TMP56]], [[TMP58]]
1766// CHECK: [[TMP62:%.*]] = select <16 x i1> [[TMP61]], <16 x i32> [[TMP56]], <16 x i32> [[TMP58]]
1767// CHECK: [[TMP63:%.*]] = bitcast <16 x i32> [[TMP62]] to <8 x i64>
1768// CHECK: store <8 x i64> [[TMP63]], <8 x i64>* [[__V_ADDR_I]], align 64
1769// CHECK: [[TMP64:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1770// CHECK: [[TMP65:%.*]] = bitcast <8 x i64> [[TMP64]] to <16 x i32>
1771// CHECK: [[TMP66:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1772// CHECK: [[TMP67:%.*]] = bitcast <8 x i64> [[TMP66]] to <16 x i32>
1773// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <16 x i32> [[TMP65]], <16 x i32> [[TMP67]], <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1774// CHECK: [[TMP68:%.*]] = bitcast <16 x i32> [[SHUFFLE6_I]] to <8 x i64>
1775// CHECK: [[TMP69:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1776// CHECK: [[TMP70:%.*]] = bitcast <8 x i64> [[TMP69]] to <16 x i32>
1777// CHECK: [[TMP71:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1778// CHECK: [[TMP72:%.*]] = bitcast <8 x i64> [[TMP71]] to <16 x i32>
1779// CHECK: [[SHUFFLE7_I:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP72]], <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1780// CHECK: [[TMP73:%.*]] = bitcast <16 x i32> [[SHUFFLE7_I]] to <8 x i64>
1781// CHECK: store <8 x i64> [[TMP68]], <8 x i64>* [[__A_ADDR_I13_I]], align 64
1782// CHECK: store <8 x i64> [[TMP73]], <8 x i64>* [[__B_ADDR_I14_I]], align 64
1783// CHECK: [[TMP74:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I13_I]], align 64
1784// CHECK: [[TMP75:%.*]] = bitcast <8 x i64> [[TMP74]] to <16 x i32>
1785// CHECK: [[TMP76:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I14_I]], align 64
1786// CHECK: [[TMP77:%.*]] = bitcast <8 x i64> [[TMP76]] to <16 x i32>
1787// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
1788// CHECK: [[TMP78:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
1789// CHECK: [[TMP79:%.*]] = bitcast <8 x i64> [[TMP78]] to <16 x i32>
1790// CHECK: [[TMP80:%.*]] = icmp sgt <16 x i32> [[TMP75]], [[TMP77]]
1791// CHECK: [[TMP81:%.*]] = select <16 x i1> [[TMP80]], <16 x i32> [[TMP75]], <16 x i32> [[TMP77]]
1792// CHECK: [[TMP82:%.*]] = bitcast <16 x i32> [[TMP81]] to <8 x i64>
1793// CHECK: store <8 x i64> [[TMP82]], <8 x i64>* [[__V_ADDR_I]], align 64
1794// CHECK: [[TMP83:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1795// CHECK: [[TMP84:%.*]] = bitcast <8 x i64> [[TMP83]] to <16 x i32>
1796// CHECK: [[TMP85:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1797// CHECK: [[TMP86:%.*]] = bitcast <8 x i64> [[TMP85]] to <16 x i32>
1798// CHECK: [[SHUFFLE9_I:%.*]] = shufflevector <16 x i32> [[TMP84]], <16 x i32> [[TMP86]], <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1799// CHECK: [[TMP87:%.*]] = bitcast <16 x i32> [[SHUFFLE9_I]] to <8 x i64>
1800// CHECK: [[TMP88:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1801// CHECK: [[TMP89:%.*]] = bitcast <8 x i64> [[TMP88]] to <16 x i32>
1802// CHECK: [[TMP90:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1803// CHECK: [[TMP91:%.*]] = bitcast <8 x i64> [[TMP90]] to <16 x i32>
1804// CHECK: [[SHUFFLE10_I:%.*]] = shufflevector <16 x i32> [[TMP89]], <16 x i32> [[TMP91]], <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1805// CHECK: [[TMP92:%.*]] = bitcast <16 x i32> [[SHUFFLE10_I]] to <8 x i64>
1806// CHECK: store <8 x i64> [[TMP87]], <8 x i64>* [[__A_ADDR_I_I]], align 64
1807// CHECK: store <8 x i64> [[TMP92]], <8 x i64>* [[__B_ADDR_I_I]], align 64
1808// CHECK: [[TMP93:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
1809// CHECK: [[TMP94:%.*]] = bitcast <8 x i64> [[TMP93]] to <16 x i32>
1810// CHECK: [[TMP95:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64
1811// CHECK: [[TMP96:%.*]] = bitcast <8 x i64> [[TMP95]] to <16 x i32>
1812// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
1813// CHECK: [[TMP97:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
1814// CHECK: [[TMP98:%.*]] = bitcast <8 x i64> [[TMP97]] to <16 x i32>
1815// CHECK: [[TMP99:%.*]] = icmp sgt <16 x i32> [[TMP94]], [[TMP96]]
1816// CHECK: [[TMP100:%.*]] = select <16 x i1> [[TMP99]], <16 x i32> [[TMP94]], <16 x i32> [[TMP96]]
1817// CHECK: [[TMP101:%.*]] = bitcast <16 x i32> [[TMP100]] to <8 x i64>
1818// CHECK: store <8 x i64> [[TMP101]], <8 x i64>* [[__V_ADDR_I]], align 64
1819// CHECK: [[TMP102:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1820// CHECK: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP102]], i32 0
1821// CHECK: [[CONV_I:%.*]] = trunc i64 [[VECEXT_I]] to i32
1822// CHECK: ret i32 [[CONV_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +00001823int test_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +00001824 return _mm512_mask_reduce_max_epi32(__M, __W);
1825}
1826
Sanjay Patel1acd2cf2017-09-25 19:56:57 +00001827// CHECK-LABEL: define i32 @test_mm512_mask_reduce_max_epu32(i16 zeroext %__M, <8 x i64> %__W) #0 {
1828// CHECK: [[_COMPOUNDLITERAL_I_I18_I:%.*]] = alloca <8 x i64>, align 64
1829// CHECK: [[__A_ADDR_I19_I:%.*]] = alloca <8 x i64>, align 64
1830// CHECK: [[__B_ADDR_I20_I:%.*]] = alloca <8 x i64>, align 64
1831// CHECK: [[_COMPOUNDLITERAL_I_I15_I:%.*]] = alloca <8 x i64>, align 64
1832// CHECK: [[__A_ADDR_I16_I:%.*]] = alloca <8 x i64>, align 64
1833// CHECK: [[__B_ADDR_I17_I:%.*]] = alloca <8 x i64>, align 64
1834// CHECK: [[_COMPOUNDLITERAL_I_I12_I:%.*]] = alloca <8 x i64>, align 64
1835// CHECK: [[__A_ADDR_I13_I:%.*]] = alloca <8 x i64>, align 64
1836// CHECK: [[__B_ADDR_I14_I:%.*]] = alloca <8 x i64>, align 64
1837// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64
1838// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
1839// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
1840// CHECK: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4
1841// CHECK: [[_COMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64
1842// CHECK: [[__M_ADDR_I:%.*]] = alloca i16, align 2
1843// CHECK: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
1844// CHECK: [[__M_ADDR:%.*]] = alloca i16, align 2
1845// CHECK: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
1846// CHECK: store i16 %__M, i16* [[__M_ADDR]], align 2
1847// CHECK: store <8 x i64> %__W, <8 x i64>* [[__W_ADDR]], align 64
1848// CHECK: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2
1849// CHECK: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
1850// CHECK: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2
1851// CHECK: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64
1852// CHECK: [[TMP2:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2
1853// CHECK: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1854// CHECK: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP3]] to <16 x i32>
1855// CHECK: store i32 0, i32* [[__S_ADDR_I_I]], align 4
1856// CHECK: [[TMP5:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1857// CHECK: [[VECINIT_I_I:%.*]] = insertelement <16 x i32> undef, i32 [[TMP5]], i32 0
1858// CHECK: [[TMP6:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1859// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <16 x i32> [[VECINIT_I_I]], i32 [[TMP6]], i32 1
1860// CHECK: [[TMP7:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1861// CHECK: [[VECINIT2_I_I:%.*]] = insertelement <16 x i32> [[VECINIT1_I_I]], i32 [[TMP7]], i32 2
1862// CHECK: [[TMP8:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1863// CHECK: [[VECINIT3_I_I:%.*]] = insertelement <16 x i32> [[VECINIT2_I_I]], i32 [[TMP8]], i32 3
1864// CHECK: [[TMP9:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1865// CHECK: [[VECINIT4_I_I:%.*]] = insertelement <16 x i32> [[VECINIT3_I_I]], i32 [[TMP9]], i32 4
1866// CHECK: [[TMP10:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1867// CHECK: [[VECINIT5_I_I:%.*]] = insertelement <16 x i32> [[VECINIT4_I_I]], i32 [[TMP10]], i32 5
1868// CHECK: [[TMP11:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1869// CHECK: [[VECINIT6_I_I:%.*]] = insertelement <16 x i32> [[VECINIT5_I_I]], i32 [[TMP11]], i32 6
1870// CHECK: [[TMP12:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1871// CHECK: [[VECINIT7_I_I:%.*]] = insertelement <16 x i32> [[VECINIT6_I_I]], i32 [[TMP12]], i32 7
1872// CHECK: [[TMP13:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1873// CHECK: [[VECINIT8_I_I:%.*]] = insertelement <16 x i32> [[VECINIT7_I_I]], i32 [[TMP13]], i32 8
1874// CHECK: [[TMP14:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1875// CHECK: [[VECINIT9_I_I:%.*]] = insertelement <16 x i32> [[VECINIT8_I_I]], i32 [[TMP14]], i32 9
1876// CHECK: [[TMP15:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1877// CHECK: [[VECINIT10_I_I:%.*]] = insertelement <16 x i32> [[VECINIT9_I_I]], i32 [[TMP15]], i32 10
1878// CHECK: [[TMP16:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1879// CHECK: [[VECINIT11_I_I:%.*]] = insertelement <16 x i32> [[VECINIT10_I_I]], i32 [[TMP16]], i32 11
1880// CHECK: [[TMP17:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1881// CHECK: [[VECINIT12_I_I:%.*]] = insertelement <16 x i32> [[VECINIT11_I_I]], i32 [[TMP17]], i32 12
1882// CHECK: [[TMP18:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1883// CHECK: [[VECINIT13_I_I:%.*]] = insertelement <16 x i32> [[VECINIT12_I_I]], i32 [[TMP18]], i32 13
1884// CHECK: [[TMP19:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1885// CHECK: [[VECINIT14_I_I:%.*]] = insertelement <16 x i32> [[VECINIT13_I_I]], i32 [[TMP19]], i32 14
1886// CHECK: [[TMP20:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
1887// CHECK: [[VECINIT15_I_I:%.*]] = insertelement <16 x i32> [[VECINIT14_I_I]], i32 [[TMP20]], i32 15
1888// CHECK: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* [[_COMPOUNDLITERAL_I_I]], align 64
1889// CHECK: [[TMP21:%.*]] = load <16 x i32>, <16 x i32>* [[_COMPOUNDLITERAL_I_I]], align 64
1890// CHECK: [[TMP22:%.*]] = bitcast <16 x i32> [[TMP21]] to <8 x i64>
1891// CHECK: [[TMP23:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
1892// CHECK: [[TMP24:%.*]] = select <16 x i1> [[TMP23]], <16 x i32> [[TMP4]], <16 x i32> [[TMP21]]
1893// CHECK: [[TMP25:%.*]] = bitcast <16 x i32> [[TMP24]] to <8 x i64>
1894// CHECK: store <8 x i64> [[TMP25]], <8 x i64>* [[__V_ADDR_I]], align 64
1895// CHECK: [[TMP26:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1896// CHECK: [[TMP27:%.*]] = bitcast <8 x i64> [[TMP26]] to <16 x i32>
1897// CHECK: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1898// CHECK: [[TMP29:%.*]] = bitcast <8 x i64> [[TMP28]] to <16 x i32>
1899// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i32> [[TMP27]], <16 x i32> [[TMP29]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1900// CHECK: [[TMP30:%.*]] = bitcast <16 x i32> [[SHUFFLE_I]] to <8 x i64>
1901// CHECK: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1902// CHECK: [[TMP32:%.*]] = bitcast <8 x i64> [[TMP31]] to <16 x i32>
1903// CHECK: [[TMP33:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1904// CHECK: [[TMP34:%.*]] = bitcast <8 x i64> [[TMP33]] to <16 x i32>
1905// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <16 x i32> [[TMP32]], <16 x i32> [[TMP34]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1906// CHECK: [[TMP35:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I]] to <8 x i64>
1907// CHECK: store <8 x i64> [[TMP30]], <8 x i64>* [[__A_ADDR_I19_I]], align 64
1908// CHECK: store <8 x i64> [[TMP35]], <8 x i64>* [[__B_ADDR_I20_I]], align 64
1909// CHECK: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I19_I]], align 64
1910// CHECK: [[TMP37:%.*]] = bitcast <8 x i64> [[TMP36]] to <16 x i32>
1911// CHECK: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I20_I]], align 64
1912// CHECK: [[TMP39:%.*]] = bitcast <8 x i64> [[TMP38]] to <16 x i32>
1913// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I18_I]], align 64
1914// CHECK: [[TMP40:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I18_I]], align 64
1915// CHECK: [[TMP41:%.*]] = bitcast <8 x i64> [[TMP40]] to <16 x i32>
1916// CHECK: [[TMP42:%.*]] = icmp ugt <16 x i32> [[TMP37]], [[TMP39]]
1917// CHECK: [[TMP43:%.*]] = select <16 x i1> [[TMP42]], <16 x i32> [[TMP37]], <16 x i32> [[TMP39]]
1918// CHECK: [[TMP44:%.*]] = bitcast <16 x i32> [[TMP43]] to <8 x i64>
1919// CHECK: store <8 x i64> [[TMP44]], <8 x i64>* [[__V_ADDR_I]], align 64
1920// CHECK: [[TMP45:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1921// CHECK: [[TMP46:%.*]] = bitcast <8 x i64> [[TMP45]] to <16 x i32>
1922// CHECK: [[TMP47:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1923// CHECK: [[TMP48:%.*]] = bitcast <8 x i64> [[TMP47]] to <16 x i32>
1924// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <16 x i32> [[TMP46]], <16 x i32> [[TMP48]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1925// CHECK: [[TMP49:%.*]] = bitcast <16 x i32> [[SHUFFLE3_I]] to <8 x i64>
1926// CHECK: [[TMP50:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1927// CHECK: [[TMP51:%.*]] = bitcast <8 x i64> [[TMP50]] to <16 x i32>
1928// CHECK: [[TMP52:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1929// CHECK: [[TMP53:%.*]] = bitcast <8 x i64> [[TMP52]] to <16 x i32>
1930// CHECK: [[SHUFFLE4_I:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> [[TMP53]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1931// CHECK: [[TMP54:%.*]] = bitcast <16 x i32> [[SHUFFLE4_I]] to <8 x i64>
1932// CHECK: store <8 x i64> [[TMP49]], <8 x i64>* [[__A_ADDR_I16_I]], align 64
1933// CHECK: store <8 x i64> [[TMP54]], <8 x i64>* [[__B_ADDR_I17_I]], align 64
1934// CHECK: [[TMP55:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I16_I]], align 64
1935// CHECK: [[TMP56:%.*]] = bitcast <8 x i64> [[TMP55]] to <16 x i32>
1936// CHECK: [[TMP57:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I17_I]], align 64
1937// CHECK: [[TMP58:%.*]] = bitcast <8 x i64> [[TMP57]] to <16 x i32>
1938// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I15_I]], align 64
1939// CHECK: [[TMP59:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I15_I]], align 64
1940// CHECK: [[TMP60:%.*]] = bitcast <8 x i64> [[TMP59]] to <16 x i32>
1941// CHECK: [[TMP61:%.*]] = icmp ugt <16 x i32> [[TMP56]], [[TMP58]]
1942// CHECK: [[TMP62:%.*]] = select <16 x i1> [[TMP61]], <16 x i32> [[TMP56]], <16 x i32> [[TMP58]]
1943// CHECK: [[TMP63:%.*]] = bitcast <16 x i32> [[TMP62]] to <8 x i64>
1944// CHECK: store <8 x i64> [[TMP63]], <8 x i64>* [[__V_ADDR_I]], align 64
1945// CHECK: [[TMP64:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1946// CHECK: [[TMP65:%.*]] = bitcast <8 x i64> [[TMP64]] to <16 x i32>
1947// CHECK: [[TMP66:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1948// CHECK: [[TMP67:%.*]] = bitcast <8 x i64> [[TMP66]] to <16 x i32>
1949// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <16 x i32> [[TMP65]], <16 x i32> [[TMP67]], <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1950// CHECK: [[TMP68:%.*]] = bitcast <16 x i32> [[SHUFFLE6_I]] to <8 x i64>
1951// CHECK: [[TMP69:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1952// CHECK: [[TMP70:%.*]] = bitcast <8 x i64> [[TMP69]] to <16 x i32>
1953// CHECK: [[TMP71:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1954// CHECK: [[TMP72:%.*]] = bitcast <8 x i64> [[TMP71]] to <16 x i32>
1955// CHECK: [[SHUFFLE7_I:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP72]], <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1956// CHECK: [[TMP73:%.*]] = bitcast <16 x i32> [[SHUFFLE7_I]] to <8 x i64>
1957// CHECK: store <8 x i64> [[TMP68]], <8 x i64>* [[__A_ADDR_I13_I]], align 64
1958// CHECK: store <8 x i64> [[TMP73]], <8 x i64>* [[__B_ADDR_I14_I]], align 64
1959// CHECK: [[TMP74:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I13_I]], align 64
1960// CHECK: [[TMP75:%.*]] = bitcast <8 x i64> [[TMP74]] to <16 x i32>
1961// CHECK: [[TMP76:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I14_I]], align 64
1962// CHECK: [[TMP77:%.*]] = bitcast <8 x i64> [[TMP76]] to <16 x i32>
1963// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
1964// CHECK: [[TMP78:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
1965// CHECK: [[TMP79:%.*]] = bitcast <8 x i64> [[TMP78]] to <16 x i32>
1966// CHECK: [[TMP80:%.*]] = icmp ugt <16 x i32> [[TMP75]], [[TMP77]]
1967// CHECK: [[TMP81:%.*]] = select <16 x i1> [[TMP80]], <16 x i32> [[TMP75]], <16 x i32> [[TMP77]]
1968// CHECK: [[TMP82:%.*]] = bitcast <16 x i32> [[TMP81]] to <8 x i64>
1969// CHECK: store <8 x i64> [[TMP82]], <8 x i64>* [[__V_ADDR_I]], align 64
1970// CHECK: [[TMP83:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1971// CHECK: [[TMP84:%.*]] = bitcast <8 x i64> [[TMP83]] to <16 x i32>
1972// CHECK: [[TMP85:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1973// CHECK: [[TMP86:%.*]] = bitcast <8 x i64> [[TMP85]] to <16 x i32>
1974// CHECK: [[SHUFFLE9_I:%.*]] = shufflevector <16 x i32> [[TMP84]], <16 x i32> [[TMP86]], <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1975// CHECK: [[TMP87:%.*]] = bitcast <16 x i32> [[SHUFFLE9_I]] to <8 x i64>
1976// CHECK: [[TMP88:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1977// CHECK: [[TMP89:%.*]] = bitcast <8 x i64> [[TMP88]] to <16 x i32>
1978// CHECK: [[TMP90:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1979// CHECK: [[TMP91:%.*]] = bitcast <8 x i64> [[TMP90]] to <16 x i32>
1980// CHECK: [[SHUFFLE10_I:%.*]] = shufflevector <16 x i32> [[TMP89]], <16 x i32> [[TMP91]], <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1981// CHECK: [[TMP92:%.*]] = bitcast <16 x i32> [[SHUFFLE10_I]] to <8 x i64>
1982// CHECK: store <8 x i64> [[TMP87]], <8 x i64>* [[__A_ADDR_I_I]], align 64
1983// CHECK: store <8 x i64> [[TMP92]], <8 x i64>* [[__B_ADDR_I_I]], align 64
1984// CHECK: [[TMP93:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
1985// CHECK: [[TMP94:%.*]] = bitcast <8 x i64> [[TMP93]] to <16 x i32>
1986// CHECK: [[TMP95:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64
1987// CHECK: [[TMP96:%.*]] = bitcast <8 x i64> [[TMP95]] to <16 x i32>
1988// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
1989// CHECK: [[TMP97:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
1990// CHECK: [[TMP98:%.*]] = bitcast <8 x i64> [[TMP97]] to <16 x i32>
1991// CHECK: [[TMP99:%.*]] = icmp ugt <16 x i32> [[TMP94]], [[TMP96]]
1992// CHECK: [[TMP100:%.*]] = select <16 x i1> [[TMP99]], <16 x i32> [[TMP94]], <16 x i32> [[TMP96]]
1993// CHECK: [[TMP101:%.*]] = bitcast <16 x i32> [[TMP100]] to <8 x i64>
1994// CHECK: store <8 x i64> [[TMP101]], <8 x i64>* [[__V_ADDR_I]], align 64
1995// CHECK: [[TMP102:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
1996// CHECK: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP102]], i32 0
1997// CHECK: [[CONV_I:%.*]] = trunc i64 [[VECEXT_I]] to i32
1998// CHECK: ret i32 [[CONV_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +00001999unsigned int test_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +00002000 return _mm512_mask_reduce_max_epu32(__M, __W);
2001}
2002
Sanjay Patel1acd2cf2017-09-25 19:56:57 +00002003// CHECK-LABEL: define float @test_mm512_mask_reduce_max_ps(i16 zeroext %__M, <16 x float> %__W) #0 {
2004// CHECK: [[_COMPOUNDLITERAL_I_I18_I:%.*]] = alloca <16 x float>, align 64
2005// CHECK: [[__A_ADDR_I19_I:%.*]] = alloca <16 x float>, align 64
2006// CHECK: [[__B_ADDR_I20_I:%.*]] = alloca <16 x float>, align 64
2007// CHECK: [[_COMPOUNDLITERAL_I_I15_I:%.*]] = alloca <16 x float>, align 64
2008// CHECK: [[__A_ADDR_I16_I:%.*]] = alloca <16 x float>, align 64
2009// CHECK: [[__B_ADDR_I17_I:%.*]] = alloca <16 x float>, align 64
2010// CHECK: [[_COMPOUNDLITERAL_I_I12_I:%.*]] = alloca <16 x float>, align 64
2011// CHECK: [[__A_ADDR_I13_I:%.*]] = alloca <16 x float>, align 64
2012// CHECK: [[__B_ADDR_I14_I:%.*]] = alloca <16 x float>, align 64
2013// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <16 x float>, align 64
2014// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <16 x float>, align 64
2015// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <16 x float>, align 64
2016// CHECK: [[__W_ADDR_I_I:%.*]] = alloca float, align 4
2017// CHECK: [[_COMPOUNDLITERAL_I_I:%.*]] = alloca <16 x float>, align 64
2018// CHECK: [[__M_ADDR_I:%.*]] = alloca i16, align 2
2019// CHECK: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64
2020// CHECK: [[__M_ADDR:%.*]] = alloca i16, align 2
2021// CHECK: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64
2022// CHECK: store i16 %__M, i16* [[__M_ADDR]], align 2
2023// CHECK: store <16 x float> %__W, <16 x float>* [[__W_ADDR]], align 64
2024// CHECK: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2
2025// CHECK: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64
2026// CHECK: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2
2027// CHECK: store <16 x float> [[TMP1]], <16 x float>* [[__V_ADDR_I]], align 64
2028// CHECK: [[TMP2:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2
2029// CHECK: [[TMP3:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2030// CHECK: store float 0x7FF0000000000000, float* [[__W_ADDR_I_I]], align 4
2031// CHECK: [[TMP4:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2032// CHECK: [[VECINIT_I_I:%.*]] = insertelement <16 x float> undef, float [[TMP4]], i32 0
2033// CHECK: [[TMP5:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2034// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <16 x float> [[VECINIT_I_I]], float [[TMP5]], i32 1
2035// CHECK: [[TMP6:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2036// CHECK: [[VECINIT2_I_I:%.*]] = insertelement <16 x float> [[VECINIT1_I_I]], float [[TMP6]], i32 2
2037// CHECK: [[TMP7:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2038// CHECK: [[VECINIT3_I_I:%.*]] = insertelement <16 x float> [[VECINIT2_I_I]], float [[TMP7]], i32 3
2039// CHECK: [[TMP8:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2040// CHECK: [[VECINIT4_I_I:%.*]] = insertelement <16 x float> [[VECINIT3_I_I]], float [[TMP8]], i32 4
2041// CHECK: [[TMP9:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2042// CHECK: [[VECINIT5_I_I:%.*]] = insertelement <16 x float> [[VECINIT4_I_I]], float [[TMP9]], i32 5
2043// CHECK: [[TMP10:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2044// CHECK: [[VECINIT6_I_I:%.*]] = insertelement <16 x float> [[VECINIT5_I_I]], float [[TMP10]], i32 6
2045// CHECK: [[TMP11:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2046// CHECK: [[VECINIT7_I_I:%.*]] = insertelement <16 x float> [[VECINIT6_I_I]], float [[TMP11]], i32 7
2047// CHECK: [[TMP12:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2048// CHECK: [[VECINIT8_I_I:%.*]] = insertelement <16 x float> [[VECINIT7_I_I]], float [[TMP12]], i32 8
2049// CHECK: [[TMP13:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2050// CHECK: [[VECINIT9_I_I:%.*]] = insertelement <16 x float> [[VECINIT8_I_I]], float [[TMP13]], i32 9
2051// CHECK: [[TMP14:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2052// CHECK: [[VECINIT10_I_I:%.*]] = insertelement <16 x float> [[VECINIT9_I_I]], float [[TMP14]], i32 10
2053// CHECK: [[TMP15:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2054// CHECK: [[VECINIT11_I_I:%.*]] = insertelement <16 x float> [[VECINIT10_I_I]], float [[TMP15]], i32 11
2055// CHECK: [[TMP16:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2056// CHECK: [[VECINIT12_I_I:%.*]] = insertelement <16 x float> [[VECINIT11_I_I]], float [[TMP16]], i32 12
2057// CHECK: [[TMP17:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2058// CHECK: [[VECINIT13_I_I:%.*]] = insertelement <16 x float> [[VECINIT12_I_I]], float [[TMP17]], i32 13
2059// CHECK: [[TMP18:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2060// CHECK: [[VECINIT14_I_I:%.*]] = insertelement <16 x float> [[VECINIT13_I_I]], float [[TMP18]], i32 14
2061// CHECK: [[TMP19:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2062// CHECK: [[VECINIT15_I_I:%.*]] = insertelement <16 x float> [[VECINIT14_I_I]], float [[TMP19]], i32 15
2063// CHECK: store <16 x float> [[VECINIT15_I_I]], <16 x float>* [[_COMPOUNDLITERAL_I_I]], align 64
2064// CHECK: [[TMP20:%.*]] = load <16 x float>, <16 x float>* [[_COMPOUNDLITERAL_I_I]], align 64
2065// CHECK: [[SUB_I:%.*]] = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[TMP20]]
2066// CHECK: [[TMP21:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
2067// CHECK: [[TMP22:%.*]] = select <16 x i1> [[TMP21]], <16 x float> [[TMP3]], <16 x float> [[SUB_I]]
2068// CHECK: store <16 x float> [[TMP22]], <16 x float>* [[__V_ADDR_I]], align 64
2069// CHECK: [[TMP23:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2070// CHECK: [[TMP24:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2071// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x float> [[TMP23]], <16 x float> [[TMP24]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2072// CHECK: [[TMP25:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2073// CHECK: [[TMP26:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2074// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <16 x float> [[TMP25]], <16 x float> [[TMP26]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2075// CHECK: store <16 x float> [[SHUFFLE_I]], <16 x float>* [[__A_ADDR_I19_I]], align 64
2076// CHECK: store <16 x float> [[SHUFFLE1_I]], <16 x float>* [[__B_ADDR_I20_I]], align 64
2077// CHECK: [[TMP27:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I19_I]], align 64
2078// CHECK: [[TMP28:%.*]] = load <16 x float>, <16 x float>* [[__B_ADDR_I20_I]], align 64
2079// CHECK: store <16 x float> zeroinitializer, <16 x float>* [[_COMPOUNDLITERAL_I_I18_I]], align 64
2080// CHECK: [[TMP29:%.*]] = load <16 x float>, <16 x float>* [[_COMPOUNDLITERAL_I_I18_I]], align 64
2081// CHECK: [[TMP30:%.*]] = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> [[TMP27]], <16 x float> [[TMP28]], <16 x float> [[TMP29]], i16 -1, i32 4) #2
2082// CHECK: store <16 x float> [[TMP30]], <16 x float>* [[__V_ADDR_I]], align 64
2083// CHECK: [[TMP31:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2084// CHECK: [[TMP32:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2085// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <16 x float> [[TMP31]], <16 x float> [[TMP32]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2086// CHECK: [[TMP33:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2087// CHECK: [[TMP34:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2088// CHECK: [[SHUFFLE4_I:%.*]] = shufflevector <16 x float> [[TMP33]], <16 x float> [[TMP34]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2089// CHECK: store <16 x float> [[SHUFFLE3_I]], <16 x float>* [[__A_ADDR_I16_I]], align 64
2090// CHECK: store <16 x float> [[SHUFFLE4_I]], <16 x float>* [[__B_ADDR_I17_I]], align 64
2091// CHECK: [[TMP35:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I16_I]], align 64
2092// CHECK: [[TMP36:%.*]] = load <16 x float>, <16 x float>* [[__B_ADDR_I17_I]], align 64
2093// CHECK: store <16 x float> zeroinitializer, <16 x float>* [[_COMPOUNDLITERAL_I_I15_I]], align 64
2094// CHECK: [[TMP37:%.*]] = load <16 x float>, <16 x float>* [[_COMPOUNDLITERAL_I_I15_I]], align 64
2095// CHECK: [[TMP38:%.*]] = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> [[TMP35]], <16 x float> [[TMP36]], <16 x float> [[TMP37]], i16 -1, i32 4) #2
2096// CHECK: store <16 x float> [[TMP38]], <16 x float>* [[__V_ADDR_I]], align 64
2097// CHECK: [[TMP39:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2098// CHECK: [[TMP40:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2099// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <16 x float> [[TMP39]], <16 x float> [[TMP40]], <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2100// CHECK: [[TMP41:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2101// CHECK: [[TMP42:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2102// CHECK: [[SHUFFLE7_I:%.*]] = shufflevector <16 x float> [[TMP41]], <16 x float> [[TMP42]], <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2103// CHECK: store <16 x float> [[SHUFFLE6_I]], <16 x float>* [[__A_ADDR_I13_I]], align 64
2104// CHECK: store <16 x float> [[SHUFFLE7_I]], <16 x float>* [[__B_ADDR_I14_I]], align 64
2105// CHECK: [[TMP43:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I13_I]], align 64
2106// CHECK: [[TMP44:%.*]] = load <16 x float>, <16 x float>* [[__B_ADDR_I14_I]], align 64
2107// CHECK: store <16 x float> zeroinitializer, <16 x float>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
2108// CHECK: [[TMP45:%.*]] = load <16 x float>, <16 x float>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
2109// CHECK: [[TMP46:%.*]] = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> [[TMP43]], <16 x float> [[TMP44]], <16 x float> [[TMP45]], i16 -1, i32 4) #2
2110// CHECK: store <16 x float> [[TMP46]], <16 x float>* [[__V_ADDR_I]], align 64
2111// CHECK: [[TMP47:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2112// CHECK: [[TMP48:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2113// CHECK: [[SHUFFLE9_I:%.*]] = shufflevector <16 x float> [[TMP47]], <16 x float> [[TMP48]], <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2114// CHECK: [[TMP49:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2115// CHECK: [[TMP50:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2116// CHECK: [[SHUFFLE10_I:%.*]] = shufflevector <16 x float> [[TMP49]], <16 x float> [[TMP50]], <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2117// CHECK: store <16 x float> [[SHUFFLE9_I]], <16 x float>* [[__A_ADDR_I_I]], align 64
2118// CHECK: store <16 x float> [[SHUFFLE10_I]], <16 x float>* [[__B_ADDR_I_I]], align 64
2119// CHECK: [[TMP51:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I_I]], align 64
2120// CHECK: [[TMP52:%.*]] = load <16 x float>, <16 x float>* [[__B_ADDR_I_I]], align 64
2121// CHECK: store <16 x float> zeroinitializer, <16 x float>* [[_COMPOUNDLITERAL_I_I_I]], align 64
2122// CHECK: [[TMP53:%.*]] = load <16 x float>, <16 x float>* [[_COMPOUNDLITERAL_I_I_I]], align 64
2123// CHECK: [[TMP54:%.*]] = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> [[TMP51]], <16 x float> [[TMP52]], <16 x float> [[TMP53]], i16 -1, i32 4) #2
2124// CHECK: store <16 x float> [[TMP54]], <16 x float>* [[__V_ADDR_I]], align 64
2125// CHECK: [[TMP55:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2126// CHECK: [[VECEXT_I:%.*]] = extractelement <16 x float> [[TMP55]], i32 0
2127// CHECK: ret float [[VECEXT_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +00002128float test_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +00002129 return _mm512_mask_reduce_max_ps(__M, __W);
2130}
2131
Sanjay Patel1acd2cf2017-09-25 19:56:57 +00002132// CHECK-LABEL: define i32 @test_mm512_mask_reduce_min_epi32(i16 zeroext %__M, <8 x i64> %__W) #0 {
2133// CHECK: [[_COMPOUNDLITERAL_I_I18_I:%.*]] = alloca <8 x i64>, align 64
2134// CHECK: [[__A_ADDR_I19_I:%.*]] = alloca <8 x i64>, align 64
2135// CHECK: [[__B_ADDR_I20_I:%.*]] = alloca <8 x i64>, align 64
2136// CHECK: [[_COMPOUNDLITERAL_I_I15_I:%.*]] = alloca <8 x i64>, align 64
2137// CHECK: [[__A_ADDR_I16_I:%.*]] = alloca <8 x i64>, align 64
2138// CHECK: [[__B_ADDR_I17_I:%.*]] = alloca <8 x i64>, align 64
2139// CHECK: [[_COMPOUNDLITERAL_I_I12_I:%.*]] = alloca <8 x i64>, align 64
2140// CHECK: [[__A_ADDR_I13_I:%.*]] = alloca <8 x i64>, align 64
2141// CHECK: [[__B_ADDR_I14_I:%.*]] = alloca <8 x i64>, align 64
2142// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64
2143// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
2144// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
2145// CHECK: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4
2146// CHECK: [[_COMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64
2147// CHECK: [[__M_ADDR_I:%.*]] = alloca i16, align 2
2148// CHECK: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
2149// CHECK: [[__M_ADDR:%.*]] = alloca i16, align 2
2150// CHECK: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
2151// CHECK: store i16 %__M, i16* [[__M_ADDR]], align 2
2152// CHECK: store <8 x i64> %__W, <8 x i64>* [[__W_ADDR]], align 64
2153// CHECK: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2
2154// CHECK: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
2155// CHECK: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2
2156// CHECK: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64
2157// CHECK: [[TMP2:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2
2158// CHECK: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2159// CHECK: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP3]] to <16 x i32>
2160// CHECK: store i32 2147483647, i32* [[__S_ADDR_I_I]], align 4
2161// CHECK: [[TMP5:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2162// CHECK: [[VECINIT_I_I:%.*]] = insertelement <16 x i32> undef, i32 [[TMP5]], i32 0
2163// CHECK: [[TMP6:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2164// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <16 x i32> [[VECINIT_I_I]], i32 [[TMP6]], i32 1
2165// CHECK: [[TMP7:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2166// CHECK: [[VECINIT2_I_I:%.*]] = insertelement <16 x i32> [[VECINIT1_I_I]], i32 [[TMP7]], i32 2
2167// CHECK: [[TMP8:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2168// CHECK: [[VECINIT3_I_I:%.*]] = insertelement <16 x i32> [[VECINIT2_I_I]], i32 [[TMP8]], i32 3
2169// CHECK: [[TMP9:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2170// CHECK: [[VECINIT4_I_I:%.*]] = insertelement <16 x i32> [[VECINIT3_I_I]], i32 [[TMP9]], i32 4
2171// CHECK: [[TMP10:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2172// CHECK: [[VECINIT5_I_I:%.*]] = insertelement <16 x i32> [[VECINIT4_I_I]], i32 [[TMP10]], i32 5
2173// CHECK: [[TMP11:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2174// CHECK: [[VECINIT6_I_I:%.*]] = insertelement <16 x i32> [[VECINIT5_I_I]], i32 [[TMP11]], i32 6
2175// CHECK: [[TMP12:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2176// CHECK: [[VECINIT7_I_I:%.*]] = insertelement <16 x i32> [[VECINIT6_I_I]], i32 [[TMP12]], i32 7
2177// CHECK: [[TMP13:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2178// CHECK: [[VECINIT8_I_I:%.*]] = insertelement <16 x i32> [[VECINIT7_I_I]], i32 [[TMP13]], i32 8
2179// CHECK: [[TMP14:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2180// CHECK: [[VECINIT9_I_I:%.*]] = insertelement <16 x i32> [[VECINIT8_I_I]], i32 [[TMP14]], i32 9
2181// CHECK: [[TMP15:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2182// CHECK: [[VECINIT10_I_I:%.*]] = insertelement <16 x i32> [[VECINIT9_I_I]], i32 [[TMP15]], i32 10
2183// CHECK: [[TMP16:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2184// CHECK: [[VECINIT11_I_I:%.*]] = insertelement <16 x i32> [[VECINIT10_I_I]], i32 [[TMP16]], i32 11
2185// CHECK: [[TMP17:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2186// CHECK: [[VECINIT12_I_I:%.*]] = insertelement <16 x i32> [[VECINIT11_I_I]], i32 [[TMP17]], i32 12
2187// CHECK: [[TMP18:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2188// CHECK: [[VECINIT13_I_I:%.*]] = insertelement <16 x i32> [[VECINIT12_I_I]], i32 [[TMP18]], i32 13
2189// CHECK: [[TMP19:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2190// CHECK: [[VECINIT14_I_I:%.*]] = insertelement <16 x i32> [[VECINIT13_I_I]], i32 [[TMP19]], i32 14
2191// CHECK: [[TMP20:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2192// CHECK: [[VECINIT15_I_I:%.*]] = insertelement <16 x i32> [[VECINIT14_I_I]], i32 [[TMP20]], i32 15
2193// CHECK: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* [[_COMPOUNDLITERAL_I_I]], align 64
2194// CHECK: [[TMP21:%.*]] = load <16 x i32>, <16 x i32>* [[_COMPOUNDLITERAL_I_I]], align 64
2195// CHECK: [[TMP22:%.*]] = bitcast <16 x i32> [[TMP21]] to <8 x i64>
2196// CHECK: [[TMP23:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
2197// CHECK: [[TMP24:%.*]] = select <16 x i1> [[TMP23]], <16 x i32> [[TMP4]], <16 x i32> [[TMP21]]
2198// CHECK: [[TMP25:%.*]] = bitcast <16 x i32> [[TMP24]] to <8 x i64>
2199// CHECK: store <8 x i64> [[TMP25]], <8 x i64>* [[__V_ADDR_I]], align 64
2200// CHECK: [[TMP26:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2201// CHECK: [[TMP27:%.*]] = bitcast <8 x i64> [[TMP26]] to <16 x i32>
2202// CHECK: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2203// CHECK: [[TMP29:%.*]] = bitcast <8 x i64> [[TMP28]] to <16 x i32>
2204// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i32> [[TMP27]], <16 x i32> [[TMP29]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2205// CHECK: [[TMP30:%.*]] = bitcast <16 x i32> [[SHUFFLE_I]] to <8 x i64>
2206// CHECK: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2207// CHECK: [[TMP32:%.*]] = bitcast <8 x i64> [[TMP31]] to <16 x i32>
2208// CHECK: [[TMP33:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2209// CHECK: [[TMP34:%.*]] = bitcast <8 x i64> [[TMP33]] to <16 x i32>
2210// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <16 x i32> [[TMP32]], <16 x i32> [[TMP34]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2211// CHECK: [[TMP35:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I]] to <8 x i64>
2212// CHECK: store <8 x i64> [[TMP30]], <8 x i64>* [[__A_ADDR_I19_I]], align 64
2213// CHECK: store <8 x i64> [[TMP35]], <8 x i64>* [[__B_ADDR_I20_I]], align 64
2214// CHECK: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I19_I]], align 64
2215// CHECK: [[TMP37:%.*]] = bitcast <8 x i64> [[TMP36]] to <16 x i32>
2216// CHECK: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I20_I]], align 64
2217// CHECK: [[TMP39:%.*]] = bitcast <8 x i64> [[TMP38]] to <16 x i32>
2218// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I18_I]], align 64
2219// CHECK: [[TMP40:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I18_I]], align 64
2220// CHECK: [[TMP41:%.*]] = bitcast <8 x i64> [[TMP40]] to <16 x i32>
2221// CHECK: [[TMP42:%.*]] = icmp slt <16 x i32> [[TMP37]], [[TMP39]]
2222// CHECK: [[TMP43:%.*]] = select <16 x i1> [[TMP42]], <16 x i32> [[TMP37]], <16 x i32> [[TMP39]]
2223// CHECK: [[TMP44:%.*]] = bitcast <16 x i32> [[TMP43]] to <8 x i64>
2224// CHECK: store <8 x i64> [[TMP44]], <8 x i64>* [[__V_ADDR_I]], align 64
2225// CHECK: [[TMP45:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2226// CHECK: [[TMP46:%.*]] = bitcast <8 x i64> [[TMP45]] to <16 x i32>
2227// CHECK: [[TMP47:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2228// CHECK: [[TMP48:%.*]] = bitcast <8 x i64> [[TMP47]] to <16 x i32>
2229// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <16 x i32> [[TMP46]], <16 x i32> [[TMP48]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2230// CHECK: [[TMP49:%.*]] = bitcast <16 x i32> [[SHUFFLE3_I]] to <8 x i64>
2231// CHECK: [[TMP50:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2232// CHECK: [[TMP51:%.*]] = bitcast <8 x i64> [[TMP50]] to <16 x i32>
2233// CHECK: [[TMP52:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2234// CHECK: [[TMP53:%.*]] = bitcast <8 x i64> [[TMP52]] to <16 x i32>
2235// CHECK: [[SHUFFLE4_I:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> [[TMP53]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2236// CHECK: [[TMP54:%.*]] = bitcast <16 x i32> [[SHUFFLE4_I]] to <8 x i64>
2237// CHECK: store <8 x i64> [[TMP49]], <8 x i64>* [[__A_ADDR_I16_I]], align 64
2238// CHECK: store <8 x i64> [[TMP54]], <8 x i64>* [[__B_ADDR_I17_I]], align 64
2239// CHECK: [[TMP55:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I16_I]], align 64
2240// CHECK: [[TMP56:%.*]] = bitcast <8 x i64> [[TMP55]] to <16 x i32>
2241// CHECK: [[TMP57:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I17_I]], align 64
2242// CHECK: [[TMP58:%.*]] = bitcast <8 x i64> [[TMP57]] to <16 x i32>
2243// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I15_I]], align 64
2244// CHECK: [[TMP59:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I15_I]], align 64
2245// CHECK: [[TMP60:%.*]] = bitcast <8 x i64> [[TMP59]] to <16 x i32>
2246// CHECK: [[TMP61:%.*]] = icmp slt <16 x i32> [[TMP56]], [[TMP58]]
2247// CHECK: [[TMP62:%.*]] = select <16 x i1> [[TMP61]], <16 x i32> [[TMP56]], <16 x i32> [[TMP58]]
2248// CHECK: [[TMP63:%.*]] = bitcast <16 x i32> [[TMP62]] to <8 x i64>
2249// CHECK: store <8 x i64> [[TMP63]], <8 x i64>* [[__V_ADDR_I]], align 64
2250// CHECK: [[TMP64:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2251// CHECK: [[TMP65:%.*]] = bitcast <8 x i64> [[TMP64]] to <16 x i32>
2252// CHECK: [[TMP66:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2253// CHECK: [[TMP67:%.*]] = bitcast <8 x i64> [[TMP66]] to <16 x i32>
2254// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <16 x i32> [[TMP65]], <16 x i32> [[TMP67]], <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2255// CHECK: [[TMP68:%.*]] = bitcast <16 x i32> [[SHUFFLE6_I]] to <8 x i64>
2256// CHECK: [[TMP69:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2257// CHECK: [[TMP70:%.*]] = bitcast <8 x i64> [[TMP69]] to <16 x i32>
2258// CHECK: [[TMP71:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2259// CHECK: [[TMP72:%.*]] = bitcast <8 x i64> [[TMP71]] to <16 x i32>
2260// CHECK: [[SHUFFLE7_I:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP72]], <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2261// CHECK: [[TMP73:%.*]] = bitcast <16 x i32> [[SHUFFLE7_I]] to <8 x i64>
2262// CHECK: store <8 x i64> [[TMP68]], <8 x i64>* [[__A_ADDR_I13_I]], align 64
2263// CHECK: store <8 x i64> [[TMP73]], <8 x i64>* [[__B_ADDR_I14_I]], align 64
2264// CHECK: [[TMP74:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I13_I]], align 64
2265// CHECK: [[TMP75:%.*]] = bitcast <8 x i64> [[TMP74]] to <16 x i32>
2266// CHECK: [[TMP76:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I14_I]], align 64
2267// CHECK: [[TMP77:%.*]] = bitcast <8 x i64> [[TMP76]] to <16 x i32>
2268// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
2269// CHECK: [[TMP78:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
2270// CHECK: [[TMP79:%.*]] = bitcast <8 x i64> [[TMP78]] to <16 x i32>
2271// CHECK: [[TMP80:%.*]] = icmp slt <16 x i32> [[TMP75]], [[TMP77]]
2272// CHECK: [[TMP81:%.*]] = select <16 x i1> [[TMP80]], <16 x i32> [[TMP75]], <16 x i32> [[TMP77]]
2273// CHECK: [[TMP82:%.*]] = bitcast <16 x i32> [[TMP81]] to <8 x i64>
2274// CHECK: store <8 x i64> [[TMP82]], <8 x i64>* [[__V_ADDR_I]], align 64
2275// CHECK: [[TMP83:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2276// CHECK: [[TMP84:%.*]] = bitcast <8 x i64> [[TMP83]] to <16 x i32>
2277// CHECK: [[TMP85:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2278// CHECK: [[TMP86:%.*]] = bitcast <8 x i64> [[TMP85]] to <16 x i32>
2279// CHECK: [[SHUFFLE9_I:%.*]] = shufflevector <16 x i32> [[TMP84]], <16 x i32> [[TMP86]], <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2280// CHECK: [[TMP87:%.*]] = bitcast <16 x i32> [[SHUFFLE9_I]] to <8 x i64>
2281// CHECK: [[TMP88:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2282// CHECK: [[TMP89:%.*]] = bitcast <8 x i64> [[TMP88]] to <16 x i32>
2283// CHECK: [[TMP90:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2284// CHECK: [[TMP91:%.*]] = bitcast <8 x i64> [[TMP90]] to <16 x i32>
2285// CHECK: [[SHUFFLE10_I:%.*]] = shufflevector <16 x i32> [[TMP89]], <16 x i32> [[TMP91]], <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2286// CHECK: [[TMP92:%.*]] = bitcast <16 x i32> [[SHUFFLE10_I]] to <8 x i64>
2287// CHECK: store <8 x i64> [[TMP87]], <8 x i64>* [[__A_ADDR_I_I]], align 64
2288// CHECK: store <8 x i64> [[TMP92]], <8 x i64>* [[__B_ADDR_I_I]], align 64
2289// CHECK: [[TMP93:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
2290// CHECK: [[TMP94:%.*]] = bitcast <8 x i64> [[TMP93]] to <16 x i32>
2291// CHECK: [[TMP95:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64
2292// CHECK: [[TMP96:%.*]] = bitcast <8 x i64> [[TMP95]] to <16 x i32>
2293// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
2294// CHECK: [[TMP97:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
2295// CHECK: [[TMP98:%.*]] = bitcast <8 x i64> [[TMP97]] to <16 x i32>
2296// CHECK: [[TMP99:%.*]] = icmp slt <16 x i32> [[TMP94]], [[TMP96]]
2297// CHECK: [[TMP100:%.*]] = select <16 x i1> [[TMP99]], <16 x i32> [[TMP94]], <16 x i32> [[TMP96]]
2298// CHECK: [[TMP101:%.*]] = bitcast <16 x i32> [[TMP100]] to <8 x i64>
2299// CHECK: store <8 x i64> [[TMP101]], <8 x i64>* [[__V_ADDR_I]], align 64
2300// CHECK: [[TMP102:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2301// CHECK: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP102]], i32 0
2302// CHECK: [[CONV_I:%.*]] = trunc i64 [[VECEXT_I]] to i32
2303// CHECK: ret i32 [[CONV_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +00002304int test_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +00002305 return _mm512_mask_reduce_min_epi32(__M, __W);
2306}
2307
Sanjay Patel1acd2cf2017-09-25 19:56:57 +00002308// CHECK-LABEL: define i32 @test_mm512_mask_reduce_min_epu32(i16 zeroext %__M, <8 x i64> %__W) #0 {
2309// CHECK: [[_COMPOUNDLITERAL_I_I18_I:%.*]] = alloca <8 x i64>, align 64
2310// CHECK: [[__A_ADDR_I19_I:%.*]] = alloca <8 x i64>, align 64
2311// CHECK: [[__B_ADDR_I20_I:%.*]] = alloca <8 x i64>, align 64
2312// CHECK: [[_COMPOUNDLITERAL_I_I15_I:%.*]] = alloca <8 x i64>, align 64
2313// CHECK: [[__A_ADDR_I16_I:%.*]] = alloca <8 x i64>, align 64
2314// CHECK: [[__B_ADDR_I17_I:%.*]] = alloca <8 x i64>, align 64
2315// CHECK: [[_COMPOUNDLITERAL_I_I12_I:%.*]] = alloca <8 x i64>, align 64
2316// CHECK: [[__A_ADDR_I13_I:%.*]] = alloca <8 x i64>, align 64
2317// CHECK: [[__B_ADDR_I14_I:%.*]] = alloca <8 x i64>, align 64
2318// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <8 x i64>, align 64
2319// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
2320// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <8 x i64>, align 64
2321// CHECK: [[__S_ADDR_I_I:%.*]] = alloca i32, align 4
2322// CHECK: [[_COMPOUNDLITERAL_I_I:%.*]] = alloca <16 x i32>, align 64
2323// CHECK: [[__M_ADDR_I:%.*]] = alloca i16, align 2
2324// CHECK: [[__V_ADDR_I:%.*]] = alloca <8 x i64>, align 64
2325// CHECK: [[__M_ADDR:%.*]] = alloca i16, align 2
2326// CHECK: [[__W_ADDR:%.*]] = alloca <8 x i64>, align 64
2327// CHECK: store i16 %__M, i16* [[__M_ADDR]], align 2
2328// CHECK: store <8 x i64> %__W, <8 x i64>* [[__W_ADDR]], align 64
2329// CHECK: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2
2330// CHECK: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[__W_ADDR]], align 64
2331// CHECK: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2
2332// CHECK: store <8 x i64> [[TMP1]], <8 x i64>* [[__V_ADDR_I]], align 64
2333// CHECK: [[TMP2:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2
2334// CHECK: [[TMP3:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2335// CHECK: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP3]] to <16 x i32>
2336// CHECK: store i32 -1, i32* [[__S_ADDR_I_I]], align 4
2337// CHECK: [[TMP5:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2338// CHECK: [[VECINIT_I_I:%.*]] = insertelement <16 x i32> undef, i32 [[TMP5]], i32 0
2339// CHECK: [[TMP6:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2340// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <16 x i32> [[VECINIT_I_I]], i32 [[TMP6]], i32 1
2341// CHECK: [[TMP7:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2342// CHECK: [[VECINIT2_I_I:%.*]] = insertelement <16 x i32> [[VECINIT1_I_I]], i32 [[TMP7]], i32 2
2343// CHECK: [[TMP8:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2344// CHECK: [[VECINIT3_I_I:%.*]] = insertelement <16 x i32> [[VECINIT2_I_I]], i32 [[TMP8]], i32 3
2345// CHECK: [[TMP9:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2346// CHECK: [[VECINIT4_I_I:%.*]] = insertelement <16 x i32> [[VECINIT3_I_I]], i32 [[TMP9]], i32 4
2347// CHECK: [[TMP10:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2348// CHECK: [[VECINIT5_I_I:%.*]] = insertelement <16 x i32> [[VECINIT4_I_I]], i32 [[TMP10]], i32 5
2349// CHECK: [[TMP11:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2350// CHECK: [[VECINIT6_I_I:%.*]] = insertelement <16 x i32> [[VECINIT5_I_I]], i32 [[TMP11]], i32 6
2351// CHECK: [[TMP12:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2352// CHECK: [[VECINIT7_I_I:%.*]] = insertelement <16 x i32> [[VECINIT6_I_I]], i32 [[TMP12]], i32 7
2353// CHECK: [[TMP13:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2354// CHECK: [[VECINIT8_I_I:%.*]] = insertelement <16 x i32> [[VECINIT7_I_I]], i32 [[TMP13]], i32 8
2355// CHECK: [[TMP14:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2356// CHECK: [[VECINIT9_I_I:%.*]] = insertelement <16 x i32> [[VECINIT8_I_I]], i32 [[TMP14]], i32 9
2357// CHECK: [[TMP15:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2358// CHECK: [[VECINIT10_I_I:%.*]] = insertelement <16 x i32> [[VECINIT9_I_I]], i32 [[TMP15]], i32 10
2359// CHECK: [[TMP16:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2360// CHECK: [[VECINIT11_I_I:%.*]] = insertelement <16 x i32> [[VECINIT10_I_I]], i32 [[TMP16]], i32 11
2361// CHECK: [[TMP17:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2362// CHECK: [[VECINIT12_I_I:%.*]] = insertelement <16 x i32> [[VECINIT11_I_I]], i32 [[TMP17]], i32 12
2363// CHECK: [[TMP18:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2364// CHECK: [[VECINIT13_I_I:%.*]] = insertelement <16 x i32> [[VECINIT12_I_I]], i32 [[TMP18]], i32 13
2365// CHECK: [[TMP19:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2366// CHECK: [[VECINIT14_I_I:%.*]] = insertelement <16 x i32> [[VECINIT13_I_I]], i32 [[TMP19]], i32 14
2367// CHECK: [[TMP20:%.*]] = load i32, i32* [[__S_ADDR_I_I]], align 4
2368// CHECK: [[VECINIT15_I_I:%.*]] = insertelement <16 x i32> [[VECINIT14_I_I]], i32 [[TMP20]], i32 15
2369// CHECK: store <16 x i32> [[VECINIT15_I_I]], <16 x i32>* [[_COMPOUNDLITERAL_I_I]], align 64
2370// CHECK: [[TMP21:%.*]] = load <16 x i32>, <16 x i32>* [[_COMPOUNDLITERAL_I_I]], align 64
2371// CHECK: [[TMP22:%.*]] = bitcast <16 x i32> [[TMP21]] to <8 x i64>
2372// CHECK: [[TMP23:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
2373// CHECK: [[TMP24:%.*]] = select <16 x i1> [[TMP23]], <16 x i32> [[TMP4]], <16 x i32> [[TMP21]]
2374// CHECK: [[TMP25:%.*]] = bitcast <16 x i32> [[TMP24]] to <8 x i64>
2375// CHECK: store <8 x i64> [[TMP25]], <8 x i64>* [[__V_ADDR_I]], align 64
2376// CHECK: [[TMP26:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2377// CHECK: [[TMP27:%.*]] = bitcast <8 x i64> [[TMP26]] to <16 x i32>
2378// CHECK: [[TMP28:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2379// CHECK: [[TMP29:%.*]] = bitcast <8 x i64> [[TMP28]] to <16 x i32>
2380// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i32> [[TMP27]], <16 x i32> [[TMP29]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2381// CHECK: [[TMP30:%.*]] = bitcast <16 x i32> [[SHUFFLE_I]] to <8 x i64>
2382// CHECK: [[TMP31:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2383// CHECK: [[TMP32:%.*]] = bitcast <8 x i64> [[TMP31]] to <16 x i32>
2384// CHECK: [[TMP33:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2385// CHECK: [[TMP34:%.*]] = bitcast <8 x i64> [[TMP33]] to <16 x i32>
2386// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <16 x i32> [[TMP32]], <16 x i32> [[TMP34]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2387// CHECK: [[TMP35:%.*]] = bitcast <16 x i32> [[SHUFFLE1_I]] to <8 x i64>
2388// CHECK: store <8 x i64> [[TMP30]], <8 x i64>* [[__A_ADDR_I19_I]], align 64
2389// CHECK: store <8 x i64> [[TMP35]], <8 x i64>* [[__B_ADDR_I20_I]], align 64
2390// CHECK: [[TMP36:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I19_I]], align 64
2391// CHECK: [[TMP37:%.*]] = bitcast <8 x i64> [[TMP36]] to <16 x i32>
2392// CHECK: [[TMP38:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I20_I]], align 64
2393// CHECK: [[TMP39:%.*]] = bitcast <8 x i64> [[TMP38]] to <16 x i32>
2394// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I18_I]], align 64
2395// CHECK: [[TMP40:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I18_I]], align 64
2396// CHECK: [[TMP41:%.*]] = bitcast <8 x i64> [[TMP40]] to <16 x i32>
2397// CHECK: [[TMP42:%.*]] = icmp ult <16 x i32> [[TMP37]], [[TMP39]]
2398// CHECK: [[TMP43:%.*]] = select <16 x i1> [[TMP42]], <16 x i32> [[TMP37]], <16 x i32> [[TMP39]]
2399// CHECK: [[TMP44:%.*]] = bitcast <16 x i32> [[TMP43]] to <8 x i64>
2400// CHECK: store <8 x i64> [[TMP44]], <8 x i64>* [[__V_ADDR_I]], align 64
2401// CHECK: [[TMP45:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2402// CHECK: [[TMP46:%.*]] = bitcast <8 x i64> [[TMP45]] to <16 x i32>
2403// CHECK: [[TMP47:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2404// CHECK: [[TMP48:%.*]] = bitcast <8 x i64> [[TMP47]] to <16 x i32>
2405// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <16 x i32> [[TMP46]], <16 x i32> [[TMP48]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2406// CHECK: [[TMP49:%.*]] = bitcast <16 x i32> [[SHUFFLE3_I]] to <8 x i64>
2407// CHECK: [[TMP50:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2408// CHECK: [[TMP51:%.*]] = bitcast <8 x i64> [[TMP50]] to <16 x i32>
2409// CHECK: [[TMP52:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2410// CHECK: [[TMP53:%.*]] = bitcast <8 x i64> [[TMP52]] to <16 x i32>
2411// CHECK: [[SHUFFLE4_I:%.*]] = shufflevector <16 x i32> [[TMP51]], <16 x i32> [[TMP53]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2412// CHECK: [[TMP54:%.*]] = bitcast <16 x i32> [[SHUFFLE4_I]] to <8 x i64>
2413// CHECK: store <8 x i64> [[TMP49]], <8 x i64>* [[__A_ADDR_I16_I]], align 64
2414// CHECK: store <8 x i64> [[TMP54]], <8 x i64>* [[__B_ADDR_I17_I]], align 64
2415// CHECK: [[TMP55:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I16_I]], align 64
2416// CHECK: [[TMP56:%.*]] = bitcast <8 x i64> [[TMP55]] to <16 x i32>
2417// CHECK: [[TMP57:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I17_I]], align 64
2418// CHECK: [[TMP58:%.*]] = bitcast <8 x i64> [[TMP57]] to <16 x i32>
2419// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I15_I]], align 64
2420// CHECK: [[TMP59:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I15_I]], align 64
2421// CHECK: [[TMP60:%.*]] = bitcast <8 x i64> [[TMP59]] to <16 x i32>
2422// CHECK: [[TMP61:%.*]] = icmp ult <16 x i32> [[TMP56]], [[TMP58]]
2423// CHECK: [[TMP62:%.*]] = select <16 x i1> [[TMP61]], <16 x i32> [[TMP56]], <16 x i32> [[TMP58]]
2424// CHECK: [[TMP63:%.*]] = bitcast <16 x i32> [[TMP62]] to <8 x i64>
2425// CHECK: store <8 x i64> [[TMP63]], <8 x i64>* [[__V_ADDR_I]], align 64
2426// CHECK: [[TMP64:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2427// CHECK: [[TMP65:%.*]] = bitcast <8 x i64> [[TMP64]] to <16 x i32>
2428// CHECK: [[TMP66:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2429// CHECK: [[TMP67:%.*]] = bitcast <8 x i64> [[TMP66]] to <16 x i32>
2430// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <16 x i32> [[TMP65]], <16 x i32> [[TMP67]], <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2431// CHECK: [[TMP68:%.*]] = bitcast <16 x i32> [[SHUFFLE6_I]] to <8 x i64>
2432// CHECK: [[TMP69:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2433// CHECK: [[TMP70:%.*]] = bitcast <8 x i64> [[TMP69]] to <16 x i32>
2434// CHECK: [[TMP71:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2435// CHECK: [[TMP72:%.*]] = bitcast <8 x i64> [[TMP71]] to <16 x i32>
2436// CHECK: [[SHUFFLE7_I:%.*]] = shufflevector <16 x i32> [[TMP70]], <16 x i32> [[TMP72]], <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2437// CHECK: [[TMP73:%.*]] = bitcast <16 x i32> [[SHUFFLE7_I]] to <8 x i64>
2438// CHECK: store <8 x i64> [[TMP68]], <8 x i64>* [[__A_ADDR_I13_I]], align 64
2439// CHECK: store <8 x i64> [[TMP73]], <8 x i64>* [[__B_ADDR_I14_I]], align 64
2440// CHECK: [[TMP74:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I13_I]], align 64
2441// CHECK: [[TMP75:%.*]] = bitcast <8 x i64> [[TMP74]] to <16 x i32>
2442// CHECK: [[TMP76:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I14_I]], align 64
2443// CHECK: [[TMP77:%.*]] = bitcast <8 x i64> [[TMP76]] to <16 x i32>
2444// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
2445// CHECK: [[TMP78:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
2446// CHECK: [[TMP79:%.*]] = bitcast <8 x i64> [[TMP78]] to <16 x i32>
2447// CHECK: [[TMP80:%.*]] = icmp ult <16 x i32> [[TMP75]], [[TMP77]]
2448// CHECK: [[TMP81:%.*]] = select <16 x i1> [[TMP80]], <16 x i32> [[TMP75]], <16 x i32> [[TMP77]]
2449// CHECK: [[TMP82:%.*]] = bitcast <16 x i32> [[TMP81]] to <8 x i64>
2450// CHECK: store <8 x i64> [[TMP82]], <8 x i64>* [[__V_ADDR_I]], align 64
2451// CHECK: [[TMP83:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2452// CHECK: [[TMP84:%.*]] = bitcast <8 x i64> [[TMP83]] to <16 x i32>
2453// CHECK: [[TMP85:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2454// CHECK: [[TMP86:%.*]] = bitcast <8 x i64> [[TMP85]] to <16 x i32>
2455// CHECK: [[SHUFFLE9_I:%.*]] = shufflevector <16 x i32> [[TMP84]], <16 x i32> [[TMP86]], <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2456// CHECK: [[TMP87:%.*]] = bitcast <16 x i32> [[SHUFFLE9_I]] to <8 x i64>
2457// CHECK: [[TMP88:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2458// CHECK: [[TMP89:%.*]] = bitcast <8 x i64> [[TMP88]] to <16 x i32>
2459// CHECK: [[TMP90:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2460// CHECK: [[TMP91:%.*]] = bitcast <8 x i64> [[TMP90]] to <16 x i32>
2461// CHECK: [[SHUFFLE10_I:%.*]] = shufflevector <16 x i32> [[TMP89]], <16 x i32> [[TMP91]], <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2462// CHECK: [[TMP92:%.*]] = bitcast <16 x i32> [[SHUFFLE10_I]] to <8 x i64>
2463// CHECK: store <8 x i64> [[TMP87]], <8 x i64>* [[__A_ADDR_I_I]], align 64
2464// CHECK: store <8 x i64> [[TMP92]], <8 x i64>* [[__B_ADDR_I_I]], align 64
2465// CHECK: [[TMP93:%.*]] = load <8 x i64>, <8 x i64>* [[__A_ADDR_I_I]], align 64
2466// CHECK: [[TMP94:%.*]] = bitcast <8 x i64> [[TMP93]] to <16 x i32>
2467// CHECK: [[TMP95:%.*]] = load <8 x i64>, <8 x i64>* [[__B_ADDR_I_I]], align 64
2468// CHECK: [[TMP96:%.*]] = bitcast <8 x i64> [[TMP95]] to <16 x i32>
2469// CHECK: store <8 x i64> zeroinitializer, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
2470// CHECK: [[TMP97:%.*]] = load <8 x i64>, <8 x i64>* [[_COMPOUNDLITERAL_I_I_I]], align 64
2471// CHECK: [[TMP98:%.*]] = bitcast <8 x i64> [[TMP97]] to <16 x i32>
2472// CHECK: [[TMP99:%.*]] = icmp ult <16 x i32> [[TMP94]], [[TMP96]]
2473// CHECK: [[TMP100:%.*]] = select <16 x i1> [[TMP99]], <16 x i32> [[TMP94]], <16 x i32> [[TMP96]]
2474// CHECK: [[TMP101:%.*]] = bitcast <16 x i32> [[TMP100]] to <8 x i64>
2475// CHECK: store <8 x i64> [[TMP101]], <8 x i64>* [[__V_ADDR_I]], align 64
2476// CHECK: [[TMP102:%.*]] = load <8 x i64>, <8 x i64>* [[__V_ADDR_I]], align 64
2477// CHECK: [[VECEXT_I:%.*]] = extractelement <8 x i64> [[TMP102]], i32 0
2478// CHECK: [[CONV_I:%.*]] = trunc i64 [[VECEXT_I]] to i32
2479// CHECK: ret i32 [[CONV_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +00002480unsigned int test_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +00002481 return _mm512_mask_reduce_min_epu32(__M, __W);
2482}
2483
Sanjay Patel1acd2cf2017-09-25 19:56:57 +00002484// CHECK-LABEL: define float @test_mm512_mask_reduce_min_ps(i16 zeroext %__M, <16 x float> %__W) #0 {
2485// CHECK: [[_COMPOUNDLITERAL_I_I18_I:%.*]] = alloca <16 x float>, align 64
2486// CHECK: [[__A_ADDR_I19_I:%.*]] = alloca <16 x float>, align 64
2487// CHECK: [[__B_ADDR_I20_I:%.*]] = alloca <16 x float>, align 64
2488// CHECK: [[_COMPOUNDLITERAL_I_I15_I:%.*]] = alloca <16 x float>, align 64
2489// CHECK: [[__A_ADDR_I16_I:%.*]] = alloca <16 x float>, align 64
2490// CHECK: [[__B_ADDR_I17_I:%.*]] = alloca <16 x float>, align 64
2491// CHECK: [[_COMPOUNDLITERAL_I_I12_I:%.*]] = alloca <16 x float>, align 64
2492// CHECK: [[__A_ADDR_I13_I:%.*]] = alloca <16 x float>, align 64
2493// CHECK: [[__B_ADDR_I14_I:%.*]] = alloca <16 x float>, align 64
2494// CHECK: [[_COMPOUNDLITERAL_I_I_I:%.*]] = alloca <16 x float>, align 64
2495// CHECK: [[__A_ADDR_I_I:%.*]] = alloca <16 x float>, align 64
2496// CHECK: [[__B_ADDR_I_I:%.*]] = alloca <16 x float>, align 64
2497// CHECK: [[__W_ADDR_I_I:%.*]] = alloca float, align 4
2498// CHECK: [[_COMPOUNDLITERAL_I_I:%.*]] = alloca <16 x float>, align 64
2499// CHECK: [[__M_ADDR_I:%.*]] = alloca i16, align 2
2500// CHECK: [[__V_ADDR_I:%.*]] = alloca <16 x float>, align 64
2501// CHECK: [[__M_ADDR:%.*]] = alloca i16, align 2
2502// CHECK: [[__W_ADDR:%.*]] = alloca <16 x float>, align 64
2503// CHECK: store i16 %__M, i16* [[__M_ADDR]], align 2
2504// CHECK: store <16 x float> %__W, <16 x float>* [[__W_ADDR]], align 64
2505// CHECK: [[TMP0:%.*]] = load i16, i16* [[__M_ADDR]], align 2
2506// CHECK: [[TMP1:%.*]] = load <16 x float>, <16 x float>* [[__W_ADDR]], align 64
2507// CHECK: store i16 [[TMP0]], i16* [[__M_ADDR_I]], align 2
2508// CHECK: store <16 x float> [[TMP1]], <16 x float>* [[__V_ADDR_I]], align 64
2509// CHECK: [[TMP2:%.*]] = load i16, i16* [[__M_ADDR_I]], align 2
2510// CHECK: [[TMP3:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2511// CHECK: store float 0x7FF0000000000000, float* [[__W_ADDR_I_I]], align 4
2512// CHECK: [[TMP4:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2513// CHECK: [[VECINIT_I_I:%.*]] = insertelement <16 x float> undef, float [[TMP4]], i32 0
2514// CHECK: [[TMP5:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2515// CHECK: [[VECINIT1_I_I:%.*]] = insertelement <16 x float> [[VECINIT_I_I]], float [[TMP5]], i32 1
2516// CHECK: [[TMP6:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2517// CHECK: [[VECINIT2_I_I:%.*]] = insertelement <16 x float> [[VECINIT1_I_I]], float [[TMP6]], i32 2
2518// CHECK: [[TMP7:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2519// CHECK: [[VECINIT3_I_I:%.*]] = insertelement <16 x float> [[VECINIT2_I_I]], float [[TMP7]], i32 3
2520// CHECK: [[TMP8:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2521// CHECK: [[VECINIT4_I_I:%.*]] = insertelement <16 x float> [[VECINIT3_I_I]], float [[TMP8]], i32 4
2522// CHECK: [[TMP9:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2523// CHECK: [[VECINIT5_I_I:%.*]] = insertelement <16 x float> [[VECINIT4_I_I]], float [[TMP9]], i32 5
2524// CHECK: [[TMP10:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2525// CHECK: [[VECINIT6_I_I:%.*]] = insertelement <16 x float> [[VECINIT5_I_I]], float [[TMP10]], i32 6
2526// CHECK: [[TMP11:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2527// CHECK: [[VECINIT7_I_I:%.*]] = insertelement <16 x float> [[VECINIT6_I_I]], float [[TMP11]], i32 7
2528// CHECK: [[TMP12:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2529// CHECK: [[VECINIT8_I_I:%.*]] = insertelement <16 x float> [[VECINIT7_I_I]], float [[TMP12]], i32 8
2530// CHECK: [[TMP13:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2531// CHECK: [[VECINIT9_I_I:%.*]] = insertelement <16 x float> [[VECINIT8_I_I]], float [[TMP13]], i32 9
2532// CHECK: [[TMP14:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2533// CHECK: [[VECINIT10_I_I:%.*]] = insertelement <16 x float> [[VECINIT9_I_I]], float [[TMP14]], i32 10
2534// CHECK: [[TMP15:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2535// CHECK: [[VECINIT11_I_I:%.*]] = insertelement <16 x float> [[VECINIT10_I_I]], float [[TMP15]], i32 11
2536// CHECK: [[TMP16:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2537// CHECK: [[VECINIT12_I_I:%.*]] = insertelement <16 x float> [[VECINIT11_I_I]], float [[TMP16]], i32 12
2538// CHECK: [[TMP17:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2539// CHECK: [[VECINIT13_I_I:%.*]] = insertelement <16 x float> [[VECINIT12_I_I]], float [[TMP17]], i32 13
2540// CHECK: [[TMP18:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2541// CHECK: [[VECINIT14_I_I:%.*]] = insertelement <16 x float> [[VECINIT13_I_I]], float [[TMP18]], i32 14
2542// CHECK: [[TMP19:%.*]] = load float, float* [[__W_ADDR_I_I]], align 4
2543// CHECK: [[VECINIT15_I_I:%.*]] = insertelement <16 x float> [[VECINIT14_I_I]], float [[TMP19]], i32 15
2544// CHECK: store <16 x float> [[VECINIT15_I_I]], <16 x float>* [[_COMPOUNDLITERAL_I_I]], align 64
2545// CHECK: [[TMP20:%.*]] = load <16 x float>, <16 x float>* [[_COMPOUNDLITERAL_I_I]], align 64
2546// CHECK: [[TMP21:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
2547// CHECK: [[TMP22:%.*]] = select <16 x i1> [[TMP21]], <16 x float> [[TMP3]], <16 x float> [[TMP20]]
2548// CHECK: store <16 x float> [[TMP22]], <16 x float>* [[__V_ADDR_I]], align 64
2549// CHECK: [[TMP23:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2550// CHECK: [[TMP24:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2551// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x float> [[TMP23]], <16 x float> [[TMP24]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2552// CHECK: [[TMP25:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2553// CHECK: [[TMP26:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2554// CHECK: [[SHUFFLE1_I:%.*]] = shufflevector <16 x float> [[TMP25]], <16 x float> [[TMP26]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2555// CHECK: store <16 x float> [[SHUFFLE_I]], <16 x float>* [[__A_ADDR_I19_I]], align 64
2556// CHECK: store <16 x float> [[SHUFFLE1_I]], <16 x float>* [[__B_ADDR_I20_I]], align 64
2557// CHECK: [[TMP27:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I19_I]], align 64
2558// CHECK: [[TMP28:%.*]] = load <16 x float>, <16 x float>* [[__B_ADDR_I20_I]], align 64
2559// CHECK: store <16 x float> zeroinitializer, <16 x float>* [[_COMPOUNDLITERAL_I_I18_I]], align 64
2560// CHECK: [[TMP29:%.*]] = load <16 x float>, <16 x float>* [[_COMPOUNDLITERAL_I_I18_I]], align 64
2561// CHECK: [[TMP30:%.*]] = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> [[TMP27]], <16 x float> [[TMP28]], <16 x float> [[TMP29]], i16 -1, i32 4) #2
2562// CHECK: store <16 x float> [[TMP30]], <16 x float>* [[__V_ADDR_I]], align 64
2563// CHECK: [[TMP31:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2564// CHECK: [[TMP32:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2565// CHECK: [[SHUFFLE3_I:%.*]] = shufflevector <16 x float> [[TMP31]], <16 x float> [[TMP32]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2566// CHECK: [[TMP33:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2567// CHECK: [[TMP34:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2568// CHECK: [[SHUFFLE4_I:%.*]] = shufflevector <16 x float> [[TMP33]], <16 x float> [[TMP34]], <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2569// CHECK: store <16 x float> [[SHUFFLE3_I]], <16 x float>* [[__A_ADDR_I16_I]], align 64
2570// CHECK: store <16 x float> [[SHUFFLE4_I]], <16 x float>* [[__B_ADDR_I17_I]], align 64
2571// CHECK: [[TMP35:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I16_I]], align 64
2572// CHECK: [[TMP36:%.*]] = load <16 x float>, <16 x float>* [[__B_ADDR_I17_I]], align 64
2573// CHECK: store <16 x float> zeroinitializer, <16 x float>* [[_COMPOUNDLITERAL_I_I15_I]], align 64
2574// CHECK: [[TMP37:%.*]] = load <16 x float>, <16 x float>* [[_COMPOUNDLITERAL_I_I15_I]], align 64
2575// CHECK: [[TMP38:%.*]] = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> [[TMP35]], <16 x float> [[TMP36]], <16 x float> [[TMP37]], i16 -1, i32 4) #2
2576// CHECK: store <16 x float> [[TMP38]], <16 x float>* [[__V_ADDR_I]], align 64
2577// CHECK: [[TMP39:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2578// CHECK: [[TMP40:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2579// CHECK: [[SHUFFLE6_I:%.*]] = shufflevector <16 x float> [[TMP39]], <16 x float> [[TMP40]], <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2580// CHECK: [[TMP41:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2581// CHECK: [[TMP42:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2582// CHECK: [[SHUFFLE7_I:%.*]] = shufflevector <16 x float> [[TMP41]], <16 x float> [[TMP42]], <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2583// CHECK: store <16 x float> [[SHUFFLE6_I]], <16 x float>* [[__A_ADDR_I13_I]], align 64
2584// CHECK: store <16 x float> [[SHUFFLE7_I]], <16 x float>* [[__B_ADDR_I14_I]], align 64
2585// CHECK: [[TMP43:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I13_I]], align 64
2586// CHECK: [[TMP44:%.*]] = load <16 x float>, <16 x float>* [[__B_ADDR_I14_I]], align 64
2587// CHECK: store <16 x float> zeroinitializer, <16 x float>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
2588// CHECK: [[TMP45:%.*]] = load <16 x float>, <16 x float>* [[_COMPOUNDLITERAL_I_I12_I]], align 64
2589// CHECK: [[TMP46:%.*]] = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> [[TMP43]], <16 x float> [[TMP44]], <16 x float> [[TMP45]], i16 -1, i32 4) #2
2590// CHECK: store <16 x float> [[TMP46]], <16 x float>* [[__V_ADDR_I]], align 64
2591// CHECK: [[TMP47:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2592// CHECK: [[TMP48:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2593// CHECK: [[SHUFFLE9_I:%.*]] = shufflevector <16 x float> [[TMP47]], <16 x float> [[TMP48]], <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2594// CHECK: [[TMP49:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2595// CHECK: [[TMP50:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2596// CHECK: [[SHUFFLE10_I:%.*]] = shufflevector <16 x float> [[TMP49]], <16 x float> [[TMP50]], <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2597// CHECK: store <16 x float> [[SHUFFLE9_I]], <16 x float>* [[__A_ADDR_I_I]], align 64
2598// CHECK: store <16 x float> [[SHUFFLE10_I]], <16 x float>* [[__B_ADDR_I_I]], align 64
2599// CHECK: [[TMP51:%.*]] = load <16 x float>, <16 x float>* [[__A_ADDR_I_I]], align 64
2600// CHECK: [[TMP52:%.*]] = load <16 x float>, <16 x float>* [[__B_ADDR_I_I]], align 64
2601// CHECK: store <16 x float> zeroinitializer, <16 x float>* [[_COMPOUNDLITERAL_I_I_I]], align 64
2602// CHECK: [[TMP53:%.*]] = load <16 x float>, <16 x float>* [[_COMPOUNDLITERAL_I_I_I]], align 64
2603// CHECK: [[TMP54:%.*]] = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> [[TMP51]], <16 x float> [[TMP52]], <16 x float> [[TMP53]], i16 -1, i32 4) #2
2604// CHECK: store <16 x float> [[TMP54]], <16 x float>* [[__V_ADDR_I]], align 64
2605// CHECK: [[TMP55:%.*]] = load <16 x float>, <16 x float>* [[__V_ADDR_I]], align 64
2606// CHECK: [[VECEXT_I:%.*]] = extractelement <16 x float> [[TMP55]], i32 0
2607// CHECK: ret float [[VECEXT_I]]
Michael Zuckerman25eb4202016-10-29 10:29:20 +00002608float test_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __W){
Michael Zuckerman25eb4202016-10-29 10:29:20 +00002609 return _mm512_mask_reduce_min_ps(__M, __W);
2610}
2611