; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -expand-reductions -S | FileCheck %s
; Tests without a target, which should expand all reductions.
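; Each reduction is lowered to a log2(#lanes)-step shuffle-and-operate tree,
; with the scalar result read back from lane 0 via extractelement, as the
; CHECK lines below show.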
declare i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64>)

declare float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
declare float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float, <4 x float>)

declare i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64>)
declare i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64>)

declare double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double>)
declare double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double>)


define i64 @add_i64(<2 x i64> %vec) {
; CHECK-LABEL: @add_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.add.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @mul_i64(<2 x i64> %vec) {
; CHECK-LABEL: @mul_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = mul <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.mul.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @and_i64(<2 x i64> %vec) {
; CHECK-LABEL: @and_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = and <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.and.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @or_i64(<2 x i64> %vec) {
; CHECK-LABEL: @or_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.or.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @xor_i64(<2 x i64> %vec) {
; CHECK-LABEL: @xor_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = xor <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[BIN_RDX]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.xor.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define float @fadd_f32(<4 x float> %vec) {
; CHECK-LABEL: @fadd_f32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    ret float [[TMP0]]
;
entry:
  %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec)
  ret float %r
}

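; The call below carries no fast-math flags, so the ordered ("strict") fadd
; reduction is left as an intrinsic call rather than expanded.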
define float @fadd_f32_strict(<4 x float> %vec) {
; CHECK-LABEL: @fadd_f32_strict(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[R:%.*]] = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float undef, <4 x float> [[VEC:%.*]])
; CHECK-NEXT:    ret float [[R]]
;
entry:
  %r = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %vec)
  ret float %r
}

define float @fmul_f32(<4 x float> %vec) {
; CHECK-LABEL: @fmul_f32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[VEC:%.*]], <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul fast <4 x float> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_SHUF1:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[BIN_RDX2:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[BIN_RDX2]], i32 0
; CHECK-NEXT:    ret float [[TMP0]]
;
entry:
  %r = call fast float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %vec)
  ret float %r
}

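; Integer min/max reductions have no single binary op; each shuffle step is
; instead expanded to an icmp with the matching predicate plus a select.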
define i64 @smax_i64(<2 x i64> %vec) {
; CHECK-LABEL: @smax_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp sgt <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.smax.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @smin_i64(<2 x i64> %vec) {
; CHECK-LABEL: @smin_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.smin.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @umax_i64(<2 x i64> %vec) {
; CHECK-LABEL: @umax_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ugt <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.umax.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

define i64 @umin_i64(<2 x i64> %vec) {
; CHECK-LABEL: @umin_i64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ult <2 x i64> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret i64 [[TMP0]]
;
entry:
  %r = call i64 @llvm.experimental.vector.reduce.umin.i64.v2i64(<2 x i64> %vec)
  ret i64 %r
}

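; FP min/max reductions expand the same way, using fcmp fast ogt/olt to pick
; each lane.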
define double @fmax_f64(<2 x double> %vec) {
; CHECK-LABEL: @fmax_f64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[VEC:%.*]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt <2 x double> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret double [[TMP0]]
;
entry:
  %r = call double @llvm.experimental.vector.reduce.fmax.f64.v2f64(<2 x double> %vec)
  ret double %r
}

define double @fmin_f64(<2 x double> %vec) {
; CHECK-LABEL: @fmin_f64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x double> [[VEC:%.*]], <2 x double> undef, <2 x i32> <i32 1, i32 undef>
; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <2 x double> [[VEC]], [[RDX_SHUF]]
; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x double> [[VEC]], <2 x double> [[RDX_SHUF]]
; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x double> [[RDX_MINMAX_SELECT]], i32 0
; CHECK-NEXT:    ret double [[TMP0]]
;
entry:
  %r = call double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double> %vec)
  ret double %r
}