; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s

; Loop-vectorizer reduction tests. Every loop below is vectorized (or rejected)
; with a forced vector width of 4 and an interleave count of 1, as set on the
; RUN line above.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; Integer add reduction with start value 0: sum += i + A[i] + B[i].
; Expected output: a <4 x i32> vector loop, then a two-step shuffle/add
; reduction tree and an extract of lane 0.
;CHECK-LABEL: @reduction_sum(
;CHECK: phi <4 x i32>
;CHECK: load <4 x i32>
;CHECK: add <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
;CHECK: add <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
;CHECK: add <4 x i32>
;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
;CHECK: ret i32
define i32 @reduction_sum(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
  %1 = icmp sgt i32 %n, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
  %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %3 = load i32, i32* %2, align 4
  %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  %5 = load i32, i32* %4, align 4
  %6 = trunc i64 %indvars.iv to i32
  %7 = add i32 %sum.02, %6
  %8 = add i32 %7, %3
  %9 = add i32 %8, %5
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph, %0
  %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]
  ret i32 %sum.0.lcssa
}

; Integer multiply reduction with start value 1: prod *= i * A[i] * B[i].
; Expected output: a <4 x i32> vector loop, then a two-step shuffle/mul
; reduction tree and an extract of lane 0.
;CHECK-LABEL: @reduction_prod(
;CHECK: phi <4 x i32>
;CHECK: load <4 x i32>
;CHECK: mul <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
;CHECK: mul <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
;CHECK: mul <4 x i32>
;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
;CHECK: ret i32
define i32 @reduction_prod(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
  %1 = icmp sgt i32 %n, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
  %prod.02 = phi i32 [ %9, %.lr.ph ], [ 1, %0 ]
  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %3 = load i32, i32* %2, align 4
  %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  %5 = load i32, i32* %4, align 4
  %6 = trunc i64 %indvars.iv to i32
  %7 = mul i32 %prod.02, %6
  %8 = mul i32 %7, %3
  %9 = mul i32 %8, %5
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph, %0
  %prod.0.lcssa = phi i32 [ 1, %0 ], [ %9, %.lr.ph ]
  ret i32 %prod.0.lcssa
}

; Add reduction whose addend contains a product: sum += i + A[i] * B[i].
; The multiply is ordinary loop work (it keeps its nsw flag); only the adds
; form the reduction tree after the loop.
;CHECK-LABEL: @reduction_mix(
;CHECK: phi <4 x i32>
;CHECK: load <4 x i32>
;CHECK: mul nsw <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
;CHECK: add <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
;CHECK: add <4 x i32>
;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
;CHECK: ret i32
define i32 @reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
  %1 = icmp sgt i32 %n, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
  %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %3 = load i32, i32* %2, align 4
  %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  %5 = load i32, i32* %4, align 4
  %6 = mul nsw i32 %5, %3
  %7 = trunc i64 %indvars.iv to i32
  %8 = add i32 %sum.02, %7
  %9 = add i32 %8, %6
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph, %0
  %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]
  ret i32 %sum.0.lcssa
}

; Multiply reduction seeded with 19 (not the multiplicative identity):
; sum *= A[i] + i + B[i]. The reduction phi is the right-hand operand of the
; mul.
;CHECK-LABEL: @reduction_mul(
;CHECK: mul <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
;CHECK: mul <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
;CHECK: mul <4 x i32>
;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
;CHECK: ret i32
define i32 @reduction_mul(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
  %1 = icmp sgt i32 %n, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
  %sum.02 = phi i32 [ %9, %.lr.ph ], [ 19, %0 ]
  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %3 = load i32, i32* %2, align 4
  %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  %5 = load i32, i32* %4, align 4
  %6 = trunc i64 %indvars.iv to i32
  %7 = add i32 %3, %6
  %8 = add i32 %7, %5
  %9 = mul i32 %8, %sum.02
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph, %0
  %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]
  ret i32 %sum.0.lcssa
}

; Add reduction with a non-zero start value: sum = 120 + sum(in[i] * coeff[i]).
; The expected vector start value carries 120 in lane 0 and zero in the other
; lanes.
;CHECK-LABEL: @start_at_non_zero(
;CHECK: phi <4 x i32>
;CHECK: <i32 120, i32 0, i32 0, i32 0>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
;CHECK: add <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
;CHECK: add <4 x i32>
;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
;CHECK: ret i32
define i32 @start_at_non_zero(i32* nocapture %in, i32* nocapture %coeff, i32* nocapture %out, i32 %n) nounwind uwtable readonly ssp {
entry:
  %cmp7 = icmp sgt i32 %n, 0
  br i1 %cmp7, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %sum.09 = phi i32 [ %add, %for.body ], [ 120, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %in, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %coeff, i64 %indvars.iv
  %1 = load i32, i32* %arrayidx2, align 4
  %mul = mul nsw i32 %1, %0
  %add = add nsw i32 %mul, %sum.09
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %sum.0.lcssa = phi i32 [ 120, %entry ], [ %add, %for.body ]
  ret i32 %sum.0.lcssa
}

; Bitwise AND reduction starting at -1: result &= A[i] + B[i].
; The expected vector start value is an all-ones splat.
;CHECK-LABEL: @reduction_and(
;CHECK: <i32 -1, i32 -1, i32 -1, i32 -1>
;CHECK: and <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
;CHECK: and <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
;CHECK: and <4 x i32>
;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
;CHECK: ret i32
define i32 @reduction_and(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
entry:
  %cmp7 = icmp sgt i32 %n, 0
  br i1 %cmp7, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  %1 = load i32, i32* %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  %and = and i32 %add, %result.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %result.0.lcssa = phi i32 [ -1, %entry ], [ %and, %for.body ]
  ret i32 %result.0.lcssa
}

; Bitwise OR reduction starting at 0: result |= A[i] + B[i].
;CHECK-LABEL: @reduction_or(
;CHECK: or <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
;CHECK: or <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
;CHECK: or <4 x i32>
;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
;CHECK: ret i32
define i32 @reduction_or(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
entry:
  %cmp7 = icmp sgt i32 %n, 0
  br i1 %cmp7, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  %1 = load i32, i32* %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  %or = or i32 %add, %result.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %result.0.lcssa = phi i32 [ 0, %entry ], [ %or, %for.body ]
  ret i32 %result.0.lcssa
}

; Bitwise XOR reduction starting at 0: result ^= A[i] + B[i].
;CHECK-LABEL: @reduction_xor(
;CHECK: xor <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
;CHECK: xor <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
;CHECK: xor <4 x i32>
;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
;CHECK: ret i32
define i32 @reduction_xor(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
entry:
  %cmp7 = icmp sgt i32 %n, 0
  br i1 %cmp7, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  %1 = load i32, i32* %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  %xor = xor i32 %add, %result.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %result.0.lcssa = phi i32 [ 0, %entry ], [ %xor, %for.body ]
  ret i32 %result.0.lcssa
}

; In this code the accumulated variable is on the RHS of the subtraction
; (x = A[i] - x), so this is not a reduction and the loop must not be
; vectorized.
;CHECK-LABEL: @reduction_sub_rhs(
;CHECK-NOT: phi <4 x i32>
;CHECK-NOT: sub nsw <4 x i32>
;CHECK: ret i32
define i32 @reduction_sub_rhs(i32 %n, i32* noalias nocapture %A) nounwind uwtable readonly {
entry:
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %sub = sub nsw i32 %0, %x.05
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %x.0.lcssa = phi i32 [ 0, %entry ], [ %sub, %for.body ]
  ret i32 %x.0.lcssa
}


; In this test the reduction variable is on the LHS of the subtraction
; (x = x - A[i]) and we can vectorize it.
;CHECK-LABEL: @reduction_sub_lhs(
;CHECK: phi <4 x i32>
;CHECK: sub nsw <4 x i32>
;CHECK: ret i32
define i32 @reduction_sub_lhs(i32 %n, i32* noalias nocapture %A) nounwind uwtable readonly {
entry:
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %sub = sub nsw i32 %x.05, %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %x.0.lcssa = phi i32 [ 0, %entry ], [ %sub, %for.body ]
  ret i32 %x.0.lcssa
}

; We can vectorize conditional reductions with multi-input phis.
; Every incoming value of %sum.1 is either the running sum itself or the
; running sum plus a loaded value.
; CHECK-LABEL: @reduction_conditional(
; CHECK: fadd <4 x float>

define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
  %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ]
  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv
  %1 = load float, float* %arrayidx2, align 4
  %cmp3 = fcmp ogt float %0, %1
  br i1 %cmp3, label %if.then, label %for.inc

if.then:
  %cmp6 = fcmp ogt float %1, 1.000000e+00
  br i1 %cmp6, label %if.then8, label %if.else

if.then8:
  %add = fadd fast float %sum.033, %0
  br label %for.inc

if.else:
  %cmp14 = fcmp ogt float %0, 2.000000e+00
  br i1 %cmp14, label %if.then16, label %for.inc

if.then16:
  %add19 = fadd fast float %sum.033, %1
  br label %for.inc

for.inc:
  %sum.1 = phi float [ %add, %if.then8 ], [ %add19, %if.then16 ], [ %sum.033, %if.else ], [ %sum.033, %for.body ]
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, 128
  br i1 %exitcond, label %for.body, label %for.end

for.end:
  %sum.1.lcssa = phi float [ %sum.1, %for.inc ]
  ret float %sum.1.lcssa
}

; We can't vectorize reductions with phi inputs from outside the reduction.
; Here the %if.else edge feeds the constant 0.0 into %sum.1 instead of the
; running sum.
; CHECK-LABEL: @noreduction_phi(
; CHECK-NOT: fadd <4 x float>
define float @noreduction_phi(float* %A, float* %B, float* %C, float %S) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
  %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ]
  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv
  %1 = load float, float* %arrayidx2, align 4
  %cmp3 = fcmp ogt float %0, %1
  br i1 %cmp3, label %if.then, label %for.inc

if.then:
  %cmp6 = fcmp ogt float %1, 1.000000e+00
  br i1 %cmp6, label %if.then8, label %if.else

if.then8:
  %add = fadd fast float %sum.033, %0
  br label %for.inc

if.else:
  %cmp14 = fcmp ogt float %0, 2.000000e+00
  br i1 %cmp14, label %if.then16, label %for.inc

if.then16:
  %add19 = fadd fast float %sum.033, %1
  br label %for.inc

for.inc:
  %sum.1 = phi float [ %add, %if.then8 ], [ %add19, %if.then16 ], [ 0.000000e+00, %if.else ], [ %sum.033, %for.body ]
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, 128
  br i1 %exitcond, label %for.body, label %for.end

for.end:
  %sum.1.lcssa = phi float [ %sum.1, %for.inc ]
  ret float %sum.1.lcssa
}

; We can't vectorize reductions that feed another header PHI.
; Here %add (the update of %sum.08) is also an operand of %add1, which updates
; the second header phi %sum2.09.
; CHECK-LABEL: @noredux_header_phi(
; CHECK-NOT: fadd <4 x float>

define float @noredux_header_phi(float* %A, float* %B, float* %C, float %S) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %sum2.09 = phi float [ 0.000000e+00, %entry ], [ %add1, %for.body ]
  %sum.08 = phi float [ %S, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %add = fadd fast float %sum.08, %0
  %add1 = fadd fast float %sum2.09, %add
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, 128
  br i1 %exitcond, label %for.body, label %for.end

for.end:
  %add1.lcssa = phi float [ %add1, %for.body ]
  %add.lcssa = phi float [ %add, %for.body ]
  %add2 = fadd fast float %add.lcssa, %add1.lcssa
  ret float %add2
}


; When vectorizing a reduction whose loop header phi value is used outside the
; loop, special care must be taken. Otherwise, the reduced value feeding into
; the outside user misses a few iterations (VF-1) of the loop.
; Here the function returns the header phi %p.addr.02 rather than %xor, and no
; vector of i32 may appear in the output.
; PR16522

; CHECK-LABEL: @phivalueredux(
; CHECK-NOT: x i32>

define i32 @phivalueredux(i32 %p) {
entry:
  br label %for.body

for.body:
  %t.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %p.addr.02 = phi i32 [ %p, %entry ], [ %xor, %for.body ]
  %xor = xor i32 %p.addr.02, -1
  %inc = add nsw i32 %t.03, 1
  %exitcond = icmp eq i32 %inc, 16
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret i32 %p.addr.02
}

; Don't vectorize a reduction value that is not the last in a reduction cycle.
; We would lose iterations (VF-1) on the operations after that use.
; Here the exit block uses %inc511.1.inc4.1, an intermediate add in the chain,
; rather than the final value %inc5.1.
; PR17498

; CHECK-LABEL: @not_last_operation(
; CHECK-NOT: x i32>
define i32 @not_last_operation(i32 %p, i32 %val) {
entry:
  %tobool = icmp eq i32 %p, 0
  br label %for.body

for.body:
  %inc613.1 = phi i32 [ 0, %entry ], [ %inc6.1, %for.body ]
  %inc511.1 = phi i32 [ %val, %entry ], [ %inc5.1, %for.body ]
  %0 = zext i1 %tobool to i32
  %inc4.1 = xor i32 %0, 1
  %inc511.1.inc4.1 = add nsw i32 %inc511.1, %inc4.1
  %inc5.1 = add nsw i32 %inc511.1.inc4.1, 1
  %inc6.1 = add nsw i32 %inc613.1, 1
  %exitcond.1 = icmp eq i32 %inc6.1, 22
  br i1 %exitcond.1, label %exit, label %for.body

exit:
  %inc.2 = add nsw i32 %inc511.1.inc4.1, 2
  ret i32 %inc.2
}

; Same add reduction as @reduction_sum, but the final value feeds two LCSSA
; phis in the exit block. Both phis are expected to receive the same
; scalar-loop value and the same vector-loop value.
;CHECK-LABEL: @reduction_sum_multiuse(
;CHECK: phi <4 x i32>
;CHECK: load <4 x i32>
;CHECK: add <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
;CHECK: add <4 x i32>
;CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
;CHECK: add <4 x i32>
;CHECK: extractelement <4 x i32> %{{.*}}, i32 0
;CHECK: %sum.lcssa = phi i32 [ %[[SCALAR:.*]], %.lr.ph ], [ %[[VECTOR:.*]], %middle.block ]
;CHECK: %sum.copy = phi i32 [ %[[SCALAR]], %.lr.ph ], [ %[[VECTOR]], %middle.block ]
;CHECK: ret i32
define i32 @reduction_sum_multiuse(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) {
  %1 = icmp sgt i32 %n, 0
  br i1 %1, label %.lr.ph.preheader, label %end
.lr.ph.preheader:                                 ; preds = %0
  br label %.lr.ph

.lr.ph:                                           ; preds = %.lr.ph.preheader, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
  %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %.lr.ph.preheader ]
  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %3 = load i32, i32* %2, align 4
  %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  %5 = load i32, i32* %4, align 4
  %6 = trunc i64 %indvars.iv to i32
  %7 = add i32 %sum.02, %6
  %8 = add i32 %7, %3
  %9 = add i32 %8, %5
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph
  %sum.lcssa = phi i32 [ %9, %.lr.ph ]
  %sum.copy = phi i32 [ %9, %.lr.ph ]
  br label %end

end:                                              ; preds = %._crit_edge, %0
  %f1 = phi i32 [ 0, %0 ], [ %sum.lcssa, %._crit_edge ]
  %f2 = phi i32 [ 0, %0 ], [ %sum.copy, %._crit_edge ]
  %final = add i32 %f1, %f2
  ret i32 %final
}