; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S | FileCheck %s
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s -check-prefix=WIDTH
; RUN: opt -S -loop-vectorize -force-vector-width=4 < %s | FileCheck %s -check-prefix=RIGHTVF
; RUN: opt -S -loop-vectorize -force-vector-width=8 < %s | FileCheck %s -check-prefix=WRONGVF

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; Vectorization with dependence checks.
;
; The file is run through the loop vectorizer four times:
;   * forced width 2, interleave 1 - verified with the default check prefix;
;   * forced width 4, interleave 1 - verified with the WIDTH prefix;
;   * forced width 4               - verified with the RIGHTVF prefix;
;   * forced width 8               - verified with the WRONGVF prefix.

; No plausible dependence - can be vectorized.
;   for (i = 0; i < 1024; ++i)
;     A[i] = A[i + 1] + 1;
;
; Every iteration reads the element one slot ahead of the one it writes, so a
; stored value is never read back by a later iteration.

; CHECK-LABEL: @f1_vec(
; CHECK: <2 x i32>

define void @f1_vec(i32* %A) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %indvars.iv.next = add i32 %indvars.iv, 1
  ; Load A[i + 1].
  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv.next
  %0 = load i32, i32* %arrayidx, align 4
  %add1 = add nsw i32 %0, 1
  ; Store the incremented value to A[i].
  %arrayidx3 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
  store i32 %add1, i32* %arrayidx3, align 4
  ; Keep looping until i + 1 reaches 1024.
  %exitcond = icmp ne i32 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.body, label %for.end

for.end:
  ret void
}

; Plausible dependence of distance 1 - can't be vectorized.
;   for (i = 0; i < 1024; ++i)
;     A[i+1] = A[i] + 1;
;
; The element stored in one iteration is the element loaded by the very next
; iteration.

; CHECK-LABEL: @f2_novec(
; CHECK-NOT: <2 x i32>

define void @f2_novec(i32* %A) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; Load A[i].
  %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %0, 1
  %indvars.iv.next = add i32 %indvars.iv, 1
  ; Store the incremented value to A[i + 1].
  %arrayidx3 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv.next
  store i32 %add, i32* %arrayidx3, align 4
  ; Keep looping until i + 1 reaches 1024.
  %exitcond = icmp ne i32 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.body, label %for.end

for.end:
  ret void
}

; Plausible dependence of distance 2 - can be vectorized with a width of 2.
;   for (i = 0; i < 1024; ++i)
;     A[i+2] = A[i] + 1;
;
; The run that forces a width of 2 must produce <2 x i32> operations; the run
; that forces a width of 4 must not produce <4 x i32> operations.

; CHECK-LABEL: @f3_vec_len(
; CHECK: <2 x i32>

; WIDTH: f3_vec_len
; WIDTH-NOT: <4 x i32>

define void @f3_vec_len(i32* %A) {
entry:
  br label %for.body

for.body:
  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  ; Load A[i]; the i32 counter is sign-extended to index the array.
  %idxprom = sext i32 %i.01 to i64
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
  %0 = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %0, 1
  ; Store the incremented value to A[i + 2].
  %add1 = add nsw i32 %i.01, 2
  %idxprom2 = sext i32 %add1 to i64
  %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %idxprom2
  store i32 %add, i32* %arrayidx3, align 4
  ; Keep looping while i + 1 < 1024 (signed compare).
  %inc = add nsw i32 %i.01, 1
  %cmp = icmp slt i32 %inc, 1024
  br i1 %cmp, label %for.body, label %for.end

for.end:
  ret void
}

; Plausible dependence of distance 1 - cannot be vectorized (without reordering
; accesses).
;   for (i = 0; i < 1024; ++i) {
;     B[i] = A[i];
;     A[i] = B[i + 1];
;   }
;
; Within the body the store to B[i] comes before the load of B[i + 1], and
; B[i + 1] is the element the next iteration stores to.

; CHECK-LABEL: @f5(
; CHECK-NOT: <2 x i32>

define void @f5(i32* %A, i32* %B) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; B[i] = A[i]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  store i32 %0, i32* %arrayidx2, align 4
  %indvars.iv.next = add nsw i64 %indvars.iv, 1
  ; A[i] = B[i + 1]; the store reuses the &A[i] address computed above.
  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv.next
  %1 = load i32, i32* %arrayidx4, align 4
  store i32 %1, i32* %arrayidx, align 4
  ; The exit test compares the i64 counter truncated to i32 against 1024.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.body, label %for.end

for.end:
  ret void
}

; Dependence through a phi node - must not vectorize.
;   for (i = 0; i < 1024; ++i) {
;     a[i+1] = tmp;
;     tmp = a[i];
;   }
;
; The value loaded from a[i] is carried to the next iteration by a phi and
; stored there; the function's own return value is undef.

; CHECK-LABEL: @f6(
; CHECK-NOT: <2 x i32>

define i32 @f6(i32* %a, i32 %tmp) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; %tmp on entry, afterwards the value loaded by the previous iteration.
  %tmp.addr.08 = phi i32 [ %tmp, %entry ], [ %0, %for.body ]
  %indvars.iv.next = add nsw i64 %indvars.iv, 1
  ; a[i + 1] = tmp
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
  store i32 %tmp.addr.08, i32* %arrayidx, align 4
  ; tmp = a[i]
  %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx3, align 4
  ; The exit test compares the i64 counter truncated to i32 against 1024.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, 1024
  br i1 %exitcond, label %for.body, label %for.end

for.end:
  ret i32 undef
}

; Don't vectorize true loop carried dependencies that are not a multiple of the
; vector width.
; Example:
;   for (int i = ...; ++i) {
;     a[i] = a[i-3] + ...;
;   }
; It is a bad idea to vectorize this loop because store-load forwarding will not
; happen.
;
; In the IR below the second operand of the sum is A[i + 4], and the counter
; starts at 16.

; CHECK-LABEL: @nostoreloadforward(
; CHECK-NOT: <2 x i32>

define void @nostoreloadforward(i32* %A) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 16, %entry ], [ %indvars.iv.next, %for.body ]
  ; Load A[i - 3], an element stored three iterations earlier.
  %0 = add nsw i64 %indvars.iv, -3
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0
  %1 = load i32, i32* %arrayidx, align 4
  ; Load A[i + 4].
  %2 = add nsw i64 %indvars.iv, 4
  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %2
  %3 = load i32, i32* %arrayidx2, align 4
  ; A[i] = A[i + 4] + A[i - 3]
  %add3 = add nsw i32 %3, %1
  %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  store i32 %add3, i32* %arrayidx5, align 4
  ; The exit test compares the i64 counter truncated to i32 against 128.
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, 128
  br i1 %exitcond, label %for.body, label %for.end

for.end:
  ret void
}

; Example:
;   for (int i = ...; ++i) {
;     a[i] = b[i];
;     c[i] = a[i-3] + ...;
;   }
; It is a bad idea to vectorize this loop because store-load forwarding will not
; happen.
;
; All three pointers are noalias, so the only dependence is between the store
; to A[i] and the load of A[i - 3]. In the IR below the loaded value is stored
; to C[i] unchanged, and the counter starts at 16.

; CHECK-LABEL: @nostoreloadforward2(
; CHECK-NOT: <2 x i32>

define void @nostoreloadforward2(i32* noalias %A, i32* noalias %B, i32* noalias %C) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 16, %entry ], [ %indvars.iv.next, %for.body ]
  ; A[i] = B[i]
  %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  store i32 %0, i32* %arrayidx2, align 4
  ; C[i] = A[i - 3]
  %1 = add nsw i64 %indvars.iv, -3
  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %1
  %2 = load i32, i32* %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
  store i32 %2, i32* %arrayidx6, align 4
  ; The exit test compares the i64 counter truncated to i32 against 128.
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, 128
  br i1 %exitcond, label %for.body, label %for.end

for.end:
  ret void
}

; Check the new calculation of the maximum safe distance in bits which can be
; vectorized. The previous behavior did not take into account that the stride
; was 2. Therefore the maxVF was computed as 8 instead of 4, as the dependence
; distance here is 6 iterations, given by |N-(N-12)|/2.

; #define M 32
; #define N 2 * M
; unsigned int a [N];
; void pr34283(){
;   unsigned int j=0;
;   for (j = 0; j < M - 6; ++j)
;   {
;     a[N - 2 * j] = 69;
;     a[N - 12 - 2 * j] = 7;
;   }
;
; }

; RIGHTVF-LABEL: @pr34283
; RIGHTVF: <4 x i64>

; WRONGVF-LABEL: @pr34283
; WRONGVF-NOT: <8 x i64>

@a = common local_unnamed_addr global [64 x i32] zeroinitializer, align 16

; Function Attrs: norecurse nounwind uwtable
define void @pr34283() local_unnamed_addr {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; %0 = 2 * j, the stride-2 part of both store indices.
  %0 = shl i64 %indvars.iv, 1
  ; a[64 - 2 * j] = 69
  ; NOTE(review): for j = 0 this addresses element 64 of the 64-element array
  ; @a, one past its last element - confirm the reproducer intends this.
  %1 = sub nuw nsw i64 64, %0
  %arrayidx = getelementptr inbounds [64 x i32], [64 x i32]* @a, i64 0, i64 %1
  store i32 69, i32* %arrayidx, align 8
  ; a[52 - 2 * j] = 7, twelve elements below the store above.
  %2 = sub nuw nsw i64 52, %0
  %arrayidx4 = getelementptr inbounds [64 x i32], [64 x i32]* @a, i64 0, i64 %2
  store i32 7, i32* %arrayidx4, align 8
  ; Leave the loop once j + 1 reaches 26 (M - 6).
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 26
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
