; RUN: opt -basicaa -loop-accesses -analyze < %s | FileCheck %s -check-prefix=LAA
; RUN: opt -loop-versioning -S < %s | FileCheck %s -check-prefix=LV

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; For this loop:
;   unsigned index = 0;
;   for (int i = 0; i < n; i++) {
;     A[2 * index] = A[2 * index] + B[i];
;     index++;
;   }
;
; SCEV is unable to prove that 2 * index does not overflow.
;
; Analyzing the IR does not help us because the GEPs are not
; affine AddRecExprs. However, we can turn them into AddRecExprs
; using SCEV predicates.
;
; Once we have an affine expression, we need to add an additional NUSW
; predicate to check that the pointers don't wrap, since the GEPs are not
; inbounds.
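;
; A minimal sketch of the wrap-around these predicates guard against
; (illustrative values only, not part of the test): with a 32-bit unsigned
; index, 2 * index wraps once index reaches 2^31:
;
;   index     = 0x80000000;               // 2^31
;   2 * index = 0x100000000 mod 2^32 = 0
;
; so A[2 * index] would alias A[0] and the dependence analysis result would
; no longer hold. Loop versioning turns the <nusw> assumptions into a
; run-time check and falls back to the original loop if they fail.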

; LAA-LABEL: f1
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {0,+,2}<%for.body> Added Flags: <nusw>
; LAA-NEXT: {%a,+,4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;    (zext i32 {0,+,2}<%for.body> to i64)
; We have added the nusw flag to turn this expression into the SCEV expression:
;    i64 {0,+,2}<%for.body>

; LV-LABEL: f1
; LV-LABEL: for.body.lver.check
; LV: [[PredCheck0:%[^ ]*]] = icmp ne i128
; LV: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
; LV: [[PredCheck1:%[^ ]*]] = icmp ne i128
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f1(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; For this loop:
;   unsigned index = n;
;   for (int i = 0; i < n; i++) {
;     A[2 * index] = A[2 * index] + B[i];
;     index--;
;   }
;
; the SCEV expression for 2 * index is not an AddRecExpr
; (and implicitly not affine). However, we are able to make assumptions
; that will turn the expression into an affine one and continue the
; analysis.
;
; Once we have an affine expression, we need to add an additional NUSW
; predicate to check that the pointers don't wrap, since the GEPs are not
; inbounds.
;
; This loop has a negative stride for A, and the nusw flag is required in
; order to properly extend the increment from i32 -4 to i64 -4.
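;
; A small illustration of that extension problem (values are illustrative
; only, not part of the test): the i32 step -2 has the bit pattern
; 0xFFFFFFFE. Without a no-wrap assumption, widening the recurrence to i64
; cannot reuse that step directly, because a plain zero extension would turn
; it into +4294967294 rather than -2. The <nusw> predicate is what licenses
; keeping the -2 element step (-4 bytes for the i16 pointer %a) after
; widening.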

; LAA-LABEL: f2
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nusw>
; LAA-NEXT: {((2 * (zext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;    (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
; We have added the nusw flag to turn this expression into the following SCEV:
;    i64 {zext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>

; LV-LABEL: f2
; LV-LABEL: for.body.lver.check
; LV: [[PredCheck0:%[^ ]*]] = icmp ne i128
; LV: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
; LV: [[PredCheck1:%[^ ]*]] = icmp ne i128
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f2(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; We replicate the tests above, but this time sign extend 2 * index instead
; of zero extending it.
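;
; A rough C equivalent for f3, analogous to the f1 source above (illustrative
; only; the exact source is an assumption): the index is kept in a signed
; 32-bit type, so 2 * index is sign extended to 64 bits before it is used as
; a subscript:
;
;   int index = 0;
;   for (int i = 0; i < n; i++) {
;     A[2 * index] = A[2 * index] + B[i];
;     index++;
;   }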

; LAA-LABEL: f3
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {0,+,2}<%for.body> Added Flags: <nssw>
; LAA-NEXT: {%a,+,4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;    i64 (sext i32 {0,+,2}<%for.body> to i64)
; We have added the nssw flag to turn this expression into the following SCEV:
;    i64 {0,+,2}<%for.body>

; LV-LABEL: f3
; LV-LABEL: for.body.lver.check
; LV: [[PredCheck0:%[^ ]*]] = icmp ne i128
; LV: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
; LV: [[PredCheck1:%[^ ]*]] = icmp ne i128
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f3(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = sext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

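; f4 replicates f2, but with a sign extension of 2 * index. A rough C
; equivalent, analogous to the f2 source above (illustrative only; the exact
; source is an assumption):
;
;   int index = n;
;   for (int i = 0; i < n; i++) {
;     A[2 * index] = A[2 * index] + B[i];
;     index--;
;   }
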
; LAA-LABEL: f4
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;    i64 (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
; We have added the nssw flag to turn this expression into the following SCEV:
;    i64 {sext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>

; LV-LABEL: f4
; LV-LABEL: for.body.lver.check
; LV: [[PredCheck0:%[^ ]*]] = icmp ne i128
; LV: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
; LV: [[PredCheck1:%[^ ]*]] = icmp ne i128
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f4(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = sext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; The following function is similar to the one above, but marks the GEP into
; pointer %a as inbounds. The index %mul doesn't have the nsw flag, which
; means that the SCEV expression for %mul can wrap and we need a SCEV
; predicate to continue the analysis.
;
; We can still analyze this by adding the required no-wrap SCEV predicates.
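;
; A concrete wrap the predicates exclude (illustrative values only, not part
; of the test): with %ind1 = 0x40000000, the unflagged i32 multiply computes
;   2 * 0x40000000 = 0x80000000,
; which as a signed i32 is INT_MIN rather than 2^31, so %mul no longer
; tracks the source-level 2 * index. The SCEV assumptions listed below rule
; such executions out with a run-time check.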

; LAA-LABEL: f5
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body> Added Flags: <nusw>

; LV-LABEL: f5
; LV-LABEL: for.body.lver.check
define void @f5(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2

  %arrayidxA = getelementptr inbounds i16, i16* %a, i32 %mul
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}