; RUN: opt -basicaa -loop-accesses -analyze < %s | FileCheck %s -check-prefix=LAA
; RUN: opt -loop-versioning -S < %s | FileCheck %s -check-prefix=LV

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; For this loop:
;   unsigned index = 0;
;   for (int i = 0; i < n; i++) {
;     A[2 * index] = A[2 * index] + B[i];
;     index++;
;   }
;
; SCEV is unable to prove that the index expression 2 * index does not
; overflow.
;
; Analyzing the IR does not help us because the GEPs are not
; affine AddRecExprs. However, we can turn them into AddRecExprs
; using SCEV predicates.
;
; Once we have an affine expression we need to add an additional NUSW
; predicate to check that the pointers don't wrap, since the GEPs are not
; inbounds.
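;
; A minimal C++ sketch (illustration only, not part of the test) of the
; unsigned wrap that the <nusw> predicate rules out: the i32 product
; 2 * index wraps before the zext to i64, so the 64-bit offset jumps
; backward instead of growing.
;
;   #include <cstdint>
;   #include <cstdio>
;   int main() {
;     uint32_t index = 0x80000000u;                 // 2^31
;     uint64_t offset = (uint64_t)(2u * index);     // zext of a wrapped i32 mul
;     printf("%llu\n", (unsigned long long)offset); // prints 0, not 2^32
;     return 0;
;   }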

; LAA-LABEL: f1
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {0,+,2}<%for.body> Added Flags: <nusw>
; LAA-NEXT: {%a,+,4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;   (zext i32 {0,+,2}<%for.body> to i64)
; We have added the nusw flag to turn this expression into the SCEV expression:
;   i64 {0,+,2}<%for.body>
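;
; For example, at an iteration where the narrow value 2*i reaches 2^32,
; the original expression evaluates to zext((2^32) mod 2^32) = 0, while
; the i64 AddRec evaluates to 2^32. The <nusw> assumption says the
; increments never wrap, which makes the two forms agree for every
; executed iteration; the versioned loop checks this at runtime.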

; LAA: [PSE] %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (zext i32 {0,+,2}<%for.body> to i64)) + %a)
; LAA-NEXT: --> {%a,+,4}<%for.body>


; LV-LABEL: f1
; LV-LABEL: for.body.lver.check
; LV: [[PredCheck0:%[^ ]*]] = icmp ne i128
; LV: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
; LV: [[PredCheck1:%[^ ]*]] = icmp ne i128
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f1(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = add i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; For this loop:
;   unsigned index = n;
;   for (int i = 0; i < n; i++) {
;     A[2 * index] = A[2 * index] + B[i];
;     index--;
;   }
;
; the SCEV expression for 2 * index is not an AddRecExpr
; (and implicitly not affine). However, we are able to make assumptions
; that will turn the expression into an affine one and continue the
; analysis.
;
; Once we have an affine expression we need to add an additional NUSW
; predicate to check that the pointers don't wrap, since the GEPs are not
; inbounds.
;
; This loop has a negative stride for A, and the nusw flag is required in
; order to properly extend the increment from i32 -4 to i64 -4 (see the
; sketch below).
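;
; A minimal C++ sketch (illustration only) of why the extension matters:
; zero-extending the narrow step reinterprets -4 as a huge positive
; offset, while sign-extending preserves the decrement.
;
;   #include <cstdint>
;   uint32_t step32 = (uint32_t)-4;    // 0xFFFFFFFC
;   uint64_t zext64 = step32;          // 4294967292: walks forward
;   int64_t  sext64 = (int32_t)step32; // -4: the intended decrement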

; LAA-LABEL: f2
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nusw>
; LAA-NEXT: {((2 * (zext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;   (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
; We have added the nusw flag to turn this expression into the following SCEV:
;   i64 {zext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>
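;
; For example, with %N == 3 the narrow i32 sequence is 6, 4, 2, and its
; zext matches the i64 AddRec {6,+,-2}. Had an iteration stepped below
; zero, the i32 value would wrap to near 2^32 while the i64 AddRec would
; go negative; <nusw> asserts this never happens.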

; LAA: [PSE] %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)) + %a)
; LAA-NEXT: --> {((2 * (zext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body>

; LV-LABEL: f2
; LV-LABEL: for.body.lver.check
; LV: [[PredCheck0:%[^ ]*]] = icmp ne i128
; LV: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
; LV: [[PredCheck1:%[^ ]*]] = icmp ne i128
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f2(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = add i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; We replicate the tests above, but this time sign extend 2 * index instead
; of zero extending it.
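;
; A minimal C++ sketch (illustration only) of the signed wrap that the
; <nssw> predicate rules out for the sign-extended form: once the i32
; product 2 * index passes INT32_MAX, the sext to i64 produces a large
; negative offset.
;
;   #include <cstdint>
;   int32_t mul = (int32_t)(2u * 0x40000000u); // i32 bit pattern of -2^31
;   int64_t offset = mul;                      // sext: -2147483648, not 2^31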

; LAA-LABEL: f3
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {0,+,2}<%for.body> Added Flags: <nssw>
; LAA-NEXT: {%a,+,4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;   i64 (sext i32 {0,+,2}<%for.body> to i64)
; We have added the nssw flag to turn this expression into the following SCEV:
;   i64 {0,+,2}<%for.body>

; LAA: [PSE] %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (sext i32 {0,+,2}<%for.body> to i64)) + %a)
; LAA-NEXT: --> {%a,+,4}<%for.body>

; LV-LABEL: f3
; LV-LABEL: for.body.lver.check
; LV: [[PredCheck0:%[^ ]*]] = icmp ne i128
; LV: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
; LV: [[PredCheck1:%[^ ]*]] = icmp ne i128
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f3(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = sext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = add i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; LAA-LABEL: f4
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;   i64 (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
; We have added the nssw flag to turn this expression into the following SCEV:
;   i64 {sext i32 (2 * (trunc i64 %N to i32)) to i64,+,-2}<%for.body>
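;
; For example, with %N == 3 the narrow i32 sequence is 6, 4, 2, and its
; sext matches the i64 AddRec {6,+,-2}; <nssw> asserts that no
; iteration's decrement signed-wraps, so this identity holds for the
; whole trip count.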

; LAA: [PSE] %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)) + %a)
; LAA-NEXT: --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body>

; LV-LABEL: f4
; LV-LABEL: for.body.lver.check
; LV: [[PredCheck0:%[^ ]*]] = icmp ne i128
; LV: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]
; LV: [[PredCheck1:%[^ ]*]] = icmp ne i128
; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f4(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = sext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = add i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; The following function is similar to the one above, but the GEP into
; pointer %a is marked inbounds. The multiply computing the index %mul
; still doesn't have the nsw flag, which means that the SCEV expression
; for %mul can wrap and we need a SCEV predicate to continue the analysis.
;
; We can still analyze this by adding the required no-wrap SCEV predicates,
; as sketched below.
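;
; A hedged C++ sketch (illustration only; PredicatedScalarEvolution is
; the utility LAA uses for this, but the exact usage here is an
; assumption, and ArrayIdxA stands for a hypothetical Value* holding
; %arrayidxA) of how an analysis can request such predicates:
;
;   #include "llvm/Analysis/ScalarEvolution.h"
;   using namespace llvm;
;
;   // Given ScalarEvolution &SE and Loop &L from the surrounding pass:
;   PredicatedScalarEvolution PSE(SE, L);
;   // Try to express the pointer as an affine AddRec, registering the
;   // wrap predicates (<nusw>/<nssw>) that must hold for the rewrite.
;   if (const SCEVAddRecExpr *AR = PSE.getAsAddRec(ArrayIdxA))
;     AR->dump(); // e.g. {...,+,-4}<%for.body>, valid under the predicates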

; LAA-LABEL: f5
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body> Added Flags: <nusw>

; LAA: [PSE] %arrayidxA = getelementptr inbounds i16, i16* %a, i32 %mul:
; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)<nsw>
; LAA-NEXT: --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body>

; LV-LABEL: f5
; LV-LABEL: for.body.lver.check
define void @f5(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2

  %arrayidxA = getelementptr inbounds i16, i16* %a, i32 %mul
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = add i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}