; RUN: opt -basicaa -loop-accesses -analyze < %s | FileCheck %s -check-prefix=LAA
; RUN: opt -loop-versioning -S < %s | FileCheck %s -check-prefix=LV

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

; For this loop:
;   unsigned index = 0;
;   for (int i = 0; i < n; i++) {
;     A[2 * index] = A[2 * index] + B[i];
;     index++;
;   }
;
; SCEV is unable to prove that A[2 * index] does not overflow.
;
; Analyzing the IR does not help us because the GEPs are not
; affine AddRecExprs. However, we can turn them into AddRecExprs
; using SCEV predicates.
;
; Once we have an affine expression we need to add an additional NUSW
; predicate to check that the pointers don't wrap, since the GEPs are
; not inbounds.
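;
; As a rough C sketch (illustrative only; the helper name and shape are
; made up, the actual emitted IR is matched below), the runtime check that
; loop-versioning generates for the {0,+,2} predicate reduces to:
;
;   #include <stdbool.h>
;   #include <stdint.h>
;
;   // BTC is the backedge-taken count of the loop.
;   bool induction_wraps(uint64_t BTC) {
;     uint32_t End;
;     bool MulOvfl = __builtin_mul_overflow((uint32_t)2, (uint32_t)BTC, &End);
;     return MulOvfl | (BTC > UINT32_MAX);
;   }
;
; The IR below additionally re-checks the widened end value, which is
; vacuous for this loop because the induction starts at 0 and increases.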

; LAA-LABEL: f1
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {0,+,2}<%for.body> Added Flags: <nusw>
; LAA-NEXT: {%a,+,4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;    (zext i32 {0,+,2}<%for.body> to i64)
; We have added the nusw flag to turn this expression into the following SCEV:
;    i64 {0,+,2}<%for.body>
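;
; Concretely: as long as nothing wraps, the i32 AddRec takes the values
; 0, 2, 4, ... and zero-extending each of them yields exactly the values
; of the i64 AddRec, which is the equivalence <nusw> lets SCEV assume.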

; LAA: [PSE] %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (zext i32 {0,+,2}<%for.body> to i64)) + %a)
; LAA-NEXT: --> {%a,+,4}<%for.body>


; LV-LABEL: f1
; LV-LABEL: for.body.lver.check

; LV: [[BETrunc:%[^ ]*]] = trunc i64 [[BE:%[^ ]*]] to i32
; LV-NEXT: [[OFMul:%[^ ]*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[BETrunc]])
; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 0
; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 1
; LV-NEXT: [[AddEnd:%[^ ]*]] = add i32 0, [[OFMulResult]]
; LV-NEXT: [[SubEnd:%[^ ]*]] = sub i32 0, [[OFMulResult]]
; LV-NEXT: [[CmpNeg:%[^ ]*]] = icmp ugt i32 [[SubEnd]], 0
; LV-NEXT: [[CmpPos:%[^ ]*]] = icmp ult i32 [[AddEnd]], 0
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg]], i1 [[CmpPos]]
; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295
; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]

; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]

; LV-NEXT: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
; LV-NEXT: [[Cmp1:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp1]], [[OFMulOverflow1]]

; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f1(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; For this loop:
;   unsigned index = n;
;   for (int i = 0; i < n; i++) {
;     A[2 * index] = A[2 * index] + B[i];
;     index--;
;   }
;
; the SCEV expression for 2 * index is not an AddRecExpr
; (and implicitly not affine). However, we are able to make assumptions
; that will turn the expression into an affine one and continue the
; analysis.
;
; Once we have an affine expression we need to add an additional NUSW
; predicate to check that the pointers don't wrap, since the GEPs are
; not inbounds.
;
; This loop has a negative stride for A, and the nusw flag is required in
; order to properly extend the increment from i32 -4 to i64 -4.
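;
; A rough C sketch of the corresponding runtime check (illustrative only;
; same caveats as the sketch for f1): for a decreasing induction the
; interesting direction is wrapping below the start value:
;
;   #include <stdbool.h>
;   #include <stdint.h>
;
;   bool induction_wraps_down(uint32_t Start, uint64_t BTC) {
;     uint32_t Mul;
;     bool MulOvfl = __builtin_mul_overflow((uint32_t)2, (uint32_t)BTC, &Mul);
;     uint32_t End = Start - Mul;   // final value of the induction
;     return MulOvfl | (End > Start) | (BTC > UINT32_MAX);
;   }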

; LAA-LABEL: f2
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nusw>
; LAA-NEXT: {((2 * (zext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;    (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
; We have added the nusw flag to turn this expression into the following SCEV:
;    i64 {(zext i32 (2 * (trunc i64 %N to i32)) to i64),+,-2}<%for.body>

; LAA: [PSE] %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (zext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)) + %a)
; LAA-NEXT: --> {((2 * (zext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body>

; LV-LABEL: f2
; LV-LABEL: for.body.lver.check

; LV: [[OFMul:%[^ ]*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[BETrunc:%[^ ]*]])
; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 0
; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 1
; LV-NEXT: [[AddEnd:%[^ ]*]] = add i32 [[Start:%[^ ]*]], [[OFMulResult]]
; LV-NEXT: [[SubEnd:%[^ ]*]] = sub i32 [[Start]], [[OFMulResult]]
; LV-NEXT: [[CmpNeg:%[^ ]*]] = icmp ugt i32 [[SubEnd]], [[Start]]
; LV-NEXT: [[CmpPos:%[^ ]*]] = icmp ult i32 [[AddEnd]], [[Start]]
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg]], i1 [[CmpPos]]
; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE:%[^ ]*]], 4294967295
; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]

; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]

; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[Start1:%[^ ]*]], [[OFMulResult1]]
; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[Start1]], [[OFMulResult1]]
; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[Start1]]
; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[Start1]]
; LV-NEXT: [[Cmp1:%[^ ]*]] = select i1 true, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp1]], [[OFMulOverflow1]]

; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f2(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = zext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; We replicate the tests above, but this time we sign-extend 2 * index
; instead of zero-extending it.

; LAA-LABEL: f3
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {0,+,2}<%for.body> Added Flags: <nssw>
; LAA-NEXT: {%a,+,4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;    i64 (sext i32 {0,+,2}<%for.body> to i64)
; We have added the nssw flag to turn this expression into the following SCEV:
;    i64 {0,+,2}<%for.body>
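;
; With sign extension the predicate is <nssw>, so the sketch from f1
; changes to a signed wrap test (again illustrative only):
;
;   #include <stdbool.h>
;   #include <stdint.h>
;
;   bool induction_wraps_signed(uint64_t BTC) {
;     uint32_t Mul;
;     bool MulOvfl = __builtin_mul_overflow((uint32_t)2, (uint32_t)BTC, &Mul);
;     return MulOvfl | ((int32_t)Mul < 0) | (BTC > UINT32_MAX);
;   }
;
; This is why the compares below use sgt/slt where f1 used ugt/ult.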

; LAA: [PSE] %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (sext i32 {0,+,2}<%for.body> to i64)) + %a)
; LAA-NEXT: --> {%a,+,4}<%for.body>

; LV-LABEL: f3
; LV-LABEL: for.body.lver.check

; LV: [[OFMul:%[^ ]*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[BETrunc:%[^ ]*]])
; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 0
; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 1
; LV-NEXT: [[AddEnd:%[^ ]*]] = add i32 0, [[OFMulResult]]
; LV-NEXT: [[SubEnd:%[^ ]*]] = sub i32 0, [[OFMulResult]]
; LV-NEXT: [[CmpNeg:%[^ ]*]] = icmp sgt i32 [[SubEnd]], 0
; LV-NEXT: [[CmpPos:%[^ ]*]] = icmp slt i32 [[AddEnd]], 0
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg]], i1 [[CmpPos]]
; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE:%[^ ]*]], 4294967295
; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]

; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]

; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
; LV-NEXT: [[Cmp1:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp1]], [[OFMulOverflow1]]

; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f3(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = sext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %inc1 = add i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; LAA-LABEL: f4
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body> Added Flags: <nusw>

; The expression for %mul_ext as analyzed by SCEV is
;    i64 (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)
; We have added the nssw flag to turn this expression into the following SCEV:
;    i64 {(sext i32 (2 * (trunc i64 %N to i32)) to i64),+,-2}<%for.body>

; LAA: [PSE] %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext:
; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64)) + %a)
; LAA-NEXT: --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body>

; LV-LABEL: f4
; LV-LABEL: for.body.lver.check

; LV: [[OFMul:%[^ ]*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[BETrunc:%[^ ]*]])
; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 0
; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 1
; LV-NEXT: [[AddEnd:%[^ ]*]] = add i32 [[Start:%[^ ]*]], [[OFMulResult]]
; LV-NEXT: [[SubEnd:%[^ ]*]] = sub i32 [[Start]], [[OFMulResult]]
; LV-NEXT: [[CmpNeg:%[^ ]*]] = icmp sgt i32 [[SubEnd]], [[Start]]
; LV-NEXT: [[CmpPos:%[^ ]*]] = icmp slt i32 [[AddEnd]], [[Start]]
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg]], i1 [[CmpPos]]
; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE:%[^ ]*]], 4294967295
; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]

; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]

; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[Start1:%[^ ]*]], [[OFMulResult1]]
; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[Start1]], [[OFMulResult1]]
; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[Start1]]
; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[Start1]]
; LV-NEXT: [[Cmp1:%[^ ]*]] = select i1 true, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp1]], [[OFMulOverflow1]]

; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f4(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2
  %mul_ext = sext i32 %mul to i64

  %arrayidxA = getelementptr i16, i16* %a, i64 %mul_ext
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; The following function is similar to the one above, but the GEP into
; pointer %a is inbounds. The index %mul doesn't have the nsw flag.
; This means that the SCEV expression for %mul can wrap, and we need
; a SCEV predicate to continue the analysis.
;
; We can still analyze this by adding the required no-wrap SCEV predicates.
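;
; A plausible C source for this pattern (assumed here purely for
; illustration; it mirrors the loop of f2/f4, except the front end now
; emits inbounds GEPs while the index multiply still carries no nsw):
;
;   unsigned index = n;
;   for (int i = 0; i < n; i++) {
;     A[2 * index] = A[2 * index] + B[i];   // 2 * index may still wrap
;     index--;
;   }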

; LAA-LABEL: f5
; LAA: Memory dependences are safe{{$}}
; LAA: SCEV assumptions:
; LAA-NEXT: {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> Added Flags: <nssw>
; LAA-NEXT: {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body> Added Flags: <nusw>

; LAA: [PSE] %arrayidxA = getelementptr inbounds i16, i16* %a, i32 %mul:
; LAA-NEXT: ((2 * (sext i32 {(2 * (trunc i64 %N to i32)),+,-2}<%for.body> to i64))<nsw> + %a)<nsw>
; LAA-NEXT: --> {((2 * (sext i32 (2 * (trunc i64 %N to i32)) to i64)) + %a),+,-4}<%for.body>

; LV-LABEL: f5
; LV-LABEL: for.body.lver.check
; LV: [[OFMul:%[^ ]*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[BETrunc:%[^ ]*]])
; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 0
; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i32, i1 } [[OFMul]], 1
; LV-NEXT: [[AddEnd:%[^ ]*]] = add i32 [[Start:%[^ ]*]], [[OFMulResult]]
; LV-NEXT: [[SubEnd:%[^ ]*]] = sub i32 [[Start]], [[OFMulResult]]
; LV-NEXT: [[CmpNeg:%[^ ]*]] = icmp sgt i32 [[SubEnd]], [[Start]]
; LV-NEXT: [[CmpPos:%[^ ]*]] = icmp slt i32 [[AddEnd]], [[Start]]
; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg]], i1 [[CmpPos]]
; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE:%[^ ]*]], 4294967295
; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]]
; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]]

; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]]

; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[Start1:%[^ ]*]], [[OFMulResult1]]
; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[Start1]], [[OFMulResult1]]
; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[Start1]]
; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[Start1]]
; LV-NEXT: [[Cmp1:%[^ ]*]] = select i1 true, i1 [[CmpNeg1]], i1 [[CmpPos1]]
; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp1]], [[OFMulOverflow1]]

; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]]
; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph
define void @f5(i16* noalias %a,
                i16* noalias %b, i64 %N) {
entry:
  %TruncN = trunc i64 %N to i32
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %ind1 = phi i32 [ %TruncN, %entry ], [ %dec, %for.body ]

  %mul = mul i32 %ind1, 2

  %arrayidxA = getelementptr inbounds i16, i16* %a, i32 %mul
  %loadA = load i16, i16* %arrayidxA, align 2

  %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %ind
  %loadB = load i16, i16* %arrayidxB, align 2

  %add = mul i16 %loadA, %loadB

  store i16 %add, i16* %arrayidxA, align 2

  %inc = add nuw nsw i64 %ind, 1
  %dec = sub i32 %ind1, 1

  %exitcond = icmp eq i64 %inc, %N
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}