; blob: 02c5f668734b7beff5462508cf8f680b829f62d3 [file] [log] [blame]
; Evgeny Stupachenko dc8a254 2016-09-28 23:39:39 +0000
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -indvars -S | FileCheck %s
3
; Module data layout: little-endian ("e"), 64-bit pointers ("p:64:64:64"),
; and i8/i16/i32/i64 listed as native integer widths ("n8:16:32:64").
; NOTE(review): the i64 induction variables expected by the assertions below
; presumably depend on i64 being listed as a native width here - confirm.
4target datalayout = "e-m:e-i64:64-p:64:64:64-n8:16:32:64-S128"
5
6; When widening IV and its users, trunc and zext/sext are not needed
7; if the original 32-bit user is known to be non-negative, whether
8; the IV is considered signed or unsigned.
; @foo: A[i] = B[i] + C[i + 2] for i = 0 .. N-1; the loop is entered only
; when 0 < N. In the input, B is indexed through a sext of the i32 counter
; while C and A are indexed through zexts. The assertions expect a single i64
; induction variable, with no sext, zext, or trunc left in the loop body, and
; an exit test that compares the incremented i64 value against zext(N).
; The back-edge operand of the phi is captured as INDVARS_IV_NEXT so that the
; increment in for.inc is verified to be that same value.
9define void @foo(i32* %A, i32* %B, i32* %C, i32 %N) {
10; CHECK-LABEL: @foo(
11; CHECK-NEXT: entry:
12; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, %N
13; CHECK-NEXT: br i1 [[CMP1]], label %for.body.lr.ph, label %for.end
14; CHECK: for.body.lr.ph:
15; CHECK-NEXT: br label %for.body
16; CHECK: for.body:
17; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %for.inc ], [ 0, %for.body.lr.ph ]
18; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* %B, i64 [[INDVARS_IV]]
19; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
20; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
21; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* %C, i64 [[TMP1]]
22; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
23; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP0]], [[TMP2]]
24; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* %A, i64 [[INDVARS_IV]]
25; CHECK-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX5]], align 4
26; CHECK-NEXT: br label %for.inc
27; CHECK: for.inc:
28; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
29; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 %N to i64
30; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
31; CHECK-NEXT: br i1 [[EXITCOND]], label %for.body, label %for.cond.for.end_crit_edge
32; CHECK: for.cond.for.end_crit_edge:
33; CHECK-NEXT: br label %for.end
34; CHECK: for.end:
35; CHECK-NEXT: ret void
36;
37entry:
; Guard: skip the loop entirely unless 0 < N (signed).
38 %cmp1 = icmp slt i32 0, %N
39 br i1 %cmp1, label %for.body.lr.ph, label %for.end
40
41for.body.lr.ph: ; preds = %entry
42 br label %for.body
43
44for.body: ; preds = %for.body.lr.ph, %for.inc
45 %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
; B is indexed through a sign extension of the counter.
46 %idxprom = sext i32 %i.02 to i64
47 %arrayidx = getelementptr inbounds i32, i32* %B, i64 %idxprom
48 %0 = load i32, i32* %arrayidx, align 4
; C is indexed through a zero extension of the nsw add i + 2.
49 %add = add nsw i32 %i.02, 2
50 %idxprom1 = zext i32 %add to i64
51 %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %idxprom1
52 %1 = load i32, i32* %arrayidx2, align 4
53 %add3 = add nsw i32 %0, %1
; A is indexed through a zero extension of the counter.
54 %idxprom4 = zext i32 %i.02 to i64
55 %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %idxprom4
56 store i32 %add3, i32* %arrayidx5, align 4
57 br label %for.inc
58
59for.inc: ; preds = %for.body
; Continue while i + 1 < N (signed compare on the i32 counter).
60 %inc = add nsw i32 %i.02, 1
61 %cmp = icmp slt i32 %inc, %N
62 br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
63
64for.cond.for.end_crit_edge: ; preds = %for.inc
65 br label %for.end
66
67for.end: ; preds = %for.cond.for.end_crit_edge, %entry
68 ret void
69}
70
; @foo1: the same loop as @foo (A[i] = B[i] + C[i + 2], entered only when
; 0 < N) with the extension kinds swapped: B is indexed through a zext of the
; i32 counter, while C and A are indexed through sexts. The assertions expect
; the same result shape: a single i64 induction variable, no sext, zext, or
; trunc in the loop body, and an exit test against zext(N).
; The back-edge operand of the phi is captured as INDVARS_IV_NEXT so that the
; increment in for.inc is verified to be that same value.
71define void @foo1(i32* %A, i32* %B, i32* %C, i32 %N) {
72; CHECK-LABEL: @foo1(
73; CHECK-NEXT: entry:
74; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, %N
75; CHECK-NEXT: br i1 [[CMP1]], label %for.body.lr.ph, label %for.end
76; CHECK: for.body.lr.ph:
77; CHECK-NEXT: br label %for.body
78; CHECK: for.body:
79; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %for.inc ], [ 0, %for.body.lr.ph ]
80; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* %B, i64 [[INDVARS_IV]]
81; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
82; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2
83; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* %C, i64 [[TMP1]]
84; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
85; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP0]], [[TMP2]]
86; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* %A, i64 [[INDVARS_IV]]
87; CHECK-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX5]], align 4
88; CHECK-NEXT: br label %for.inc
89; CHECK: for.inc:
90; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
91; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 %N to i64
92; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
93; CHECK-NEXT: br i1 [[EXITCOND]], label %for.body, label %for.cond.for.end_crit_edge
94; CHECK: for.cond.for.end_crit_edge:
95; CHECK-NEXT: br label %for.end
96; CHECK: for.end:
97; CHECK-NEXT: ret void
98;
99entry:
; Guard: skip the loop entirely unless 0 < N (signed).
100 %cmp1 = icmp slt i32 0, %N
101 br i1 %cmp1, label %for.body.lr.ph, label %for.end
102
103for.body.lr.ph: ; preds = %entry
104 br label %for.body
105
106for.body: ; preds = %for.body.lr.ph, %for.inc
107 %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
; B is indexed through a zero extension of the counter.
108 %idxprom = zext i32 %i.02 to i64
109 %arrayidx = getelementptr inbounds i32, i32* %B, i64 %idxprom
110 %0 = load i32, i32* %arrayidx, align 4
; C is indexed through a sign extension of the nsw add i + 2.
111 %add = add nsw i32 %i.02, 2
112 %idxprom1 = sext i32 %add to i64
113 %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %idxprom1
114 %1 = load i32, i32* %arrayidx2, align 4
115 %add3 = add nsw i32 %0, %1
; A is indexed through a sign extension of the counter.
116 %idxprom4 = sext i32 %i.02 to i64
117 %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %idxprom4
118 store i32 %add3, i32* %arrayidx5, align 4
119 br label %for.inc
120
121for.inc: ; preds = %for.body
; Continue while i + 1 < N (signed compare on the i32 counter).
122 %inc = add nsw i32 %i.02, 1
123 %cmp = icmp slt i32 %inc, %N
124 br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
125
126for.cond.for.end_crit_edge: ; preds = %for.inc
127 br label %for.end
128
129for.end: ; preds = %for.cond.for.end_crit_edge, %entry
130 ret void
131}
132
133
; Two zero-initialized 100-element i32 arrays. @foo2 and @foo3 load from both
; and store into @a, then pass pointers to both arrays to @dummy.
134@a = common global [100 x i32] zeroinitializer, align 16
135@b = common global [100 x i32] zeroinitializer, align 16
136
; @foo2: a[i + M] = a[i] + b[i] for i = 0 .. M-1 (entered only when 0 < M),
; followed by a call to @dummy on the two arrays; always returns 0.
; In the input, @a is read through a zext of the i32 counter, @b through a
; sext, and the store index is a sext of the nsw add i + M. The assertions
; expect a single i64 induction variable, a sext of %M placed in
; for.body.lr.ph, the store index computed as an i64 nsw add, and no
; extension or trunc inside the loop body.
; The back-edge operand of the phi is captured as INDVARS_IV_NEXT so that the
; increment in for.inc is verified to be that same value.
137define i32 @foo2(i32 %M) {
138; CHECK-LABEL: @foo2(
139; CHECK-NEXT: entry:
140; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, %M
141; CHECK-NEXT: br i1 [[CMP1]], label %for.body.lr.ph, label %for.end
142; CHECK: for.body.lr.ph:
143; CHECK-NEXT: [[TMP0:%.*]] = sext i32 %M to i64
144; CHECK-NEXT: br label %for.body
145; CHECK: for.body:
146; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %for.inc ], [ 0, %for.body.lr.ph ]
147; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 [[INDVARS_IV]]
148; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
149; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @b, i64 0, i64 [[INDVARS_IV]]
150; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
151; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
152; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[INDVARS_IV]], [[TMP0]]
153; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 [[TMP3]]
154; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX5]], align 4
155; CHECK-NEXT: br label %for.inc
156; CHECK: for.inc:
157; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
158; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 %M to i64
159; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
160; CHECK-NEXT: br i1 [[EXITCOND]], label %for.body, label %for.cond.for.end_crit_edge
161; CHECK: for.cond.for.end_crit_edge:
162; CHECK-NEXT: br label %for.end
163; CHECK: for.end:
164; CHECK-NEXT: [[CALL:%.*]] = call i32 @dummy(i32* getelementptr inbounds ([100 x i32], [100 x i32]* @a, i32 0, i32 0), i32* getelementptr inbounds ([100 x i32], [100 x i32]* @b, i32 0, i32 0))
165; CHECK-NEXT: ret i32 0
166;
167entry:
; Guard: skip the loop entirely unless 0 < M (signed).
168 %cmp1 = icmp slt i32 0, %M
169 br i1 %cmp1, label %for.body.lr.ph, label %for.end
170
171for.body.lr.ph: ; preds = %entry
172 br label %for.body
173
174for.body: ; preds = %for.body.lr.ph, %for.inc
175 %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
; @a is read through a zero extension of the counter.
176 %idxprom = zext i32 %i.02 to i64
177 %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 %idxprom
178 %0 = load i32, i32* %arrayidx, align 4
; @b is read through a sign extension of the counter.
179 %idxprom1 = sext i32 %i.02 to i64
180 %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* @b, i64 0, i64 %idxprom1
181 %1 = load i32, i32* %arrayidx2, align 4
182 %add = add nsw i32 %0, %1
; The store index is a sign extension of the nsw add i + M.
183 %add3 = add nsw i32 %i.02, %M
184 %idxprom4 = sext i32 %add3 to i64
185 %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 %idxprom4
186 store i32 %add, i32* %arrayidx5, align 4
187 br label %for.inc
188
189for.inc: ; preds = %for.body
; Continue while i + 1 < M (signed compare on the i32 counter).
190 %inc = add nsw i32 %i.02, 1
191 %cmp = icmp slt i32 %inc, %M
192 br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
193
194for.cond.for.end_crit_edge: ; preds = %for.inc
195 br label %for.end
196
197for.end: ; preds = %for.cond.for.end_crit_edge, %entry
198 %call = call i32 @dummy(i32* getelementptr inbounds ([100 x i32], [100 x i32]* @a, i32 0, i32 0), i32* getelementptr inbounds ([100 x i32], [100 x i32]* @b, i32 0, i32 0))
199 ret i32 0
200}
201
; External function with no body in this file. @foo2 and @foo3 call it after
; their loops with pointers to the first elements of @a and @b and ignore the
; returned i32.
; NOTE(review): presumably present so the stores to @a remain observable -
; confirm.
202declare i32 @dummy(i32*, i32*)
203
204; A case where the zext must not be eliminated: its operand (an `add nsw`) can only be widened by sign extension.
; @foo3: the same loop as @foo2 (a[i + M] = a[i] + b[i], entered only when
; 0 < M, then a call to @dummy and return 0), except that both loads index
; through a sext of the i32 counter and the store index is a zext of the
; nsw add i + M. The assertions expect the add to be widened to an i64 nsw
; add of the induction variable and sext(%M), but then truncated back to i32
; and zero-extended again: the zext is kept rather than removed.
; The back-edge operand of the phi is captured as INDVARS_IV_NEXT so that the
; increment in for.inc is verified to be that same value.
205define i32 @foo3(i32 %M) {
206; CHECK-LABEL: @foo3(
207; CHECK-NEXT: entry:
208; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, %M
209; CHECK-NEXT: br i1 [[CMP1]], label %for.body.lr.ph, label %for.end
210; CHECK: for.body.lr.ph:
211; CHECK-NEXT: [[TMP0:%.*]] = sext i32 %M to i64
212; CHECK-NEXT: br label %for.body
213; CHECK: for.body:
214; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %for.inc ], [ 0, %for.body.lr.ph ]
215; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 [[INDVARS_IV]]
216; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
217; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @b, i64 0, i64 [[INDVARS_IV]]
218; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
219; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
220; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[INDVARS_IV]], [[TMP0]]
221; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
222; CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP4]] to i64
223; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 [[IDXPROM4]]
224; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX5]], align 4
225; CHECK-NEXT: br label %for.inc
226; CHECK: for.inc:
227; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
228; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 %M to i64
229; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
230; CHECK-NEXT: br i1 [[EXITCOND]], label %for.body, label %for.cond.for.end_crit_edge
231; CHECK: for.cond.for.end_crit_edge:
232; CHECK-NEXT: br label %for.end
233; CHECK: for.end:
234; CHECK-NEXT: [[CALL:%.*]] = call i32 @dummy(i32* getelementptr inbounds ([100 x i32], [100 x i32]* @a, i32 0, i32 0), i32* getelementptr inbounds ([100 x i32], [100 x i32]* @b, i32 0, i32 0))
235; CHECK-NEXT: ret i32 0
236;
237entry:
; Guard: skip the loop entirely unless 0 < M (signed).
238 %cmp1 = icmp slt i32 0, %M
239 br i1 %cmp1, label %for.body.lr.ph, label %for.end
240
241for.body.lr.ph: ; preds = %entry
242 br label %for.body
243
244for.body: ; preds = %for.body.lr.ph, %for.inc
245 %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
; Both loads index through a sign extension of the counter.
246 %idxprom = sext i32 %i.02 to i64
247 %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 %idxprom
248 %0 = load i32, i32* %arrayidx, align 4
249 %idxprom1 = sext i32 %i.02 to i64
250 %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* @b, i64 0, i64 %idxprom1
251 %1 = load i32, i32* %arrayidx2, align 4
252 %add = add nsw i32 %0, %1
; The store index is a zero extension of the nsw add i + M; this is the
; extension the assertions expect to survive.
253 %add3 = add nsw i32 %i.02, %M
254 %idxprom4 = zext i32 %add3 to i64
255 %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* @a, i64 0, i64 %idxprom4
256 store i32 %add, i32* %arrayidx5, align 4
257 br label %for.inc
258
259for.inc: ; preds = %for.body
; Continue while i + 1 < M (signed compare on the i32 counter).
260 %inc = add nsw i32 %i.02, 1
261 %cmp = icmp slt i32 %inc, %M
262 br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
263
264for.cond.for.end_crit_edge: ; preds = %for.inc
265 br label %for.end
266
267for.end: ; preds = %for.cond.for.end_crit_edge, %entry
268 %call = call i32 @dummy(i32* getelementptr inbounds ([100 x i32], [100 x i32]* @a, i32 0, i32 0), i32* getelementptr inbounds ([100 x i32], [100 x i32]* @b, i32 0, i32 0))
269 ret i32 0
270}